/* (C) Copyright International Business Machines Corporation 23 January */
/* 1990.  All Rights Reserved. */
/*  */
/* See the file USERAGREEMENT distributed with this software for full */
/* terms and conditions of use. */
/* C Language Parser
** Andy Lowry, Apr 1989
**/

%{
/* SCCS identification string for this grammar file ("@(#)" is the */
/* marker searched for by the what(1) utility).  Text between %{ and */
/* %} is copied verbatim into the generated parser source. */
static char sccsid[] = "@(#)minic.y	1.2 3/13/90";
%}

/* The following yacc grammar is intended to recognize the C
** programming language.  The lexical analyzer is assumed to consume
** comments silently and maintain an idea of the name of the current
** input file and the position within it (handling cpp line markers if
** needed) in the variables inputfile and linecount.  The following
** special handling is also required on the part of the lexical
** analyzer, to help disambiguate a few areas of the grammar:

** - Symbols should be returned either as token T_IDENT or as
** token T_TYPEDEFNAME, depending on whether the symbol is the same as
** one that has been defined in a prior typedef declaration.  The
** lexical analyzer should check the symbol table entry to make its
** decision.  This handling is required so that input like "foo;" can
** be parsed correctly as either an empty declaration of type "foo" or
** as an expression statement referencing the variable "foo".  Of
** course, in neither case does the input have any runtime effect, but
** the language allows both, and the statement interpretation would
** cause a break from the declarations section to the statements
** section in a compound statement.

** - When a left parenthesis is immediately followed by a right
** parenthesis (possibly with intervening whitespace and comments),
** the special token T_PARENS should be returned.  This is needed
** because otherwise in input like "x = (int ()) y;" the abstract
** declarator (inside the cast) would be ambiguous: it could be an
** empty abstract declarator followed by "()" to indicate "function
** returning"; or it could be an empty abstract declarator surrounded
** by parentheses.  By using the token T_PARENS in the grammar rule
** for the former interpretation, the ambiguity is resolved.  Of
** course, T_PARENS must also appear anywhere else in the grammar
** where "()" is allowed.  This normally just adds an extra case to
** the existing rules and turns anything like '(' opt_xxx ')' into '('
** xxx ')' (so the xxx is not optional) in the original rule.

** - In some cases where the T_PARENS token would be returned
** according to the preceding discussion, the lexical analyzer must
** look ahead one token beyond the parens and return either T_PARENS1
** or T_PARENS2, depending on what it finds.  To accommodate this, the
** lexical analyzer should *always* look ahead when it is about to
** return T_PARENS and switch to either T_PARENS1 or T_PARENS2 if the
** next token is in either List 1 or List 2 below, respectively.  If
** the token is in neither list, T_PARENS is returned as usual.  In
** the grammar, rules that don't care about following context for
** empty parens should use the nonterminal symbol 'parens', which is
** defined as any of the three possible tokens.

**   List 1: '{', T_VOID, T_CHAR, T_SHORT, T_INT, T_LONG, T_UNSIGNED, T_FLOAT,
**     T_DOUBLE, T_STRUCT, T_UNION, T_ENUM, T_TYPEDEFNAME, T_AUTO,
**     T_STATIC, T_EXTERN, T_REGISTER, T_TYPEDEF

**   List 2: '[', '(', T_PARENS, ',', '=', ':', ';'

** This special case is needed to distinguish a procedure declaration
** from a procedure definition.  List 1 contains all the tokens that
** could follow the parens in a procedure definition, while List 2
** contains all the tokens that could follow the parens in a
** declaration.  As an example of problem input, consider "int
** x(){...}" and "int x();".  In the first case, "x" should be reduced as
** a complete declarator and "()" shifted for the function_declarator
** rule.  In the second case, "x()" is the declarator, so "x" must not
** be reduced to a complete declarator.  The decision to reduce or not
** to reduce cannot be made without knowledge of what follows the
** parens.

** Note that some if not all of the problems mentioned above only
** occur when the program is illegal for some other reason (like
** declaring a function that returns a function, as opposed to a
** pointer to a function).  They could probably be handled by
** adjustments to the grammar to disallow those cases, but that would
** complicate the grammar considerably.  The current approach trades
** relative simplicity in the grammar for a small number of special
** cases in the lexical analyzer and some additional checking required
** by later compiler stages for invalid constructions that passed
** through the parser.
**/


%{
#include "minic.h"		/* common declarations */
%}

/* Declare all the types we can get with a token or nonterminal. */
/* NOTE(review): no %token or %type declaration in this file attaches */
/* one of these members to a symbol, and no rule carries a semantic */
/* action, so the union is declared but not yet consulted by the */
/* grammar itself.  S_ENTRY is not defined here -- presumably it comes */
/* from minic.h; confirm. */
%union {
  int intval;			/* normal integer */
  long longval;			/* long integer */
  double dblval;		/* long float */
  char *strval;			/* string of chars */
  S_ENTRY *symval;		/* symbol table entry */
}

/* Token types that come with an associated value.  Each comment */
/* names the variable in which the value is said to be delivered */
/* (yytext, yyint, ...); those variables are not declared in this */
/* file -- presumably the lexical analyzer owns them; confirm. */
%token T_IDENT			/* identifier, name in yytext */
%token T_INTCONST		/* normal sized integer in yyint */
%token T_LONGCONST		/* long integer in yylong */
%token T_DOUBLECONST		/* long float in yydouble */
%token T_SQSTRING		/* string in single quotes, text in yystr */
%token T_DQSTRING		/* string in double quotes, text in yystr */

/* Tokens for multi-character operators.  Single-character operators */
/* and punctuation are not declared here; the rules refer to them */
/* directly as character literals such as '+' or ';'. */
%token T_INCREMENT		/* ++ */
%token T_DECREMENT		/* -- */
%token T_RARROW			/* -> */
%token T_LSHIFT			/* << */
%token T_RSHIFT			/* >> */
%token T_LEQ			/* <= */
%token T_GEQ			/* >= */
%token T_EQ			/* == */
%token T_NEQ			/* != */
%token T_AND			/* && */
%token T_IOR			/* || */
%token T_ADD_ASSIGN		/* += */
%token T_SUB_ASSIGN		/* -= */
%token T_MULT_ASSIGN		/* *= */
%token T_DIV_ASSIGN		/* /= */
%token T_MOD_ASSIGN		/* %= */
%token T_RSHIFT_ASSIGN		/* >>= */
%token T_LSHIFT_ASSIGN		/* <<= */
%token T_AND_ASSIGN		/* &= */
%token T_XOR_ASSIGN		/* ^= */
%token T_IOR_ASSIGN		/* |= */

/* Tokens for reserved words */

/* Type keywords (see type_specifier and aggregate_specifier) */
%token T_VOID
%token T_INT
%token T_CHAR
%token T_FLOAT
%token T_DOUBLE
%token T_STRUCT
%token T_UNION
%token T_ENUM
%token T_LONG
%token T_SHORT
%token T_UNSIGNED
/* Storage class keywords (see sc_specifier) */
%token T_AUTO
%token T_EXTERN
%token T_REGISTER
%token T_TYPEDEF
%token T_STATIC
/* Keywords used in statements and expressions */
%token T_GOTO
%token T_RETURN
%token T_SIZEOF
%token T_BREAK
%token T_CONTINUE
%token T_IF
%token T_ELSE
%token T_FOR
%token T_DO
%token T_WHILE
%token T_SWITCH
%token T_CASE
%token T_DEFAULT
/* Declared but not referenced by any rule in this grammar */
%token T_ENTRY

/* Tokens that delimit a preprocessor line. */
/* NOTE(review): none of these three tokens appears in any rule of */
/* this grammar, so a lexer that returns them would trigger a syntax */
/* error -- confirm how preprocessor lines are meant to be handled. */
%token T_PPBEGIN		/* following tokens from a PP line */
%token T_PPEND			/* PP line is over */
%token T_PPGARBAGE		/* couldn't tokenize PP line */

/* Tokens requiring special look-ahead checking on the part of the */
/* lexical analyzer... see discussion at top of this file.  Rules */
/* that accept empty parens regardless of what follows use the */
/* nonterminal 'parens', which matches any of the three T_PARENS* */
/* tokens. */
%token T_PARENS			/* left and right parens with only */
				/* whitespace and/or comments inside */
%token T_PARENS1		/* T_PARENS preceded by a complete */
				/* declarator and followed by a token */
				/* that could start a function_body */
%token T_PARENS2		/* T_PARENS preceded by a complete */
				/* declarator and followed by a token */
				/* that would imply the parens should */
				/* be incorporated into the declarator */
%token T_TYPEDEFNAME		/* user defined type, name in yytext */


/* Precedence rules.  P_IFTHEN is not a real token: it exists only so */
/* the else-less T_IF rule in 'statement' can take its precedence via */
/* %prec.  T_ELSE is declared later and therefore ranks higher, which */
/* resolves the dangling-else conflict in favor of shifting T_ELSE. */
%nonassoc	P_IFTHEN	/* if-then without ELSE */
%nonassoc	T_ELSE		/* shifting T_ELSE beats reducing if-then */

/* Parsing starts at (and must reduce the whole input to) 'program' */
%start program

%%

/* External definitions (as in 'not contained in a procedure body', */
/* not as in 'extern') comprise a C program file.  The list is left */
/* recursive so that each external_definition is folded into */
/* 'program' as soon as it is complete; the parser stack depth then */
/* stays constant instead of growing with the number of definitions */
/* in the file. */

program	:	external_definition
	|	program external_definition
	;

/* A top-level item is either a function definition (it has a body) */
/* or a data definition (declarators terminated by ';'). */
external_definition
	:	function_definition
	|	data_definition
	;

/* Old-style function definition: optional specifiers, the declarator */
/* with its parenthesized parameter names, then the function body. */
function_definition
	:	opt_decl_specifiers function_declarator function_body
	;

/* The parameter parentheses of a definition.  T_PARENS1 is the */
/* lexer's "()" token for the case where the following token could */
/* start a function_body (List 1 in the comment at the top of the */
/* file). */
function_declarator
	:	declarator '(' parameter_list ')'
	|	declarator T_PARENS1
	;

/* Comma-separated parameter names only; no types appear inside the */
/* parentheses. */
parameter_list
	:	identifier
	|	identifier ',' parameter_list
	;

/* Declarations that follow the parameter parentheses (initializers */
/* are not permitted in them) and then the brace-enclosed body. */
function_body
	:	opt_noinit_declaration_list compound_statement
	;

/* Both the specifiers and the declarator list are optional, so a */
/* lone ';' is accepted as a data definition. */
data_definition
	:	opt_decl_specifiers opt_init_declarator_list ';'
	;

/* Expressions... we don't rely on precedence because some of the */
/* operator characters appear in other contexts in the grammar, and */
/* doing everything explicitly avoids confusion */

/* Arguments of a call (see 'primary').  Each argument is a */
/* nonsequence, so a ',' here separates arguments rather than acting */
/* as the comma operator. */
expression_list
	:	nonsequence
	|	expression_list ',' nonsequence
	;

opt_constant_expression
	:	empty
	|	constant_expression
	;

/* Syntactically just a comma-free expression; the grammar does not */
/* check that the value is actually constant. */
constant_expression
	:	nonsequence
	;

opt_expression
	:	empty
	|	expression
	;

/* A full expression, comma operator included */
expression
	:	sequence
	;

/* Comma operator, grouping left to right */
sequence:	expr1
	|	sequence ',' expr1
	;

/* An expression with no top-level comma operator */
nonsequence
	:	expr1
	;

/* Assignment.  The rule recurses on the right, so assignments group */
/* right to left.  Any expr2 is accepted on the left-hand side; the */
/* grammar does not check that it is assignable. */
expr1	:	expr2
	|	expr2 asgnop expr1
	;

/* Simple assignment and the compound assignment operators */
asgnop	:	'='
	|	T_ADD_ASSIGN
	|	T_SUB_ASSIGN
	|	T_MULT_ASSIGN
	|	T_DIV_ASSIGN
	|	T_MOD_ASSIGN
	|	T_RSHIFT_ASSIGN
	|	T_LSHIFT_ASSIGN
	|	T_AND_ASSIGN
	|	T_XOR_ASSIGN
	|	T_IOR_ASSIGN
	;

/* Conditional operator.  It groups right to left, so the condition */
/* is an expr3 and the false arm is another expr2: "a ? b : c ? d : e" */
/* is read as "a ? b : (c ? d : e)".  The arm between '?' and ':' is */
/* bracketed by those two tokens, so it may be any expression, */
/* including assignments and the comma operator. */
expr2	:	expr3
	|	expr3 '?' expression ':' expr2
	;

/* Binary operators: one nonterminal per precedence level, from the */
/* loosest binding (expr3, ||) to the tightest (expr12, * / %). */
/* Every rule recurses on the left, so each level groups left to */
/* right. */

/* Logical or */
expr3	:	expr4
	|	expr3 T_IOR expr4
	;

/* Logical and */
expr4	:	expr5
	|	expr4 T_AND expr5
	;

/* Bitwise inclusive or */
expr5	:	expr6
	|	expr5 '|' expr6
	;
	
/* Bitwise exclusive or */
expr6	:	expr7
	|	expr6 '^' expr7
	;

/* Bitwise and */
expr7	:	expr8
	|	expr7 '&' expr8
	;

/* Equality */
expr8	:	expr9
	|	expr8 T_EQ expr9
	|	expr8 T_NEQ expr9
	;

/* Relational */
expr9	:	expr10
	|	expr9 '<' expr10
	|	expr9 '>' expr10
	|	expr9 T_LEQ expr10
	|	expr9 T_GEQ expr10
	;

/* Shift */
expr10	:	expr11
	|	expr10 T_RSHIFT expr11
	|	expr10 T_LSHIFT	expr11
	;

/* Additive */
expr11	:	expr12
	|	expr11 '+' expr12
	|	expr11 '-' expr12
	;

/* Multiplicative */
expr12	:	expr13
	|	expr12 '*' expr13
	|	expr12 '/' expr13
	|	expr12 '%' expr13
	;

/* Cast level: a cast applies to another cast-level expression. */
expr13	:	expr14
	|	cast expr13
	;

/* Prefix operators, including sizeof.  They live one level below the */
/* cast so that the operand of sizeof may itself begin with a prefix */
/* operator ("sizeof *p", "sizeof -x"), while "sizeof (type)" is still */
/* read only as the size of a type and never as sizeof applied to a */
/* cast expression.  The other prefix operators take a cast-level */
/* operand, so "-(int)x" and "*(char *)p" are accepted. */
expr14	:	expr15
	|	'*' expr13
	|	'&' expr13
	|	'-' expr13
	|	'!' expr13
	|	'~' expr13
	|	T_INCREMENT expr13
	|	T_DECREMENT expr13
	|	T_SIZEOF expr14
	|	T_SIZEOF '(' type_name ')'
	;

/* Postfix increment and decrement */
expr15	:	expr16
	|	expr15 T_INCREMENT
	|	expr15 T_DECREMENT
	;

/* Pass-through level; adds no operators of its own */
expr16	:	primary
	;

/* Primary expressions together with subscripting, member selection */
/* and function calls.  A call with arguments spells out '(' and ')'; */
/* a call with none arrives from the lexer as one of the empty-parens */
/* tokens, hence the separate 'primary parens' case. */
primary	:	identifier
	|	constant
	|	string
	|	'(' expression ')'
	|	primary '[' expression ']'
	|	primary '.' identifier
	|	primary T_RARROW identifier
	|	primary '(' expression_list ')'
	|	primary parens
	;

/* Parenthesized type name used as a cast operator (see expr13) */
cast	:	'(' type_name ')'
	;

/* Numeric constants, plus single-quoted text (T_SQSTRING) */
constant:	T_INTCONST
	|	T_LONGCONST
	|	T_DOUBLECONST
	|	T_SQSTRING
	;

/* Double-quoted string literal */
string	:	T_DQSTRING
	;

/* Declarations */

/* Declarations at the head of a compound statement */
opt_declaration_list
	:	empty
	|	declaration_list
	;

declaration_list
	:	declaration
	|	declaration declaration_list
	;

/* Specifiers are mandatory here (unlike data_definition); the */
/* declarator list is optional, so "int;" is accepted. */
declaration
	:	decl_specifiers opt_init_declarator_list ';'
	;

/* Same shapes as above, but the declarators may not carry */
/* initializers.  Used by function_body for the declarations that */
/* precede the opening brace. */
opt_noinit_declaration_list
	:	empty
	|	noinit_declaration_list
	;

noinit_declaration_list
	:	noinit_declaration
	|	noinit_declaration noinit_declaration_list
	;

noinit_declaration
	:	decl_specifiers opt_declarator_list ';'
	;

/* Specifiers may be omitted entirely at the top level (see */
/* function_definition and data_definition). */
opt_decl_specifiers
	:	empty
	|	decl_specifiers
	;

/* One or more type and storage class specifiers in any order and */
/* number; the grammar does not reject nonsensical combinations. */
decl_specifiers
	:	type_specifier 
	|	sc_specifier
	|	type_specifier decl_specifiers
	|	sc_specifier decl_specifiers
	;

/* One or more type specifiers with no storage class; used where a */
/* storage class is not accepted (struct_decl and type_name). */
type_specifiers
	:	type_specifier
	|	type_specifier type_specifiers
	;

/* A single type keyword, a struct/union/enum specifier, or a name */
/* the lexer has classified as a typedef name */
type_specifier
	:	T_VOID
	|	T_CHAR
	|	T_SHORT
	|	T_INT
	|	T_LONG
	|	T_UNSIGNED
	|	T_FLOAT
	|	T_DOUBLE
	|	aggregate_specifier
	|	typedef_name
	;

/* Storage class keywords; T_TYPEDEF is treated syntactically as one */
/* of them. */
sc_specifier
	:	T_AUTO
	|	T_STATIC
	|	T_EXTERN
	|	T_REGISTER
	|	T_TYPEDEF
	;

/* Comma-separated declarators, each optionally followed by an */
/* initializer */
opt_init_declarator_list
	:	empty
	|	init_declarator_list
	;

init_declarator_list
	:	init_declarator
	|	init_declarator ',' init_declarator_list
	;

init_declarator
	:	declarator
	|	declarator initializer
	;

/* Comma-separated declarators without initializers (see */
/* noinit_declaration) */
opt_declarator_list
	:	empty
	|	declarator_list
	;

declarator_list
	:	declarator
	|	declarator ',' declarator_list
	;

/* A declarator with any number of leading '*' (pointer-to) prefixes, */
/* e.g. "**argv".  The '*' alternative recurses on declarator itself, */
/* in the same way abstract_declarator does, so the prefixes can be */
/* stacked to any depth.  Array and function suffixes are handled in */
/* decl1 and therefore bind tighter than '*'. */
declarator
	:	decl1
	|	'*' declarator
	;

/* Array and function suffixes.  The only function suffix accepted */
/* inside a declarator is the empty-parens token T_PARENS2, which the */
/* lexer returns when the token after "()" shows the parens belong to */
/* the declarator (List 2 in the comment at the top of the file). */
decl1	:	decl2
	|	decl1 '[' opt_constant_expression ']'
	|	decl1 T_PARENS2
	;

/* The declared name itself, or a parenthesized declarator used for */
/* grouping */
decl2	:	identifier
	|	'(' declarator ')'
	;


/* '=' followed by the initial value */
initializer
	:	'=' initializer_item
	;

/* A single comma-free expression, or a brace-enclosed list that may */
/* end with a trailing comma.  Lists nest because each list element */
/* is itself an initializer_item. */
initializer_item
	:	nonsequence
	|	'{' initializer_list '}'
	|	'{' initializer_list ',' '}'
	;

initializer_list
	:	initializer_item
	|	initializer_list ',' initializer_item
	;


/* struct, union and enum specifiers.  Each comes in three forms: */
/* body without a tag, tag with a body, and tag alone (a reference to */
/* a type declared elsewhere). */
aggregate_specifier
	:	struct_or_union '{' struct_decl_list '}'
	|	struct_or_union identifier '{' struct_decl_list '}'
	|	struct_or_union identifier
	|	T_ENUM '{' enumerator_list '}'
	|	T_ENUM identifier '{' enumerator_list '}'
	|	T_ENUM identifier
	;

struct_or_union
	:	T_STRUCT
	|	T_UNION
	;

/* One or more member declarations; an empty body is not accepted */
struct_decl_list
	:	struct_decl
	|	struct_decl struct_decl_list
	;

/* One member declaration inside a struct or union body: type */
/* specifiers (no storage class), one or more member declarators, and */
/* the terminating ';' token, e.g. "int x, y;". */
struct_decl
	:	type_specifiers struct_declarator_list ';'
	;

/* Comma-separated member declarators */
struct_declarator_list
	:	struct_declarator
	|	struct_declarator ',' struct_declarator_list
	;

/* A member declarator: a plain declarator, a declarator followed by */
/* ':' and a width expression (bit-field), or a ':' width with no */
/* name at all. */
struct_declarator
	:	declarator
	|	declarator ':' constant_expression
	|	':' constant_expression
	;

/* Comma-separated enumerators; a trailing comma is not accepted */
enumerator_list
	:	enumerator
	|	enumerator ',' enumerator_list
	;

/* An enumeration constant, optionally given an explicit value */
enumerator
	:	identifier
	|	identifier '=' constant_expression
	;

/* Lexical analyzer and parser need to cooperate on this one so names */
/* that have been associated with types via 'typedef' are returned as */
/* such, rather than as ordinary identifiers.  Otherwise certain */
/* ambiguities won't go away. */

typedef_name
	:	T_TYPEDEFNAME
	;


/* A type written without a declared name, as used in casts and in */
/* sizeof '(' type_name ')' */
type_name
	:	type_specifiers abstract_declarator
	;

/* Mirrors 'declarator', but with the identifier left out.  Any */
/* number of leading '*' prefixes is accepted. */
abstract_declarator
	:	adecl1
	|	'*' abstract_declarator
	;

/* Array and function suffixes.  Any of the three empty-parens tokens */
/* is accepted here, since what follows the parens does not matter in */
/* this context. */
adecl1	:	adecl2
	|	adecl1 '[' opt_constant_expression ']'
	|	adecl1 parens
	;

/* The position where the name would be (empty), or a parenthesized */
/* abstract declarator.  A literal "()" never reaches the second */
/* alternative because the lexer returns it as a T_PARENS* token. */
adecl2	:	empty
	|	'(' abstract_declarator ')'
	;

/* Statements */

/* A brace-enclosed block: all declarations first, then all */
/* statements.  Either part may be absent. */
compound_statement
	:	'{' opt_declaration_list opt_statement_list '}'
	;

opt_statement_list
	:	empty
	|	statement_list
	;

/* One or more statements.  Left recursive so that each statement is */
/* folded into the list as soon as it is complete, rather than every */
/* statement of the block being held on the parser stack until the */
/* closing '}' is seen. */
statement_list
	:	statement
	|	statement_list statement
	;

/* Every statement form.  The else-less T_IF alternative is given the */
/* precedence of P_IFTHEN, which is lower than that of T_ELSE, so */
/* when a T_ELSE follows the parser shifts it and the else attaches */
/* to the nearest if.  Case and default labels are accepted wherever */
/* a statement is; the grammar does not require an enclosing switch. */
statement
	:	compound_statement
	|	expression ';'
	|	T_IF '(' expression ')' statement	%prec P_IFTHEN
	|	T_IF '(' expression ')' statement T_ELSE statement
	|	T_WHILE '(' expression ')' statement
	|	T_DO statement T_WHILE '(' expression ')' ';'
	|	T_FOR '(' opt_expression ';' opt_expression ';'
			opt_expression ')' statement
	|	T_SWITCH '(' expression ')' statement
	|	T_CASE constant_expression ':' statement
	|	T_DEFAULT ':' statement
	|	T_BREAK ';'
	|	T_CONTINUE ';'
	|	T_RETURN ';'
	|	T_RETURN expression ';'
	|	T_GOTO identifier ';'
	|	identifier ':' statement
	|	';'
	;


/* A few loose ends */

/* An ordinary name, i.e. one the lexer did not classify as a */
/* typedef name */
identifier
	:	T_IDENT
	;

/* Empty parentheses in a context where the token that follows them */
/* is irrelevant: accept whichever of the three variants the lexer */
/* produced. */
parens	:	T_PARENS
	|	T_PARENS1
	|	T_PARENS2
	;

/* Matches no input; used to spell out the empty alternative of the */
/* opt_* rules (and of adecl2). */
empty	:
	;
