Escolar Documentos
Profissional Documentos
Cultura Documentos
CSE 4100
Prof. Steven A. Demurjian Computer Science & Engineering Department The University of Connecticut 371 Fairfield Way, Unit 2155 Storrs, CT 06269-3155
steve@engr.uconn.edu http://www.engr.uconn.edu/~steve (860) 486 - 4818
Material for course thanks to: Laurent Michel Aggelos Kiayias Robert LeBarre
LandY.1
Two Compiler Writing Tools that are Utilized to easily Specify: Lexical Tokens and their Order of Processing (Lex) Context Free Grammar for LALR(1) (Yacc) Both Lex and Yacc have Long History in Computing Lex and Yacc Earliest Days of Unix Minicomputers Flex and Bison From GNU JFlex - Fast Scanner Generator for Java BYacc/J Berkeley CUP, ANTLR, PCYACC, PCLEX and PCYACC from Abacus
LandY.2
A Unix Utility from early 1970s A Compiler that Takes as Source a Specification for: Tokens/Patterns of a Language Generates a C Lexical Analyzer Program Pictorially:
Lex Source Program: lex.y lex.yy.c
Lex Compiler
lex.yy.c
C Compiler
a.out
Input stream
a.out
Sequence of tokens
LandY.3
Declarations: Defs, Constants, Types, #includes, etc. that can Occur in a C Program Regular Definitions (expressions) Translation Rules: Pairs of (Regular Expression, Action) Informs Lexical Analyzer of Action when Pattern is Recognized Lex.y File Format: Auxiliary Procedures: DECLARATIONS Designer Defined C Code %% Can Replace System Calls TRANSLATION RULES
%% AUXILIARY PROCEDURES
LandY.4
Regular Expression [a-zA-Z] [0-9] Rules for later [ \t\n]+ token definitions [A-Za-z][A-Za-z0-9]* "(*"([^*]|\n|"*"+[^)])*"*"+")" [0-9]+/([^0-9]|"..") [0-9]+"."[0-9]*([0-9]|"E"[+-]?[0-9]+) \'([^']|\'\')*\'
{printf(" %s ", yytext);return(T_ASSIGN);} {printf(" %s ", yytext);return(T_ELSE);}
Token Definitions
LandY.5
{id} {printf(" %s ", yytext);return(T_IDENTIFIER);} {integer} {printf(" %s ", yytext);return(T_INTEGER);} {real} {printf(" %s ", yytext);return(T_REAL);} {string} {printf(" %s ", yytext);return(T_STRING);} {comment} {/* T_COMMENT */} {ws} {/* spaces, tabs, newlines */} ":=" "else" "then" "<=" "<" "<>" ">=" ">" %%
LandY.7
%s %s %s %s %s %s %s %s
%% install_id() { /* A procedure to install the lexeme whose first character is pointed to by yytext and whose length is yylen into symbol table and return a pointer */ }
LandY.8
char *yytext; Pointer to current lexeme terminated by \0 int yylen; Number of characters in yytext, not counting \0 yylval: Global variable through which the token value can be returned to Yacc Parser (Yacc) can access yylval, yylen, and yytext How are these used? Consider Integer Tokens: yylval = ascii_to_integer (yytext); Conversion from String to actual Integer Value
LandY.9
Command Line: lex myfile.l Generates lex.yy.c pclex myfile.l Generates myfile.c -v flag Includes Statistics on State Machine, etc.
LandY.10
Compilation at Unix Command Line: lex lexfile.l (creates lex.yy.c) cc lex.yy.c -ll (include lex library)
LandY.11
#define T_IDENTIFIER 300 #define T_INTEGER 301 #define T_REAL 302 #define T_STRING 303 #define T_ASSIGN 304 #define T_ELSE 305 #define T_IF 306 #define T_THEN 307 #define T_EQ 308 #define T_LT 309 #define T_NE 310 #define T_GE 311 #define T_GT 312 #define YYNEWLINE 10 yylex ( ) { int nstr; extern int yyprevious; while((nstr = yylook()) >= 0) yyfussy: switch(nstr) { case 0: if(yywrap()) return(0); break; case 1: {printf(" %s ", yytext);return(T_ASSIGN);} break; case 2: {printf(" %s ", yytext);return(T_ELSE);} break; case 3: {printf(" %s ", yytext);return(T_IF);} break;
LandY.13
yytext);return(T_INTEGER);}
yytext) ;return(T_REAL); } yytext);return(T_STRING);}
LandY.14
break; case 14: {/* T_COMMENT */} break; case 15: {/* spaces, tabs, newlines */} break; case -1: break; default: fprintf(yyout,"bad switch yylook %d",nstr); } return (0); } /* end of yylex */ yywrap(){} main() { int i; do { i = yylex(); } while (i!=0); }
LandY.15
A Pascal lex.l
CSE 4100 %{ #include "y.tab.h" %} letter digit ws id comment integer real string %% ":=" {return(T_ASSIGN);} ":" {return(T_COLON);} "array" {return(T_ARRAY);} "begin" {return(T_BEGIN);} "case" {return(T_CASE);} "const" {return(T_CONST);} "downto" {return(T_DOWNTO);} "do" {return(T_DO);} "else" {return(T_ELSE);} "end" {return(T_END);} "file" {return(T_FILE);} "for" {return(T_FOR);}
LandY.16
A Pascal lex.l
"function" {return(T_FUNCTION);} /* "goto" {return(T_GOTO);} */ CSE "if" {return(T_IF);} 4100 "label" {return(T_LABEL);} "nil" {return(T_NIL);} "not" {return(T_NOT);} "of" {return(T_OF);} /* "packed" {return(T_PACKED);} */ "procedure" {return(T_PROCEDURE);} "end" {return(T_END);} "program" {return(T_PROGRAM);} "record" {return(T_RECORD);} "repeat" {return(T_REPEAT);} "set" {return(T_SET);} "then" {return(T_THEN);} "to" {return(T_TO);} "type" {return(T_TYPE);} "until" {return(T_UNTIL);} "var" {return(T_VAR);} "while" {return(T_WHILE);} /* "with" {return(T_WITH);} */ "+" {return(T_PLUS);} "-" {return(T_MINUS);} "or" {return(T_OR);} "and" {return(T_AND);} "div" {return(T_DIV);} "mod" {return(T_MOD);} "/" {return(T_RDIV);}
LandY.17
A Pascal lex.l
"*" CSE "(" 4100 ")" "=" "," ".." "." "[" "]" "<=" "<" "<>" ">=" ">" "in" "^" ";" {return(T_MULT);} {return(T_LPAREN);} {return(T_RPAREN);} {return(T_EQ);} {return(T_COMMA);} {return(T_RANGE);} {return(T_PERIOD);} {return(T_LBRACK);} {return(T_RBRACK);} {return(T_EQ);} {return(T_LT);} {return(T_NE);} {return(T_GE);} {return(T_GT);} {return(T_IN);} {return(T_UPARROW);} {return(T_SEMI);}
{id} {return(T_IDENTIFIER);} {integer} {return(T_INTEGER);} {real} {return(T_REAL);} {string} {return(T_STRING);} {comment} {/* T_COMMENT */} {ws} {/* spaces, tabs, newlines */}
LandY.18
What is Latex? Text Processing Language Embed Commands into Ascii File Opposite of Word's WYSIWYG Geared Towards Publishing Particularly Prior to Newer Versions of Word Very Powerful Text Formatting Language Invented by Computer Scientist Donald Knuth http://www-cs-faculty.stanford.edu/~uno/ http://www-cs-faculty.stanford.edu/~uno/abcde.html Famous for: The Art of Computer Programming http://www-cs-faculty.stanford.edu/~uno/taocp.html
LandY.19
Task 1: Oct 5: Design and implement a lexical analyzer using the flex generator on the Linux boxes that is able to identify all lexical tokens for the latex subset. Task 2: Oct 12: Design and develop a context free grammar (CFG) for a subset of Latex. Task 3: Oct 17: Calculate FIRST and FOLLOW for a grammar provided after deliverable part 1b.
LandY.20
latex.all.txt
CSE 4100
BASIC LATEX COMMANDS/OPTIONS The following discusses the Latex commands and options which will be supported in our text processor. TEXT THAT IS SHOWN IN ALL CAPITAL LETTERS CORRESPONDS TO TOKENS WHICH HAVE MANY DIFFERENT OPTIONS. 1. Section, Subsections, and Table of Contents Commands \section{STRING} \subsection{STRING} Examples or Explanation \section{Introduction} \subsection{A Text Processor} \subsection{Legal Latex Commands} \section{Using Latex} Generate a table of contents with page numbers
\tableofcontents
Specifically, it would generate: 1 Introduction 1.1 A Text Processor 1.2 Legal Latex Commands
2 Using Latex
LandY.21
latex.all.txt
2. Formatting Commands That Affect The Overall Document CSE 4100 Commands Examples or Explanation
\renewcommand{\baselinestretch}{INTEGER}
Establish the spacing 1 is single, 2 is double, etc. \pagenumbering{STYLE} STYLE is either arabic, roman, alph, Roman, or Alph arabic numbers pages using 1, 2, 3, ... etc. roman numbers pages using i, ii, iii, ... etc. alph numbers pages using a, b, c, ... etc. Roman numbers pages using I, II, III, ... etc. Alph numbers pages using A, B, C, ... etc. \arabic{COUNTER} COUNTER indicates the initial value of page numbers \roman{COUNTER} COUNTER indicates the initial value of page numbers \alph{COUNTER} COUNTER indicates the initial value of page numbers In this case, counter must be <= 26. \Roman{COUNTER} COUNTER indicates the initial value of page numbers \Alph{COUNTER} COUNTER indicates the initial value of page numbers In this case, counter must be <= 26. \vspace{INTEGER} Insert an INTEGER number of blank lines \hspace{INTEGER} Insert an INTEGER number of blank spaces \rm \it Change the font to roman Change the font to italics or underline
When the \rm or \it commands are used within curly braces, i.e., {\it The Huskies win again!}, only the text within the braces is affected. Otherwise, the command switches the mode of printing from that point on in the text. LandY.22
latex.all.txt
3. Using Backslash to Indicate a Character Rather Than a Command. CSE 4100 The backslash character (\) is used to tell Latex that the next character should be treated as a character and not as a command. The backslash is used with the following characters:
&
Without the backslash, each character has a special meaning, i.e., % is for a comment that is ignored during text processing, & divides column entries of tables, etc. With a backslash, i.e., \%, the character is interpreted as itself.
LandY.23
latex.all.txt
4. Begin/End Blocks - Centering and Verbatim CSE 4100 Begin/end blocks are used within Latex to identify a scope over which a given command applies. They are best illustrated with examples. \begin{verbatim} Four Score and Seven Years Ago Our Forefathers \end{verbatim} \begin{center} Four Score and\\ Seven Years\\ Ago Our Forefathers \end{center} The verbatim option displays the text exactly as it appears within the input file.
The center option centers the entire block of text as a single unit. The \\ are used to signal the end of a line.
This produces the output: Four Score and Seven Years Ago Our Forefathers Without the second \\, after Seven Years, the output would be: Four Score and Seven Years Ago Our Forefathers
LandY.24
latex.all.txt
Commands can be combined, such as: CSE 4100 \begin{center} \begin{verbatim} Four Score and Seven Years Ago Our Forefathers \end{verbatim} \end{center} This combination centers the entire block, exactly as it appears, without changing the indentation within each line.
The output in this case would be: Four Score and Seven Years Ago Our Forefathers
LandY.25
latex.all.txt
5. Begin/End Blocks - single and Lists CSE 4100 Begin/end blocks can also be utilized to construct lists of items automatically. For example, the following input and commands: \begin{single} \begin{itemize} \item Lexical Analyzer uses DFAs and NFAs \item Parsing using CFGs \item Code Generation uses templates and also makes extensive use of syntax-directed translation via attribute grammars \end{itemize} \end{single} \noindent These are some of the phases for compilation that we'll study over the course of the semester. Produces the output: - Lexical Analyzer uses DFAs and NFAs - Parsing uses CFGs - Code Generation uses templates and also makes extensive use of syntax-directed translation via attribute grammars. These are some of the phases for compilation that we'll study over the course of the semester. The command \noindent is used to make sure that a new paragraph is not started after the list has completed, which would occur as a default. LandY.26
latex.all.txt
The enumerate option is similar, but generates numbers for each item: CSE 4100 \begin{enumerate} \item Lexical Analyzer uses DFAs and NFAs \item Parsing using CFGs \item Code Generation uses templates and also makes extensive use of syntax-directed translation via attribute grammars \end{enumerate} Notice that without the single begin/end block, the following output is produced: 1. Lexical Analyzer uses DFAs and NFAs 2. Parsing uses CFGs 3. Code Generation uses templates and also makes extensive use of syntax-directed translation via attribute grammars.
LandY.27
\section{Introduction}
This is an example of text that would be transformed into a paragraph in latex. Blank lines between text in the input cause a new paragraph to be generated. \vspace{10} \it When the blank line occurs after a section, no indentation of the paragraph is performed. However, \hspace{20} all other blanks, would result in a 5 space indent of the paragraph. \rm \subsection{A Text Processor} A {\it text processor} is a very useful tool, since it allows us to develop formatted documents that are easy to read.
LandY.28
Notes
CSE 4100
Not all of my Latex works in MikTex since it is based on an older version of Latex In prior two slides to get this to work you need to: Add \documentstyle{article} as First Line Add pt to the vspace and hspace
\vspace{10pt} \hspace{20pt}
Delete the \arabic{5} Web page has: latex.in.miktex.tex File with Changes
LandY.30
LandY.31
No. 1 2 3
Explanation allows centering of text used for italics used to identify items in a list
Notice that the table has been centered and the first column is right justified, the second column is centered, and the third column is left justified.
LandY.32
...
\end{tabular}
\begin{table}[location-options] indicates the start of the table environment, where location-options indicates where to place a table and may be either h (for here), t (for float to top of next page), or b (for float to bottom of current or next page). \caption{STRING} \label{WORD} \end{table} which indicates the tables caption which labels the caption/table with a word used to finish the table environment
Then, when \ref{WORD} appears in the text, the label is searched for and the automatic number assigned to the table is inserted.
LandY.33
I will Send out an Email with a Zip File of Tests Your Lexical Analyzer Should Recognize All of these!
LandY.34
Oct 5: Design and implement a lexical analyzer using the flex generator on the Linux boxes that is able to identify all lexical tokens for the latex subset. Oct 12: Design and develop a context free grammar (CFG) for a subset of Latex. Oct 17: Calculate FIRST and FOLLOW for a grammar provided after deliverable part 1b.
LandY.35
%{ /* THIS IS LATEX.L */
[ \t\n]+ ([a-zA-Z0-9])*
LandY.36
"begin"
"document" "end"
{word} {ws}
%%
Remaining Code:
/* need main routine at bottom */ yywrap(){return 0;}
main() { int i; do { i = yylex(); printf("i is: %d ****\n", i); } while (i!= EOF); }
Building lex.yy.c and Compiling/Executing: ssh to Engineering Linux Box flex latex.l gcc lex.yy.c -lfl a.out < latex.l
LandY.38
Lex.yy.c File
CSE #line 3 "lex.yy.c" 4100
#define
/* A lexical scanner generated by flex */ #define FLEX_SCANNER #define YY_FLEX_MAJOR_VERSION 2 #define YY_FLEX_MINOR_VERSION 5 #define YY_FLEX_SUBMINOR_VERSION 34 #if YY_FLEX_SUBMINOR_VERSION > 0 #define FLEX_BETA #endif /* First, we deal with issues. */ /* begin #include #include #include #include platform-specific or compiler-specific
Lex.yy.c File
/* THOUSAND LINES OF CODE MISSING */
CSE 4100 void yyfree (void * ptr )
{ free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ } #define YYTABLES_NAME "yytables" #line 29 "latex.l"
/* need main routine at bottom */ yywrap(){return 0;} main() { int i; do { i = yylex(); printf("i is: %d ****\n", i); } while (i!= EOF); }
LandY.40
; Lexeme: \ ; Lexeme: begiin ; Lexeme: { ; Lexeme: document ; Lexeme: } ; Lexeme: Hello ; Lexeme: world ; Lexeme: Does ; Lexeme: this
LandY.41
Output Continued
i is: CSE Val: 4100 i is: Val: i is: Val: i is: Val: i is: Val: i is: Val: i is: Val: i is: Val: i is: Val: i is: Val: i is: 203 203 203 203 203 203 203 203 203 203 203 204 204 201 201 205 205 202 202 206 206 **** ; Lexeme: work **** ; Lexeme: even **** ; Lexeme: on **** ; Lexeme: multiple **** ; Lexeme: lines ****
; Lexeme: \
**** ; Lexeme: end **** ; Lexeme: { ****
; Lexeme: document
**** ; Lexeme: } ****
LandY.42
\[(h|t|b)\] (c|l|r)+
%% "\\" "{" "}" {printf(" Val: %d\t; Lexeme: %s \n", TBACKSL, yytext);return(TBACKSL);} {printf(" Val: %d\t; Lexeme: %s \n", TLCURLYB, yytext);return(TLCURLYB);} {printf(" Val: %d\t; Lexeme: %s \n", TRCURLYB, yytext);return(TRCURLYB);} {printf(" Val: %d\t; Lexeme: %s \n", TBEGIN, yytext);return(TBEGIN);} {printf(" Val: %d\t; Lexeme: %s \n", TDOCUMENT, yytext);return(TDOCUMENT);} {printf(" Val: %d\t; Lexeme: %s \n", TEND, yytext);return(TEND);} {printf(" Val: %d\t; Lexeme: %s \n", TTABLESPEC,yytext);return(TTABLESPEC);} {printf(" Val: %d\t; Lexeme: %s \n", TCOLSPEC, yytext);return(TCOLSPEC);} {printf(" Val: %d\t; Lexeme: %s \n", TWORD, yytext);return(TWORD);} { /* DO NOTHING */ }
LandY.43
"begin"
"document" "end"
{tablespec}
{colspec} {word}
{ws}
LandY.44
\[(h|t|b)\] (c|l|r)+
%% "\\" "{" "}" {printf(" Val: %d\t; Lexeme: %s \n", TBACKSL, yytext);return(TBACKSL);} {printf(" Val: %d\t; Lexeme: %s \n", TLCURLYB, yytext);return(TLCURLYB);} {printf(" Val: %d\t; Lexeme: %s \n", TRCURLYB, yytext);return(TRCURLYB);} {printf(" Val: %d\t; Lexeme: %s \n", TBEGIN, yytext);return(TBEGIN);} {printf(" Val: %d\t; Lexeme: %s \n", TDOCUMENT, yytext);return(TDOCUMENT);} {printf(" Val: %d\t; Lexeme: %s \n", TEND, yytext);return(TEND);} {printf(" Val: %d\t; Lexeme: %s \n", TTABLESPEC, yytext);return(TTABLESPEC);} {printf(" Val: %d\t; Lexeme: %s \n", TCOLSPEC, yytext);return(TCOLSPEC);} {printf(" Val: %d\t; Lexeme: %s \n", TWORD, yytext);return(TWORD);} { /* DO NOTHING */ }
LandY.45
"\\begin"
"\{document\}" "\\end"
{tablespec}
{colspec} {word}
{ws}
; Lexeme: lrcll
**** ; Lexeme: lines **** ; Lexeme: [t] **** ; Lexeme: \end
****
; Lexeme: this **** ; Lexeme: work **** ; Lexeme: even ****
****
; Lexeme: {document} ****
; Lexeme: on
**** ; Lexeme: cclcrr ****
LandY.46
Task 1: Oct 5: Design and implement a lexical analyzer using the flex generator on the Linux boxes that is able to identify all lexical tokens for the latex subset. Task 2: Oct 12: Design and develop a context free grammar (CFG) for a subset of Latex. Task 3: Oct 17: Calculate FIRST and FOLLOW for a grammar provided after deliverable part 1b. Design a CFG for the project that allows Latex programs (e.g., text to be formatted) to be recognized. This will provide you with important language design experience. How do you Get Started? Lets Consider Initial Grammar in Project 1 Spec
LandY.47
Latex Program is defined by start_doc, end_doc, and main_body main_body is Left Recursive with Multiple main_options Main_option is either text_option or latex_options
---> start_doc main_body end_doc ---> "\" "begin" "{" "document" "}" ---> "\" "end" "{" "document" "}" ---> | ---> | main_body main_option main_option text_option latex_options
main_option
LandY.48
Text_option is a sequence of words Latex_options starts with either A backslash \ A left curly brace { Backs_options can be Begin/end blocks Sections Etc.
---> | ---> | text_option "word" "\" "{" "word"
text_option
latex_options
backs_options curlyb_options
backs_options
---> | |
begin_end_opts begin_options
---> --->
begin_options
begin_block
end_options "}"
"}"
position
section_options
--->
---> |
"h"
"t"
"b"
How would we write one of the begin_blocks, say for an Itemize List?
Itemize_list ---> | itemize_list item item What Does item go to?
backs_roman backs_italics
Key Issue
CSE 4100
Need to Re-Examine and Reanalyze latex.all.txt and all of the various test cases (emailed) Look for the Required Structure What are the Different Blocks? What are Options within Blocks? How are Nested Blocks Supported? What are Backslash and Curly Brace Options? You need to make sure that your Grammar can Parse any of the sample test cases You check this by Doing a Derivation for the Test Case or for a Portion of Latex
LandY.52
Task 1: Oct 5: Design and implement a lexical analyzer using the flex generator on the Linux boxes that is able to identify all lexical tokens for the latex subset. Task 2: Oct 12: Design and develop a context free grammar (CFG) for a subset of Latex. Task 3: Oct 17: Calculate FIRST and FOLLOW for a grammar provided after deliverable part 1b. We will use Yacc Notation for the Grammar See Following Slides Notice that : replaces arrow and | still means alternate rule.
LandY.53
#include <ctype.h> %} %start latexstatement %token %token %token %token %token %token %token %token BACKSL WORD ITEMIZE H CAPTION TABOCON LROMAN RM LBEGIN WSWORD ENUMERATE T LABEL RENEW CROMAN IT LCURLYB SPECCHAR TABULAR B DBLBS BASELINES LALPH NOINDENT DOCUMENT CENTER TABLE R ITEM INTEGER CALPH REF RCURLYB VERBATIM LSQRB C SECTION PAGENUM VSPACE END SINGLE RSQRB L SUBSEC ARABIC HSPACE
%% latexstatement
: ; : ; :
startdoc
mainbody
enddoc
startdoc
BACKSL
LBEGIN
LCURLYB
DOCUMENT
RCURLYB
enddoc
BACKSL
END
LCURLYB
DOCUMENT
RCURLYB
LandY.54
: | ; : | | ; : | ; : | ;
mainoption
textoption
textoption WORD
WORD
wstextoption
wstextoption WSWORD
WSWORD
commentoption
: ;
: | ; :
SPECCHAR
textoption
latexoptions
RCURLYB
curlyboptions
BACKSL
fonts
textoption
LandY.55
Bison
CSE 4100
Compiler Writing Tool that Generates LALR(1) Parser Grammar Rules (BNF) can be Modified/Augmented with Semantic Actions via Code Segments Can work in Conjunction with Lex or Separately Three Major Parts of a Bison Specification: Declarations %% Grammar Rules %% User Supplied Programs
LandY.56
A First Example
CSE 4100
%{ /*Includes and Global Variables here*/ #include <stdio.h> #include <ctype.h> %} %start line %token DIGIT %% /* Grammar Rules */ line : expr '\n' ;
expr : expr '+' term | term ; term : term '*' fact | fact ; fact : '(' expr ')' | DIGIT ; %%
%% /* Define own yylex */ yylex(){ int c; c = getchar(); if (isdigit(c)) { yylval = c-'0'; return DIGIT; } return c; } /* Error Routine */ yyerror(){} /* yyparse calls yylex */ main() { yyparse(); }
LandY.57
line : expr '\n' expr : expr '+' term | term term : term '*' fact | fact fact : '(' expr ')' | DIGIT
LandY.58
E + T E + T * F E + T * DIGIT E + F * DIGIT E + DIGIT * DIGIT T + DIGIT * DIGIT F + DIGIT * DIGIT DIGIT + DIGIT * DIGIT
F T E + E DIGIT + E F + E T + E
DIGIT
* T + E
DIGIT * T + E
F * T + E
T + E
LandY.59
CSE 4100
state 0
state 1
(6)
7/300 terminals, 4/300 nonterminals 8/600 grammar rules, 13/1000 states 0 shift/reduce, 0 reduce/reduce conflicts reported 8/350 working sets used memory: states,etc. 69/24000, parser 9/12000 9/600 distinct lookahead sets 4 extra closures 13 shift entries, 1 exceptions 7 goto entries 3 entries saved by goto default Optimizer space used: input 38/24000, output 218/12000 218 table entries, 205 zero maximum spread: 257, maximum offset: 43
LandY.61
Defining Precedence
CSE 4100
%token NUMBER %left '+' '-' %left '*' '/' %right UMINUS
| expr '-' expr | expr '*' expr | expr '/' expr | '(' expr ') {$$ | '-expr %prec | NUMBER ;
$$ = $2 | DIGIT
Input Grammar May be Ambiguous Bison (and others) have Default Disambiguating Rules In a Shift/Reduce Conflict, the Shift is Chosen In a Reduce/Reduce Conflict, the Reduction is to Reduce by earlier rule (listed from top-down) Can't Control S/R Conflict Resolution However, for R/R Resolution Reorder Rules to Force Different Shift Rewrite the Grammar to Remove Ambiguity Other Error is: Rule Not Reduced
If S/R Picks Shift, and Rule Never Reduced Elsewhere
LandY.63
State 3 contains 1 shift/reduce conflict. Grammar rule 1 statement -> if_then opt_else rule 2 statement -> assign_stmt rule 3 if_then -> T_IF rel_expr T_THEN statement rule 4 opt_else -> /* empty */ rule 5 opt_else -> T_ELSE statement rule 6 assign_stmt -> T_IDENTIFIER T_ASSIGN value rule 7 value -> T_INTEGER rule 8 value -> T_REAL rule 9 value -> T_STRING rule 10 rel_expr -> compare rel_op compare rule 11 compare -> T_IDENTIFIER rule 12 compare -> value rule 13 rel_op -> T_EQ rule 14 rel_op -> T_LT rule 15 rel_op -> T_NE rule 16 rel_op -> T_GE rule 17 rel_op -> T_GT
LandY.64
Terminals, with rules where they appear $ (-1) error (256) T_IF (258) 3 T_THEN (259) 3 T_ELSE (260) 5 T_IDENTIFIER (261) 6 11 T_ASSIGN (262) 6 T_INTEGER (263) 7 T_REAL (264) 8 T_STRING (265) 9 T_EQ (266) 13 T_LT (267) 14 T_NE (268) 15 T_GE (269) 16 T_GT (270) 17
LandY.65
Nonterminals, with rules where they appear statement (16) on left: 1 2, on right: 3 5 if_then (17) on left: 3, on right: 1 opt_else (18) on left: 4 5, on right: 1 assign_stmt (19) on left: 6, on right: 2 value (20) on left: 7 8 9, on right: 6 12 rel_expr (21) on left: 10, on right: 3 compare (22) on left: 11 12, on right: 10 rel_op (23) on left: 13 14 15 16 17, on right: 10
LandY.66
state 0 T_IF T_IDENTIFIER statement if_then assign_stmt state 1 if_then -> T_IF . rel_expr T_THEN statement (rule 3) T_IDENTIFIER shift, and go to state 5 T_INTEGER shift, and go to state 6 T_REAL shift, and go to state 7 T_STRING shift, and go to state 8 value go to state 9 rel_expr go to state 10 compare go to state 11 state 2 assign_stmt -> T_IDENTIFIER . T_ASSIGN value (rule 6) T_ASSIGN shift, and go to state 12 shift, and go to state 1 shift, and go to state 2 go to state 26 go to state 3 go to state 4
LandY.67
state 3 statement -> if_then . opt_else (rule 1) T_ELSE shift, and go to state 13 T_ELSE [reduce using rule 4 (opt_else)] $default reduce using rule 4 (opt_else) opt_else go to state 14 ... etc ... state 25 rel_expr -> compare rel_op compare (rule 10) $default reduce using rule 10 (rel_expr) state 26 $ go to state 27 state 27 $ go to state 28 state 28 $default accept
LandY.68
Use All Capital Letters for Token Names and All Lower Case for Non-Terminals (Helps Debugging) Put Grammar Rules and Actions on Separate Lines (Makes Moving them Easier) Put all Rules with Same Left Hand Side Together and Utilize Vertical Bar for Alternatives Put a Semicolon After the Very Last Alternative for Each Left Hand Side and on a Separate Line Yacc Encourages Left Recursion LALR Discourages Right Recursion!
LandY.69
Two Tasks:
Note that when I last gave this project, I put intentional errors in both latex.in and latex.l. I think I took them all out of latex.l, but am not sure about latex.in.
LandY.70
LandY.71
CSE 4100 %}
"\\\\" "\\" "{" "}" {special} "[" "]" "alph" "Alph" "arabic" "baselinestretch" "begin" "caption" "center" "document" "end" "enumerate"
{printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf(" {printf("
%s \n", yytext);fflush(stdout); return(DBLBS);} %s \n", yytext);fflush(stdout); return(BACKSL);} %s \n", yytext);fflush(stdout); return(LCURLYB);} %s \n", yytext);fflush(stdout); return(RCURLYB);} %s \n", yytext);fflush(stdout); return(SPECCHAR);} %s \n", yytext);fflush(stdout); return(LSQRB);} %s \n", yytext);fflush(stdout); return(RSQRB);} %s \n", yytext);fflush(stdout); return(LALPH );} %s \n", yytext);fflush(stdout); return(CALPH);} %s \n", yytext);fflush(stdout); return(ARABIC);} %s \n", yytext);fflush(stdout);return(BASELINES);} %s \n", yytext);fflush(stdout); return(LBEGIN);} %s \n", yytext);fflush(stdout); return(CAPTION);} %s \n", yytext);fflush(stdout); return(CENTER );} %s \n", yytext);fflush(stdout); return(DOCUMENT);} %s \n", yytext);fflush(stdout); return(END);} %s \n", yytext);fflush(stdout); return(ENUMERATE);}
LandY.72
{ /* DO NOTHING */ }
LandY.73
%{ /*
LCURLYB
DOCUMENT
RCURLYB
mainbody : mainbody mainoption {fprintf(fp,"after mainbody1\n");} | mainoption {fprintf(fp,"after mainbody2\n");} ; mainoption : textoption {fprintf(fp,"after mainoption1\n");} | commentoption {fprintf(fp,"after mainoption2\n");} | latexoptions {fprintf(fp,"after mainoption3\n");} ; textoption : textoption WORD {fprintf(fp,"after textoption1\n");} | WORD {fprintf(fp,"after textoption2\n");} ;
LandY.75
nonewpara
: ;
: ;
NOINDENT
reference
REF
LCURLYB
WORD
RCURLYB
LandY.76
Building lex.yy.c and Compiling/Executing: ssh to Linux flex latex.l bison -v latexp2.y gcc latexp2.tab.c -lfl a.out < latex.in
LandY.77
12 conflicts: 1 shift/reduce 53 conflicts: 1 shift/reduce 67 conflicts: 1 shift/reduce 70 conflicts: 1 shift/reduce 73 conflicts: 1 shift/reduce 102 conflicts: 1 shift/reduce
LandY.79
state 1 2 startdoc: BACKSL . LBEGIN LCURLYB DOCUMENT RCURLYB LBEGIN shift, and go to state 4
state 2 0 $accept: latexstatement . $end $end shift, and go to state 5 state 3 1 latexstatement: startdoc . mainbody enddoc BACKSL LCURLYB WORD SPECCHAR shift, shift, shift, shift, and and and and to to to to to go go go go to to to to state state state state 10 11 12 13 14 6 7 8 9
go go go go go
LandY.80
9 textoption: textoption . WORD 13 commentoption: SPECCHAR textoption . WORD shift, and go to state 57 [reduce using rule 13 (commentoption)] reduce using rule 13 (commentoption)
WORD $default
LandY.81
WORD DBLBS
28 beginendopts: beginoptions beginblock . endoptions BACKSL BACKSL shift, and go to state 99 [reduce using rule 56 (optcaption)] go to state 100 go to state 101 go to state 102
LandY.82
LandY.83
What is Actually Occurring see Part 2 Spec Three Rules Never Used: 35 beginblock: listblock 56 optcaption: /* empty */ 58 optlabel: /* empty */ Certain Grammar Combos Can't Occur Six Shift/Reduce Errors Explore the Item Set and the Involved Grammar Rules Shift Always Picked What is the Grammar Behavior (rule that is Fired) based on that? What are the Options to Fix the Problem?
LandY.84
What's the Solution? Need to Rework the Grammar so that All of the S/R Errors that Cause Problems and the Rules Not Reduced are Rectified Try Rewriting the Grammar Rules OK to Introduce S/R and R/R as Long as Program Still Parses and no Rules not Reduced This means Does it Run on All Test Cases!
LandY.85
LandY.87
Task 1 Involves Fixing S/R and Rules Not Reduced Errors Generate Revised latexp2.y Task 2 Involves Separate Activity to Support Nested Blocks and Verbatim May Require Grammar and Perhaps flex Changes Need to Recognize white space for Verbatim
LandY.88
Hand in Requirements: Log File for Grammar Changes to eliminate the shift/reduce errors and other problems for Task 1
Track Original Grammar Segments and Revisions Hand in Revised Grammar for Task 1
Log File for Grammar Changes to support Nested Blocks and Verbatim for Task 2
Track Original Grammar Segments and Revisions Hand in Revised Grammar for Task 2
Test Cases for both Tasks (own Test Cases) Compilation Instructions if Different from Default
LandY.89
Need to Focus on Rules not Reduced See Proj2Advice.doc that was Emailed We'll Briefly Review Note that out of the six S/R errors, two do not need to be fixed For those two, need to Examine the State, the involved Rules, the Shift/Reduction Conflict The Reduction May not Occur in that State but if it Occurs in Another State May be OK Test a Sample Input Associated with Grammar Rules that are Involved
LandY.90
position
tablespec
tablespec colspec R | C |
colspec
colspec
endtableopts
optcaption
optlabel
optcaption
textoption
RCURLYB
optlabel
WORD
RCURLYB
LandY.91
What are the four possibilities? 1. neither is present; 2. optcaption only 3. optlabel only 4. both present Can you rewrite the grammar rules above to precisely cover these four options more explicitly? Can you alleviate the epsilon-epsilon possibility in endtableoptions? You still want that option, but if you can get the other three non-empty options (2, 3, and 4) recognized then you will likely also be able to recognize the epsilon-epsilon case.
LandY.92
Rule not reduced: 35 beginblock: listblock state 73 35 beginblock: listblock . 63 listblock: listblock . anitem BACKSL BACKSL anitem shift, and go to state 65 [reduce using rule 35 (beginblock)] go to state 104
This always SHIFTS when seeing anitem that starts with a BACKSL!
State 65 is Processing the anitem Rule
LandY.93
\begin{document} \begin{itemize} \item Single is for Single spacing \item Hello again \end{itemize} \end{document}
\
begin { document } \ begin { itemize } after item after after Single after is for after after Single after spacing after \ item after after Hello after again after \ end after after after startdoc begendcmds4 begtableopts3 beginoptions textoption2 textoption1 textoption1 textoption1 textoption1 anitem listblock2 textoption2 textoption1 anitem listblock1
LandY.94
%{ /*Includes and Global Variables here*/ #include <stdio.h> #include <ctype.h> %} %start line %token DIGIT %% /* Grammar Rules */ line : expr '\n' ;
expr : expr '+' term | term ; term : term '*' fact | fact ; fact : '(' expr ')' | DIGIT ; %%
%% /* Define own yylex */ yylex(){ int c; c = getchar(); if (isdigit(c)) { yylval = c-'0'; return DIGIT; } return c; } /* Error Routine */ yyerror(){} /* yyparse calls yylex */ main() { yyparse(); }
LandY.95
line : expr '\n' expr : expr '+' term | term term : term '*' fact | fact fact : '(' expr ')' | DIGIT
LandY.96
E + T E + T * F E + T * DIGIT E + F * DIGIT E + DIGIT * DIGIT T + DIGIT * DIGIT F + DIGIT * DIGIT DIGIT + DIGIT * DIGIT
F T E + E DIGIT + E F + E T + E
DIGIT
* T + E
DIGIT * T + E
F * T + E
T + E
LandY.97
{expr.val = term.val}
term : term1 '*' fact {term.val = term1.val * fact.val} | fact
{term.val = fact.val}
fact : '(' expr ')'
{fact.val = expr.val}
| DIGIT
{fact.val = DIGIT.lexval}
LandY.98
yyv
$3 $2 $1
Consider Grammar Rule S -> A B C Eventually, A B C on Stack to be Replaced by S in Reduction For that Rule, Offsets into Parsing Stack are Defined as: $1 = A, $2 = B, $3 = C
LandY.99
yyv
$3 $2 $1
Consider Grammar Rule S -> A B C (all are nonterminals) Eventually, A B C on Stack to be Replaced by S in Reduction For that Rule, Offsets into Parsing Stack are Defined as: $1 = A, $2 = B, $3 = C
line : expr {line.val = expr.val } $$ = $1 expr : expr1 '+' term {expr.val = expr1.val + term.val} $$ = $1 + $3 | term
{expr.val = term.val}
$$ = $1 term : term1 '*' fact {term.val = term1.val * fact.val} $$ = $1 * $3 | fact
{term.val = fact.val}
$$ = $1 fact : '(' expr ')'
{fact.val = expr.val}
$$ = $2 | DIGIT
{fact.val = DIGIT.lexval}
$$ = char_to_int(yytext)
LandY.101
IN LEX: char yytext[YYLMAX]; int yyleng; yytext: globally passes lexeme to parser yylval: Set in lexical analyzer Returns Token value What is placed in stack yyv
IN YACC: #ifndef YYSTYPE #define YYSTYPE int #endif YYSTYPE yylval, yyval; YYSTYPE yyv[YYMAXDEPTH]; yyv S -> A B C $$ $1 $2 $3
$3 $2 $1
LandY.102
Pascal to C Conversion
CSE 4100
Utilize a Limited Subset of Pascal If-Then-Else and Assignment Statements Relational (Boolean) Expressions and Operators Conversions of Note: If-Then-Else goes to If-Else (no then in C) = Goes to == < > Goes to != := Goes to = Key Issues Define String Variables to Hold Concatenated Program Bottom Up Construction Utilizes Current Lexeme (yytext) Concatenated with Appropriate Conversions Information Passes Up the Grammar
LandY.103
Pascal to C Conversion
CSE 4100
%{ #include <stdio.h> #include <ctype.h> char strans[100], atrans[100], itrans[100], etrans[100], vtrans[100], retrans[100], ctrans[100], rtrans[100]; %} %start statement %token T_IF T_THEN T_ELSE T_IDENTIFIER T_ASSIGN T_INTEGER T_REAL %token T_STRING T_EQ T_LT T_LE T_NE T_GE T_GT
%% statement : if_then opt_else {strcpy(strans, itrans); strcat(strans, etrans); printf("%s\n", strans);} | assign_stmt {strcat(strans, atrans); printf("%s\n", strans);} ;
if_then : T_IF rel_expr {strcpy(itrans, "if "); strcat(itrans, retrans);} T_THEN assign_stmt{strcat(itrans, atrans);} ;
LandY.104
Pascal to C Conversion
CSE 4100
opt_else : /* the empty case */ {strcpy(etrans, "");} | T_ELSE assign_stmt {strcpy(etrans, " else "); strcat(etrans, atrans);} ; assign_stmt : T_IDENTIFIER {strcpy(atrans, yytext);} T_ASSIGN {strcat(atrans, "=");} value {strcat(atrans, vtrans);} ; value : T_INTEGER {strcpy(vtrans, yytext);} | T_REAL {strcpy(vtrans, yytext);} | T_STRING {strcpy(vtrans, yytext);} ; rel_expr : compare {strcpy(retrans, ctrans);} rel_op {strcat(retrans, rtrans);} compare {strcat(retrans, ctrans);} ;
LandY.105
Pascal to C Conversion
CSE 4100
compare : T_IDENTIFIER {strcpy(ctrans, yytext);} | value {strcpy(ctrans, yytext);} ; rel_op : T_EQ {strcpy(rtrans, "==");} | T_LT {strcpy(rtrans, "<");} | T_LE {strcpy(rtrans, "<=");} | T_NE {strcpy(rtrans, "!=");} | T_GE {strcpy(rtrans, ">=");} | T_GT {strcpy(rtrans, ">");} ; %% #include "lex.yy.c" yyerror(){} main() { yyparse(); }
LandY.106
/* SAMPLE INPUT ... */ procedure MAIN is X, Y: INTEGER; A, B, C: FLOAT; D, E: CHARACTER; begin if (X = Y) and (Z /= W) then Z:= X; if (A <= B) then A := B; end if; X := X + 1; else Y:=Y+1; end if; A :=B +C * D; A :=B * C / D; end MAIN;
LandY.107
/* AND OUTPUT */ TYPE BEING CONVERTED TO: TYPE BEING CONVERTED TO: TYPE BEING CONVERTED TO: assign_stmt*** Z = X ; assign stmt*** A = B ; if stmt*** if ( A <= B { A = B ; } assign stmt*** X = X + 1 assign_stmt*** Y = Y + 1 if stmt*** if ( X == Y && Z != W { Z =- X ; if ( A <= B { A = B ; } X = X + 1 ; } else { Y = Y + 1; } assign_stmt*** A = B + C assign_stmt*** A = B * C
; ;
* D ; / D ;
LandY.108
%{ #include <stdio.h> #include <ctype.h> typedef char *stype; #define YYSTYPE stype char strans[100], atrans[100], itrans[100], etrans[100], vtrans[100], retrans[100], ctrans[100], rtrans[100]; %} . . . Etc . . . %% statement : if_then opt_else {strcat(itrans, etrans); $$ = itrans; printf("%s\n", $$);} | assign_stmt {$$ = atrans; printf("%s\n", $$);} ; IN Y.TAB.C REDEFINES CONTENTS OF PARSING STACK #ifndef YYSTYPE #define YYSTYPE int #endif YYSTYPE yylval, yyval; YYSTYPE yyv[YYMAXDEPTH];
LandY.109
Unions Define Ability of Data Structure to be of Multiple Types (one or other attribute active) Consider the C Union Definition:
union EITHEROR /* Union Type Name */ { char trans[100]; int XYZ; } EOR; /* Variable Name */ EOR.trans is a string (use strcpy, strcat, etc.) EOR.XYZ is an int (use assignment, boolean expr, etc.)
LandY.110
%{ #include <stdio.h> #include <ctype.h> %} %start statement %union { char trans[100]; int XYZ; }
Union Definition
%token T_IF T_THEN T_ELSE T_IDENTIFIER %token T_STRING T_ASSIGN T_INTEGER T_REAL %token T_EQ T_LT T_LE T_NE T_GE T_GT
%type <trans> statement if_then opt_else %type <trans> assign_stmt value compare %type <trans> rel_op variable rel_expr
Redefines nonterminals of type <trans> to allow them to be that part of the union
THIS EFFECTIVELY REPLACES YYSTYPE %union { yyv char trans[100]; int XYZ; } S -> A B C $$ $1 $2 $3 $$.trans $1.XYZ $2.trans Etc.
$3 $2 $1
LandY.112
statement : if_then opt_else {strcpy($$, $1); strcat($$, $2); printf("%s\n", $$);} | assign_stmt {strcpy($$, $1); printf("%s\n", $$);} ;
opt_else : /* the empty case */ {strcpy($$, "");} | T_ELSE assign_stmt {strcpy($$, " else "); strcat($$, $2);} ;
LandY.113
assign_stmt : variable T_ASSIGN value {strcpy($$, $1); strcat($$, " = "); strcat($$, $3);} ; value : T_INTEGER {strcpy($$, yytext);} | T_REAL {strcpy($$, yytext);} | T_STRING {strcpy($$, yytext);} ; rel_expr : compare rel_op compare {strcpy($$, $1); strcat($$, $2); strcat($$, $3);} ; compare : T_IDENTIFIER {strcpy($$, yytext);} | value {strcpy($$, yytext);}
LandY.114
variable : T_IDENTIFIER {strcpy($$, yytext);} ; rel_op : T_EQ {strcpy($$, " == ");} | T_LT {strcpy($$, " < ");} | T_LE {strcpy($$, " <= ");} | T_NE {strcpy($$, " != ");} | T_GE {strcpy($$, " >= ");} | T_GT {strcpy($$, " > ");} ; %% #include "lex.yy.c" yyerror(){} yywrap(){} main() { yyparse(); }
LandY.115
%{ #include <stdio.h> #include <ctype.h> %} %start statement %union { char trans[100]; int XYZ; } %token T_IF T_THEN T_ELSE T_IDENTIFIER %token T_STRING T_ASSIGN T_INTEGER T_REAL %token T_EQ T_LT T_LE T_NE T_GE T_GT
%type <trans> T_IDENTIFIER T_ASSIGN etc . . . %type <trans> statement if_then opt_else %type <trans> assign_stmt value compare %type <trans> rel_op variable rel_expr
/* ALSO, types and tokens for XYZ are possible */ %%
LandY.116
assign_stmt : T_IDENTIFIER T_ASSIGN value {strcpy($$, $1); strcat($$, " = "); strcat($$, $3);} ; value : T_INTEGER {strcpy($$, yytext);} | T_REAL {strcpy($$, yytext);} | T_STRING {strcpy($$, yytext);} ;
LandY.117
Using Bison for Syntax Directed Translation Implementation of Attribute Grammar Given Input Latex File: Basic Text Processing Capabilities Advanced Text Processing Capabilities Nested Blocks in Single Environment Full Blown Verbatim Type checking for
Begin/End Blocks Combinations of Blocks Tabular Specification
Documentation (written using your Latex Syntax Directed Translator and Document Generator)
LandY.118
latex.l : Common lexical analyzer specification
latexp3c.y : Yacc file with nested blocks, WS, and verbatim along with basic code generation
latexp3c.output : S/R and R/R Conflicts - Are all OK? generate.c : Basic routines for formatted text generation util.c : Utility routines latex.input.txt : Sample input latexout.txt : Generated output for sample (with errors!) latextoc.txt : Generated table of contents for sample
proj3gs.doc : Grading Sheet - place initials next to which parts each person on the team was primarily responsible for.
LandY.119
Your Revised latexp3c.y file You may have multiple versions for each of the major Document Generation Capabilities Documentation of your Solution in Latex Using your Syntax Directed Translator/Generator Assumptions Log file with Major design decisions, problems, etc. Test Cases and Test Results (to be supplied) Zip File (lastnames.zip) 42 Students 21 Teams of 2! Email me your Teams by Nov 11th!
LandY.120
LandY.121
_____
_____
_____
%s %s %s %s
"\\alph" {printf(" %s \n", yytext);return(LALPH1);} "{alph}" {printf(" %s \n", yytext);return(LALPH2);} "\\Alph" {printf(" %s \n", yytext);return(CALPH1);} "{Alph}" {printf(" %s \n", yytext);return(CALPH2);} "\\arabic" {printf(" %s \n", yytext);return(ARABIC1);} "{arabic}" {printf(" %s \n", yytext);return(ARABIC2);} "\\baselinestretch" {printf(" %s \n", yytext);return(BASELINES);} "\\begin" {printf(" %s \n", yytext);return(LBEGIN);} "\\caption" {printf(" %s \n", yytext);return(CAPTION);} "{center}" {printf(" %s \n", yytext);return(CENTER );}
LandY.123
LandY.124
{printf(" %s \n", yytext);return(WORD);} {printf("ws--%s--ws\n", yytext); if ((strcmp(yytext, "\n\n") == 0) && (ws_flag == 0)) return(WS); else if (ws_flag == 1) return(WS);}
%%
LandY.125
/* THIS IS latexp3code.y */ %{ /* A YACC FOR PART 3 OF THE PROJECT WHERE VERBATIM AND NESTING WORKS */ #include <stdio.h> #include <ctype.h> #include <string.h> #define BUF_SIZE 512 int ws_flag = 0; #include "lex.yy.c" #include "util.c" #include "generate.c" %}
%union {
char trans[BUF_SIZE+1]; int val; }
%start latexstatement
LandY.126
%type <trans> textoption wsorword %type <val> style2 ARABIC2 LROMAN2 CROMAN2 LALPH2 CALPH2
%%
NOTE: YOU NEED TO ADD %type for ALL NON-TERMINALS and TOKENS that you wish to use the $$, $1, $2, etc. notation and the redefined parsing stack.
LandY.127
: ; : ; : ; : | ; :
startdoc
mainbody
enddoc
startdoc
LBEGIN
DOCUMENT
enddoc
END
DOCUMENT
mainbody
mainoption
| | ;
LandY.128
textoption wsorword { strcat($$, " "); strcat($$, $2); } wsorword { strcpy($$, $1); }
wsorword
WS {
strcpy($$, yytext); } WORD { strcpy($$, yytext); }
LandY.129
: | ; : ; : | | | | | | | | | | ; : ;
RCURLYB
curlyboptions
fonts
textoption
backsoptions
beginendopts sectionoptions tableofcont linespacing pagenumbers pagenuminit spacing fonts specialchar nonewpara reference
beginendopts
LBEGIN
begcmds
beginblock
endbegin
LandY.130
begcmds
: | | | | | | ; : | ; : | | | | | ;
CENTER VERBATIM {ws_flag=1;} SINGLE ITEMIZE ENUMERATE TABLE begtableopts TABULAR begtabularopts
endbegin
TABLE
endcmds
LandY.131
: | | | ;
beginendopts textoption /* FOR single or verbatim */ {printf("single or verb\n");} entrylist /* FOR center and tabular */ {printf("center or tabular\n");} listblock /* FOR item and enumerate */ {printf("item or enumerate\n");}
listblock
listblock
|
; anitem : | ; : |
anitem
entrylist
entrylist anentry
;
LandY.132
: | ;
entry
DBLBS {printf("anentryA\n");}
beginendopts {printf("anentryB\n");}
entry
: | ;
entry
SPECCHAR
textoption
begtableopts
: ; : ; : | | ;
LSQRB
position
RSQRB
begtabularopts
LCURLYB
COLS
RCURLYB
position
H T B
LandY.133
: | | ;
LCURLYB
textoption
RCURLYB
captionrest
captionrest
: | ; : ; :
END labelrest
labelrest sectionoptions
LABEL
LCURLYB
WORD
RCURLYB
END
SECTION LCURLYB textoption RCURLYB { generate_sec_header(get_sec_ctr(), $3); incr_sec_ctr(); } SUBSEC LCURLYB textoption RCURLYB { generate_subsec_header(get_sec_ctr(), get_subsec_ctr(), $3); incr_subsec_ctr(); }
LandY.134
TABOCON { set_gen_toc(); }
; linespacing : ; pagenumbers : PAGENUM style2 { set_page_style($2); } RENEW LCURLYB BASELINES RCURLYB LCURLYB WORD RCURLYB
LandY.135
spacing
horvert
LCURLYB
WORD
RCURLYB
horvert
: | ;
: | ;
VSPACE HSPACE
fonts
RM IT
LandY.136
: | | ; : ; : ;
nonewpara
NOINDENT
reference %% yyerror(){}
REF
LCURLYB
WORD
RCURLYB
Latex.input.txt
CSE 4100 \begin{document} \pagenumbering{arabic} \arabic{5} \renewcommand{\baselinestretch}{2} \tableofcontents \section{Introduction} This is an example of text that would be transformed into a paragraph in latex. Blank lines between text in the input cause a new paragraph to be generated. When the blank line occurs after a section, no indentation of the paragraph is performed. However, all other blanks, would result in a five space indent of the paragraph. \subsection{A Text Processor} A text processor is a very useful tool, since it allows us to develop formatted documents that are easy to read.
LandY.138
Latex.input.txt
CSE 4100 \subsection{Legal Latex Commands} We have seen that there are many different Latex commands, that can be used in many different ways. However, sometimes, we wish to use a character to mean itself, and override its Latex interpretation. For example, to use curly braces, we employ the backslash a set of integers. \section{Using Latex} Finally, there are many other useful commands that involve begin/end blocks, that establish an environment. These blocks behave in a similar fashion to begin/end blocks in a programming language, since they set a scope. We have discussed a number of examples. It is important to note, even at this early stage, that lists may be created within lists, allowing the nesting of blocks and environments. \end{document}a
LandY.139
latexout.txt
CSE 4100
1 Introduction
This is an example of text that would be transformed into a paragraph in latex. Blank lines between text in the input cause a new paragraph to be generated. When the blank line occurs after a section, no indentation of the paragraph is performed. However, all other blanks, would result in a five 2.1 A Text Processor
A text processor is a very useful tool, since it allows us to develop formatted documents that are easy to
LandY.140
latexout.txt
2.2 Legal Latex Commands
CSE 4100
We have seen that there are many different Latex commands, that can be used in many different ways. However, sometimes, we wish to use a character to mean itself, and override its Latex interpretation. For example, to use curly braces, we employ the backslash a 2 Using Latex Finally, there are many other useful commands that involve begin end blocks, that establish an environment. These blocks behave in a similar fashion to begin end blocks in a programming language, since they set a scope. We have discussed a number of examples. It is important to note, even at this early stage, that lists may be created within lists, allowing the nesting of WHY DOESNT PRINT IT ALL OUT?
LandY.141
latextoc.txt
CSE 4100
1 Introduction ---------- PAGE 5 2.1 A Text Processor ---------- PAGE 5 2.2 Legal Latex Commands ---------- PAGE 5 2 Using Latex ---------- PAGE 5
LandY.142
FILE *fpout; FILE *fptoc; #define #define #define #define char int OUT_WIDTH SPACE_LEFT LINES_PER_PAGE TOC_ON 40 5 40 1
void init_lines_so_far() { lines_so_far = 0; } void incr_lines_so_far() { lines_so_far++; } int check_done_page() { if (lines_so_far < LINES_PER_PAGE) return 1; else return 0; }
LandY.143
struct doc_symtab {
int page_no_counter; int page_style; int line_spacing; int current_font; int generate_toc; int section_counter; int subsect_counter;
LandY.144
int inc_page_no() { DST.page_no_counter++; return (DST.page_no_counter - 1); } void set_page_style(s) int s; { DST.page_style = s; }
LandY.146
/* THIS IS THE generate.c FILE */ init_output_page() { fprintf(fpout, "\n\n\n\n\n"); fflush(fpout); } void generate_sec_header(i, s) int i; char *s; { fprintf(fpout, "\n\n%d %s\n", i, s); fflush(fpout); if (get_gen_toc() == TOC_ON) fprintf(fptoc, "\n%d %s ---------- PAGE %d\n", i, s, get_page_no()); }
LandY.147
void generate_subsec_header(i, j, s) int i,j; char *s; { fprintf(fpout, "\n\n%d.%d %s\n", i, j, s); fflush(fpout); if (get_gen_toc() == TOC_ON) fprintf(fptoc, "\n%d.%d %s ---------- PAGE %d\n", i, j, s, get_page_no()); }
LandY.148
void generate_formatted_text(s) char *s; { int slen = strlen(s); int i, j, k, r; int llen; for (i = 0; i <= slen; ) { for (j = 0; ((j < OUT_WIDTH) && (i <= slen)); i++, j++) line[j] = s[i]; if (i <= slen) { if ((line[j-1] != ' ') && (s[i] !=' ')) { for (k = j-1; line[k] != ' '; k--) ; i = i - (j - k - 1); j = k; } for ( ; s[i] == ' '; i++) ; }
LandY.149
line[j] = '\0'; llen = strlen(line); if (i <= slen) { fprintf(fpout, "\n%s", line); fflush(fpout); } else { for(r = 0; r <= llen; r++) s[r] = line[r]; /* includes backslash 0 */ } } }
LandY.150
512
a, b; c[BUF_SIZE]; d[BUF_SIZE];
symtabtest val;
st;
%token ETC... %type <st> entrylist entry DBLBS listblock anitem %type <st> textoption wsorword WORD WS ITEM
%% ETC...
LandY.151
mainoption
textoption
| | ; :
textoption { fprintf(fp, "%d %d %s %s\n", $1.a, $1.b, $1.c, $1.d); } commentoption latexoptions textoption wsorword { $$.a = 5; } wsorword { $$.b = 10; }
wsorword
; :
Consider Ada9X (originally Ada95 and now Ada2005) is a Package Based, OO Programming Language Builds Upon the Original Ada Language Extension of Pascal Developed as a Language for DoD Named After Ada Lovelace (1815-1852) Worked on Charles Babbage's Early Mechanical General Purpose Computer/Analytical Engine The world's First Programmer Wrote the world's First Computer Program on Bernoulli Numbers
LandY.153
Ada9X Lex
CSE 4100 %{ /******* A "lex"-style lexer for Ada 9X ****************************/ /* Copyright (C) Intermetrics, Inc. 1994 Cambridge, MA USA */ /* Copying permitted if accompanied by this statement. */ /* Derivative works are permitted if accompanied by this statement.*/ /* This lexer is known to be only approximately correct, but it is */ /* more than adequate for most uses (the lexing of apostrophe is */ /* not as sophisticated as it needs to be to be "perfect"). */ /* As usual there is *no warranty* but we hope it is useful. */ /*******************************************************************/ int error_count; %} DIGIT EXTENDED_DIGIT INTEGER EXPONENT DECIMAL_LITERAL BASE BASED_INTEGER BASED_LITERAL [0-9] [0-9a-zA-Z] ({DIGIT}(_?{DIGIT})*) ([eE](\+?|-){INTEGER}) {INTEGER}(\.?{INTEGER})?{EXPONENT}? {INTEGER} {EXTENDED_DIGIT}(_?{EXTENDED_DIGIT})* {BASE}#{BASED_INTEGER}(\.{BASED_INTEGER})?#{EXPONENT}?
LandY.154
Ada9X Lex
CSE 4100
%% "." "<" "(" "+" "|" "&" "*" ")" ";" "-" "/" "," ">" ":" "=" "'" ".." "<<" "<>" "<=" "**" "/=" ">>" ">=" ":=" "=>"
return('.'); return('<'); return('('); return('+'); return('|'); return('&'); return('*'); return(')'); return(';'); return('-'); return('/'); return(','); return('>'); return(':'); return('='); return(TIC); return(DOT_DOT); return(LT_LT); return(BOX); return(LT_EQ); return(EXPON); return(NE); return(GT_GT); return(GE); return(IS_ASSIGNED); return(RIGHT_SHAFT);
LandY.155
Ada9X Lex
CSE 4100 [a-zA-Z](_?[a-zA-Z0-9])* { return(lk_keyword(yytext)); } "'"."'" return(char_lit); \"(\"\"|[^\n\"])*\" return(char_string); {DECIMAL_LITERAL} return(numeric_lit); {BASED_LITERAL} return(numeric_lit); --.*\n ; [ \t\n\f] ; . {fprintf(stderr, " Illegal character:%c: on line %d\n", *yytext, yylineno); error_count++;} %% /* * Keywords stored in alpha order */ typedef struct { char * kw; int kwv; } KEY_TABLE; /* Reserved keyword list and Token values * as defined in y.tab.h */ # define NUM_KEYWORDS 69
LandY.156
Ada9X Lex
KEY_TABLE key_tab[NUM_KEYWORDS] = { CSE {"ABSTRACT", ABSTRACT}, {"ACCEPT", ACCEPT}, {"ACCESS", ACCESS}, 4100 {"ALIASED", ALIASED}, {"ALL", ALL}, {"AND", AND}, {"ARRAY", ARRAY}, {"AT", AT}, {"BEGIN", BEGiN}, {"BODY", BODY}, {"CASE", CASE}, {"CONSTANT", CONSTANT}, {"DECLARE", DECLARE}, {"DELAY", DELAY}, {"DELTA", DELTA}, {"DIGITS", DIGITS}, {"DO", DO}, {"ELSE", ELSE}, {"ELSIF", ELSIF}, {"END", END}, {"ENTRY", ENTRY}, {"EXCEPTION", EXCEPTION}, {"EXIT", EXIT}, {"FOR", FOR}, {"FUNCTION", FUNCTION}, {"GENERIC", GENERIC}, {"GOTO", GOTO}, {"IF", IF}, {"IN", IN}, {"IS", IS}, {"LIMITED", LIMITED}, {"LOOP", LOOP}, {"MOD", MOD}, {"NEW", NEW}, {"NOT", NOT}, {"NULL", NuLL}, {"OF", OF}, {"OR", OR}, {"OTHERS", OTHERS}, {"OUT", OUT}, {"PACKAGE", PACKAGE}, {"PRAGMA", PRAGMA}, {"PRIVATE", PRIVATE}, {"PROCEDURE", PROCEDURE}, {"PROTECTED", PROTECTED}, {"RAISE", RAISE}, {"RANGE", RANGE}, {"RECORD", RECORD}, {"REM", REM}, {"RENAMES", RENAMES}, {"REQUEUE", REQUEUE}, {"RETURN", RETURN}, {"REVERSE", REVERSE}, {"SELECT", SELECT}, {"SEPARATE", SEPARATE}, {"SUBTYPE", SUBTYPE}, {"TAGGED", TAGGED}, {"TASK", TASK}, {"TERMINATE", TERMINATE}, {"THEN", THEN}, {"TYPE", TYPE}, {"UNTIL", UNTIL}, {"USE", USE}, {"WHEN", WHEN}, {"WHILE", WHILE}, {"WITH", WITH}, {"XOR", XOR} };
LandY.157
Ada9X Lex
CSE 4100 to_upper(str) char *str; { char * cp; for (cp=str; *cp; cp++) { if (islower(*cp)) *cp -= ('a' - 'A') ; } } lk_keyword(str) char *str; { int min; int max; int guess, compare; min = 0; max = NUM_KEYWORDS-1; guess = (min + max) / 2; to_upper(str); for (guess=(min+max)/2; min<=max; guess=(min+max)/2) { if ((compare = strcmp(key_tab[guess].kw, str)) < 0) { min = guess + 1; } else if (compare > 0) { max = guess - 1; } else {return key_tab[guess].kwv;} } return identifier; }
LandY.158
Ada9X Lex
yyerror(s) CSE char *s; 4100 { extern int yychar;
error_count++;
fprintf(stderr," %s", s); if (yylineno) fprintf(stderr,", on line %d,", yylineno); fprintf(stderr," on input: "); if (yychar >= 0400) { if ((yychar >= ABORT) && (yychar <= XOR)) { fprintf(stderr, "(token) %s #%d\n", key_tab[yychar-ABORT].kw, yychar); } else switch (yychar) { case char_lit : fprintf(stderr, "character literal\n"); break; case identifier : fprintf(stderr, "identifier\n"); break; case char_string : fprintf(stderr, "string\n"); break; case numeric_lit : fprintf(stderr, "numeric literal\n"); break; case TIC : fprintf(stderr, "single-quote\n"); break; case DOT_DOT : fprintf(stderr, "..\n"); break; LandY.159
Ada9X Lex
CSE 4100 case LT_LT : fprintf(stderr, "<<\n"); break; case BOX : fprintf(stderr, "<>\n"); break; case LT_EQ : fprintf(stderr, "<=\n"); break; case EXPON : fprintf(stderr, "**\n"); break; case NE : fprintf(stderr, "/=\n"); break; case GT_GT : fprintf(stderr, ">>\n"); break; case GE : fprintf(stderr, ">=\n"); break; case IS_ASSIGNED : fprintf(stderr, ":=\n"); break; case RIGHT_SHAFT : fprintf(stderr, "=>\n"); break; default : fprintf(stderr, "(token) %d\n", yychar); } } else {switch (yychar) { case '\t': fprintf(stderr,"horizontal-tab\n"); return; case '\n': fprintf(stderr,"newline\n"); return; case '\0': fprintf(stderr,"\$end\n"); return; case ' ': fprintf(stderr, "(blank)"); return; default : fprintf(stderr,"(char) %c\n", yychar); return; } LandY.160 }
Ada9X Yacc
CSE 4100 /******* A YACC grammar for Ada 9X *********************************/ /* Copyright (C) Intermetrics, Inc. 1994 Cambridge, MA USA */ /* Copying permitted if accompanied by this statement. */ /* Derivative works are permitted if accompanied by this statement.*/ /* This grammar is thought to be correct as of May 1, 1994 */ /* but as usual there is *no warranty* to that effect. */ /*******************************************************************/ %{ #include <stdio.h> #include <ctype.h> #include <strings.h> #define BUF_SIZE 512 %} %union { char int %token %token %token %token %token %token %token %token %token %token %token trans[BUF_SIZE+1]; val; }
TIC DOT_DOT LT_LT BOX LT_EQ EXPON NE GT_GT GE IS_ASSIGNED RIGHT_SHAFT ABORT ABS ABSTRACT ACCEPT ACCESS ALIASED ALL AND ARRAY AT BEGiN BODY CASE CONSTANT DECLARE DELAY DELTA DIGITS DO ELSE ELSIF END ENTRY EXCEPTION EXIT FOR FUNCTION GENERIC GOTO IF IN IS LIMITED LOOP MOD NEW NOT NuLL OF OR OTHERS OUT PACKAGE PRAGMA PRIVATE PROCEDURE PROTECTED RAISE RANGE RECORD REM RENAMES REQUEUE RETURN REVERSE SELECT SEPARATE SUBTYPE TAGGED TASK TERMINATE THEN TYPE UNTIL USE WHEN WHILE WITH XOR char_lit identifier char_string numeric_lit
LandY.161
Ada9X Yacc
%type CSE %type 4100 %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> access_opt access_type adding address_spec aliased_opt align_opt allocator alternative alternative_s array_type assign_stmt attrib_def attribute_id basic_loop block block_body block_decl body body_opt body_stub c_id_opt c_name_list case_hdr case_stmt choice choice_s code_stmt comp_assoc comp_decl comp_decl_s comp_list comp_loc_s comp_unit compilation component_subtype_def compound_name compound_stmt cond_clause cond_clause_s cond_part condition constr_array_type context_spec decl decl_item decl_item_or_body decl_item_or_body_s1 decl_item_s decl_item_s1 decl_part def_id def_id_s derived_type designator discrete_range discrete_with_range discrim_part discrim_part_opt discrim_spec discrim_spec_s else_opt exit_stmt expression factor fixed_type float_type formal_part formal_part_opt generic_decl generic_derived_type generic_discrim_part_opt generic_formal generic_formal_part generic_inst generic_pkg_inst generic_subp_inst generic_type_def goal_symbol goto_stmt id_opt if_stmt index index_s init_opt integer_type iter_discrete_range_s iter_index_constraint iter_part iteration label label_opt limited_opt literal logical loop_stmt mark mode multiplying name name_opt name_s null_stmt number_decl object_decl
LandY.162
Ada9X Yacc
CSE 4100
%type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type %type
<trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans> <trans>
object_qualifier_opt object_subtype_def param param_s paren_expression pkg_body pkg_decl pkg_spec primary private_opt private_part private_type procedure_call prot_body prot_decl prot_def prot_elem_decl prot_elem_decl_s prot_op_body prot_op_body_s prot_op_decl prot_op_decl_s prot_opt prot_private_opt prot_spec qualified range range_constr_opt range_constraint range_spec range_spec_opt real_type record_def record_type record_type_spec relation relational rep_spec return_stmt reverse_opt short_circuit simple_expression simple_stmt statement statement_s subp_default subprog_body subprog_decl subprog_spec subprog_spec_is_push subunit subunit_body tagged_opt term type_completion type_decl type_def unary unconstr_array_type unit unlabeled use_clause use_clause_opt value value_s value_s_2 variant variant_part variant_s when_opt with_clause my_identifier error epsilon ALIASED CONSTANT IS_ASSIGNED TYPE IS '(' NEW ABSTRACT RANGE MOD DIGITS DELTA NOT ARRAY ACCESS CASE WHEN OTHERS NuLL TAGGED RECORD PROTECTED AND OR my_char_lit '=' NE '<' LT_EQ '>' GE '+' '-' '*' '/' ':' LT_LT IF ELSE CASE WHEN WHILE FOR REVERSE LOOP DECLARE BEGiN EXIT RETURN GOTO PROCEDURE FUNCTION IN OUT PACKAGE PRIVATE LIMITED USE WITH SEPARATE GENERIC FOR AT my_char_string my_numeric_lit
LandY.163
Ada9X Yacc
CSE 4100 %% goal_symbol : compilation ; decl : object_decl | number_decl | type_decl | subprog_decl | pkg_decl | prot_decl | generic_decl | body_stub | error ';' ; object_decl : def_id_s ':' object_qualifier_opt object_subtype_def init_opt ';' ; def_id_s : def_id | def_id_s ',' def_id ; def_id : my_identifier {strcpy($$, $1);} ; object_qualifier_opt : epsilon | ALIASED | CONSTANT | ALIASED CONSTANT ;
LandY.164
Ada9X Yacc
object_subtype_def : name CSE | array_type 4100 ; init_opt : epsilon | IS_ASSIGNED expression ; number_decl : def_id_s ':' CONSTANT IS_ASSIGNED expression ';' ; type_decl : TYPE my_identifier discrim_part_opt type_completion ';' ; discrim_part_opt : epsilon | discrim_part | '(' BOX ')' ; type_completion : epsilon | IS type_def ; type_def : integer_type | real_type | array_type | record_type | access_type | derived_type | private_type ; ETC See Full Yacc on web page
LandY.165
Ada9X Yacc
REMAINING NON GRAMMAR CODE AT END OF YACC FILE CSE 4100 %% mystrcat(s, t) char s[], t[]; { int i, j; i = j = 0; while (s[i] != '\0') i++; s[i] = ' '; i++; while ((s[i++] = t[j++]) != '\0') ; }
LandY.166
Ada9X Yacc
CSE 4100 /* To build this, run it through lex, compile it, and link it with */ /* the result of yacc'ing and cc'ing grammar9x.y, plus "-ly" */ FILE *fp; #include "lex.yy.c" main(argc, argv) int argc; char *argv[]; { /* Simple Ada 9X syntax checker */ /* Checks standard input if no arguments */ /* Checks files if one or more arguments */ extern int error_count; extern int yyparse(); extern int yylineno; FILE *flptr; int i; fp = fopen("output","w");
LandY.167
Ada9X Yacc
if (argc == 1) { CSE yyparse(); 4100 } else { for (i = 1; i < argc; i++) { if ((flptr = freopen(argv[i], "r",stdin)) == NULL) { fprintf(stderr, "%s: Can't open %s", argv[0], argv[i]); } else { if (argc > 2) fprintf(stderr, "%s:\n", argv[i]); yylineno = 1; yyparse(); } } } if (error_count) { fprintf(stderr, "%d syntax error%s detected\n", error_count, error_count == 1? "": "s"); exit(-1); } else { fprintf(stderr, "No syntax errors detected\n"); } } yywrap() {return 1;}
LandY.168