-
Notifications
You must be signed in to change notification settings - Fork 131
Expand file tree
/
Copy pathexpscan.l
More file actions
338 lines (300 loc) · 8.21 KB
/
expscan.l
File metadata and controls
338 lines (300 loc) · 8.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
/*
* Lex source for Fed-X lexical analyzer.
*
* This software was developed by U.S. Government employees as part of
* their official duties and is not subject to copyright.
*
* $Log: expscan.l,v $
* Revision 1.12 1997/05/29 20:17:34 sauderd
* Made some changes to Symbol (to be Symbol_) and false and true to be False
* and True. These changes affect the generated expscan.c file so that it will
* compile.
*
* Revision 1.11 1994/11/22 18:32:39 clark
* Part 11 IS; group reference
*
* Revision 1.10 1994/05/12 17:22:23 libes
* added #ifdefs for flex
*
* Revision 1.9 1994/05/12 17:18:10 libes
* made flex understand multiple files
*
* Revision 1.8 1994/05/11 19:50:00 libes
* numerous fixes
*
* Revision 1.7 1993/10/15 18:47:26 libes
* CADDETC certified
*
* Revision 1.5 1993/02/22 21:46:33 libes
* fixed unmatched_open_comment handler
*
* Revision 1.4 1992/08/18 17:11:36 libes
* rm'd extraneous error messages
*
* Revision 1.3 1992/06/08 18:05:20 libes
* prettied up interface to print_objects_when_running
*
* Revision 1.2 1992/05/31 08:30:54 libes
* multiple files
*
* Revision 1.1 1992/05/28 03:52:25 libes
* Initial revision
*
* Revision 1.4 1992/05/05 19:49:03 libes
* final alpha
*
* Revision 1.3 1992/02/12 07:02:49 libes
* do sub/supertypes
*
* Revision 1.2 1992/02/09 00:49:04 libes
* does ref/use correctly
*
* Revision 1.1 1992/02/05 08:40:30 libes
* Initial revision
*
* Revision 1.0.1.1 1992/01/22 02:47:57 libes
* copied from ~pdes
*
* Revision 4.9 1991/06/14 20:49:12 libes
* removed old infinity, added backslash
*
* Revision 4.8.1.1 1991/05/16 04:07:57 libes
* made scanner (under lex) understand hooks for doing include directive
*
* Revision 4.8.1.0 1991/05/16 01:10:15 libes
* branch for fixes to old code
*
* Revision 4.8 1991/05/03 21:09:02 libes
* Added sanity check to make sure lex/flex match -DLEX/FLEX
*
* Revision 4.7 1991/05/02 05:49:18 libes
* fixed bug in testing for exceeding open_comment[nesting_level]
*
* Revision 4.6 1991/04/29 19:44:40 libes
* Print all open comments rather than just one.
*
* Revision 4.5 1991/04/29 15:39:02 libes
* Changed commenting style (back) as per SNC who claims that N9 meant to
* say that tail remarks cannot occur in an open comment, nor can nested
* comments begin in a tail remark.
*
* Revision 4.4 1991/04/29 15:01:46 libes
* Add bounds checking to nesting level history
*
* Revision 4.3 1991/04/26 20:12:50 libes
* Made scanner work with lex
* Simulated exclusive states with inclusive states
* Fixed line counting
* Speeded up whitespace matching
* Convert unknown chars to whitespace
* Disabled default rule matching (enabled "jamming")
* Enabled detection/diagnostics of unterminated comments and strings literals
* Enabled detection/diagnostics of unexpected close comments
* Disabled detection/diagnostics of nested comments
*
* Revision 4.2 1990/12/18 14:00:04 clark
* Cosmetic changes
*
* Revision 4.1 90/09/13 16:29:00 clark
* BPR 2.1 alpha
*
*/
#include <ctype.h>
#if !defined(isascii) && defined(__isascii)
# define isascii __isascii
#endif
#include "express/basic.h"
#include "express/error.h"
#include "express/lexact.h"
#include "express/express.h"
#include "expparse.h"
#include "expscan.h"
enum { INITIAL, code, comment, return_end_schema };
extern int yylineno;
extern bool yyeof;
static int nesting_level = 0;
/* can't imagine this will ever be more than 2 or 3 - DEL */
#define MAX_NESTED_COMMENTS 20
static struct Symbol_ open_comment[MAX_NESTED_COMMENTS];
static_inline
int
SCANnextchar(char* buffer)
{
extern bool SCANread(void);
#ifdef keep_nul
static int escaped = 0;
#endif
if (SCANtext_ready || SCANread()) {
#ifdef keep_nul
if (!*SCANcurrent) {
buffer[0] = SCAN_ESCAPE;
*SCANcurrent = '0';
return 1;
} else if ((*SCANcurrent == SCAN_ESCAPE) && !escaped) {
escaped = 1;
buffer[0] = SCAN_ESCAPE;
return 1;
}
SCANbuffer.numRead--;
#endif
buffer[0] = *(SCANcurrent++);
if (!isascii(buffer[0])) {
ERRORreport_with_line(NONASCII_CHAR,yylineno,
0xff & buffer[0]);
buffer[0] = ' '; /* substitute space */
}
return 1;
} else
return 0;
}
#define NEWLINE (yylineno++)
/* when lex looks ahead over a newline, error messages get thrown off */
/* Fortunately, we know when that occurs, so adjust for it by this hack */
#define LINENO_FUDGE (yylineno - 1)
/* added for re-initializing parser -snc 22-Apr-1992 */
void
SCAN_lex_init(char *filename, FILE *fp) {
/* return to initial scan buffer */
SCAN_current_buffer = 0;
*(SCANcurrent = SCANbuffer.text) = '\0';
SCANbuffer.readEof = false;
SCANbuffer.file = fp;
SCANbuffer.filename = (filename ? filename : "");
current_filename = SCANbuffer.filename;
}
%%
digit = [0-9];
integer = digit+;
letter = [A-Za-z];
id_char = [A-Za-z0-9_];
<> => code
/* Added * at the end of next rule (to make lexer faster) - DEL */
<*>[\t ]* { IGNORE_TOKEN; }
<*>'\n' {
NEWLINE;
IGNORE_TOKEN;
}
<code>"--"[^\n]*'\n' {
NEWLINE;
SCANsave_comment(yytext);
IGNORE_TOKEN;
}
<*>"(*" => comment {
/* nested comment errors will occur with most deeply nested - DEL */
if (nesting_level < MAX_NESTED_COMMENTS) {
open_comment[nesting_level].line = yylineno;
open_comment[nesting_level].filename = current_filename;
}
nesting_level++;
IGNORE_TOKEN;
}
<code> {
/* real literal (like, think it'll fly?!) */
(integer)'.'(integer)?([eE][+-]?(integer))? {
return SCANprocess_real_literal(yytext);
}
/* integer literal */
integer {
return SCANprocess_integer_literal(yytext);
}
/* binary literal */
'%'[01]+ {
return SCANprocess_binary_literal(yytext);
}
/* identifier/keyword */
(letter)(id_char)* {
return SCANprocess_identifier_or_keyword(yytext);
}
/* bad identifier */
[_A-Za-z]id_char* {
ERRORreport_with_line(BAD_IDENTIFIER, yylineno, yytext);
return SCANprocess_identifier_or_keyword(yytext);
}
/* string literal */
"'"([^'\n]|"''")*"'" {
/* ' keep font-lock happy */
return SCANprocess_string(yytext);
}
"'"([^'\n]|"''")*'\n' {
ERRORreport_with_line(UNTERMINATED_STRING, LINENO_FUDGE);
NEWLINE;
return SCANprocess_string(yytext);
}
'"'[^\"\n]*'"' {
return SCANprocess_encoded_string(yytext);
}
'"'[^\"\n]*'\n' {
ERRORreport_with_line(UNTERMINATED_STRING, LINENO_FUDGE);
NEWLINE;
return SCANprocess_encoded_string(yytext);
}
';'[ \t]*"--"[^\n]*'\n' {
NEWLINE;
return SCANprocess_semicolon(yytext, 1);
}
";" { return SCANprocess_semicolon(yytext, 0); }
":=" { return TOK_ASSIGNMENT; }
":" { return TOK_COLON; }
"," { return TOK_COMMA; }
"||" { return TOK_CONCAT_OP; }
"." { return TOK_DOT; }
"=" { return TOK_EQUAL; }
"**" { return TOK_EXP; }
"|" { return TOK_SUCH_THAT; }
"<*" { return TOK_ALL_IN; }
">=" { return TOK_GREATER_EQUAL; }
">" { return TOK_GREATER_THAN; }
"?" { return TOK_QUESTION_MARK; }
":=:" { return TOK_INST_EQUAL; }
":<>:" { return TOK_INST_NOT_EQUAL; }
"[" { return TOK_LEFT_BRACKET; }
"{" { return TOK_LEFT_CURL; }
"(" { return TOK_LEFT_PAREN; }
"<=" { return TOK_LESS_EQUAL; }
"<" { return TOK_LESS_THAN; }
"-" { return TOK_MINUS; }
"<>" { return TOK_NOT_EQUAL; }
"+" { return TOK_PLUS; }
"/" { return TOK_REAL_DIV; }
"]" { return TOK_RIGHT_BRACKET; }
"}" { return TOK_RIGHT_CURL; }
")" { return TOK_RIGHT_PAREN; }
"*" { return TOK_TIMES; }
"\\" { return TOK_BACKSLASH; }
} /* <code> */
<return_end_schema>'X' => code {
return TOK_END_SCHEMA;
}
<comment>"*)" {
if (0 == --nesting_level) {
YYSETCONDITION(code);
}
IGNORE_TOKEN;
}
<code>"*)" {
ERRORreport_with_line(UNMATCHED_CLOSE_COMMENT, yylineno);
IGNORE_TOKEN;
}
<comment> {
'\n' {
NEWLINE;
IGNORE_TOKEN;
}
[^*()\n]* { IGNORE_TOKEN; }
[*()] { IGNORE_TOKEN; }
} /* <comment> */
/* As per N15, 7.1.5.3, all other recognized chars are incorrect - DEL */
<code>[$%&@\^{}~] {
ERRORreport_with_line(UNEXPECTED_CHARACTER,yylineno,yytext[0]);
IGNORE_TOKEN;
}
/* ... and unrecognized characters are treated as whitespace - DEL */
<*>[^] { IGNORE_TOKEN; }
%%
void
SCANskip_to_end_schema(perplex_t scanner)
{
while (yylex(scanner) != TOK_END_SCHEMA);
perplexUnput(scanner, 'X'); /* any old character */
YYSETCONDITION(return_end_schema);
}