diff options
Diffstat (limited to 'scripts/genksyms/lex.l')
-rw-r--r-- | scripts/genksyms/lex.l | 407 |
1 files changed, 407 insertions, 0 deletions
diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l new file mode 100644 index 000000000000..fe0dfeedf0ff --- /dev/null +++ b/scripts/genksyms/lex.l | |||
@@ -0,0 +1,407 @@ | |||
1 | /* Lexical analysis for genksyms. | ||
2 | Copyright 1996, 1997 Linux International. | ||
3 | |||
4 | New implementation contributed by Richard Henderson <rth@tamu.edu> | ||
5 | Based on original work by Bjorn Ekwall <bj0rn@blox.se> | ||
6 | |||
7 | Taken from Linux modutils 2.4.22. | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify it | ||
10 | under the terms of the GNU General Public License as published by the | ||
11 | Free Software Foundation; either version 2 of the License, or (at your | ||
12 | option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, but | ||
15 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software Foundation, | ||
21 | Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | ||
22 | |||
23 | |||
24 | %{ | ||
25 | |||
26 | #include <limits.h> | ||
27 | #include <stdlib.h> | ||
28 | #include <string.h> | ||
29 | #include <ctype.h> | ||
30 | |||
31 | #include "genksyms.h" | ||
32 | #include "parse.h" | ||
33 | |||
/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.

   YY_DECL is the flex hook that supplies the declaration of the generated
   scanner function.  Overriding it makes the generated first stage a
   file-local function named yylex1(), which leaves the name yylex() free
   for the hand-written second stage further down; that function calls
   yylex1() to obtain each raw token.  */
#define YY_DECL		static int yylex1(void)
37 | |||
38 | %} | ||
39 | |||
/* An identifier: a letter, '_' or '$', followed by any number of letters,
   digits, '_' or '$'.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal (which includes a lone "0"), decimal and
   hexadecimal, optionally followed by one of the suffixes u, l, ul or lu
   in either case.  No "ll" forms are listed.  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fraction (digits on at least one side of
   the '.') with an optional exponent, or a run of digits with a mandatory
   exponent; both forms take an optional f/F/l/L suffix.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals with an optional 'L' prefix.  A backslash
   consumes the character after it, so an escaped quote does not end the
   literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: any of ~ % ^ & * + = | < > / - followed by '=',
   plus && || -> << >>.
   NOTE(review): the '=' class contains '~' (so "~=" is matched) but not
   '!', so "!=" is not matched as one token; "++" and "--" are absent too.
   This looks like it could be an oversight, but changing the pattern
   would change how V2 input is tokenized -- confirm the effect on
   generated output before touching it.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* Version 2 checksumming does proper tokenization; version 1 wasn't
   quite so pedantic.  V2_TOKENS is declared with %s, i.e. as an inclusive
   start condition: rules without a <V2_TOKENS> prefix stay active while
   it is selected.  */
%s V2_TOKENS

/* We don't do multiple input files.  */
%option noyywrap
64 | |||
65 | %% | ||
66 | |||
67 | |||
 /* Keep track of our location in the original source files.  A line of
    the form  # <int> "<name>" ...  is returned as FILENAME, and yylex()
    takes the new file name and line number from its text, so cur_line
    is not incremented here.  Any other line starting with '#' is
    discarded and merely counted, as is every bare newline.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  These three rules only apply while the
    V2_TOKENS start condition is selected, which yylex() does on its
    first call.  */
<V2_TOKENS>{MC_TOKEN}			return OTHER;
<V2_TOKENS>{INT}			return INT;
<V2_TOKENS>{REAL}			return REAL;

"..."					return DOTS;

 /* All other tokens are single characters, returned as their own
    character code.  */
.					return yytext[0];
93 | |||
94 | |||
95 | %% | ||
96 | |||
97 | /* Bring in the keyword recognizer. */ | ||
98 | |||
99 | #include "keywords.c" | ||
100 | |||
101 | |||
102 | /* Macros to append to our phrase collection list. */ | ||
103 | |||
/* _APP(T, L): record the token text T (L bytes, with one further readable
   byte at T[L], normally the terminating NUL) on the phrase list.

   The macro expects two variables named `cur_node' and `next_node', both
   of type `struct string_list *', to be in scope where it is expanded.
   `next_node' is always an empty placeholder.  Each expansion
     - turns the current placeholder into the node for this token
       (cur_node), filling in a heap copy of L+1 bytes of T and the tag
       SYM_NORMAL, and
     - allocates a new placeholder whose `next' points back at cur_node.
   The list is therefore linked from the most recent token to the oldest,
   and `next_node->next' is always the most recently appended token.

   T and L are parenthesised in the expansion so that an expression
   argument (for example `cond ? a : b') is sized and copied correctly.
   L is evaluated twice, so it must be free of side effects.  */
#define _APP(T,L)	do {						\
	  cur_node = next_node;					\
	  next_node = xmalloc(sizeof(*next_node));		\
	  next_node->next = cur_node;				\
	  cur_node->string = memcpy(xmalloc((L) + 1), (T), (L) + 1); \
	  cur_node->tag = SYM_NORMAL;				\
	} while (0)

/* Append the text of the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)
113 | |||
114 | |||
115 | /* The second stage lexer. Here we incorporate knowledge of the state | ||
116 | of the parser to tailor the tokens that are returned. */ | ||
117 | |||
118 | int | ||
119 | yylex(void) | ||
120 | { | ||
121 | static enum { | ||
122 | ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE, | ||
123 | ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, | ||
124 | ST_TABLE_5, ST_TABLE_6 | ||
125 | } lexstate = ST_NOTSTARTED; | ||
126 | |||
127 | static int suppress_type_lookup, dont_want_brace_phrase; | ||
128 | static struct string_list *next_node; | ||
129 | |||
130 | int token, count = 0; | ||
131 | struct string_list *cur_node; | ||
132 | |||
133 | if (lexstate == ST_NOTSTARTED) | ||
134 | { | ||
135 | BEGIN(V2_TOKENS); | ||
136 | next_node = xmalloc(sizeof(*next_node)); | ||
137 | next_node->next = NULL; | ||
138 | lexstate = ST_NORMAL; | ||
139 | } | ||
140 | |||
141 | repeat: | ||
142 | token = yylex1(); | ||
143 | |||
144 | if (token == 0) | ||
145 | return 0; | ||
146 | else if (token == FILENAME) | ||
147 | { | ||
148 | char *file, *e; | ||
149 | |||
150 | /* Save the filename and line number for later error messages. */ | ||
151 | |||
152 | if (cur_filename) | ||
153 | free(cur_filename); | ||
154 | |||
155 | file = strchr(yytext, '\"')+1; | ||
156 | e = strchr(file, '\"'); | ||
157 | *e = '\0'; | ||
158 | cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); | ||
159 | cur_line = atoi(yytext+2); | ||
160 | |||
161 | goto repeat; | ||
162 | } | ||
163 | |||
164 | switch (lexstate) | ||
165 | { | ||
166 | case ST_NORMAL: | ||
167 | switch (token) | ||
168 | { | ||
169 | case IDENT: | ||
170 | APP; | ||
171 | { | ||
172 | const struct resword *r = is_reserved_word(yytext, yyleng); | ||
173 | if (r) | ||
174 | { | ||
175 | switch (token = r->token) | ||
176 | { | ||
177 | case ATTRIBUTE_KEYW: | ||
178 | lexstate = ST_ATTRIBUTE; | ||
179 | count = 0; | ||
180 | goto repeat; | ||
181 | case ASM_KEYW: | ||
182 | lexstate = ST_ASM; | ||
183 | count = 0; | ||
184 | goto repeat; | ||
185 | |||
186 | case STRUCT_KEYW: | ||
187 | case UNION_KEYW: | ||
188 | dont_want_brace_phrase = 3; | ||
189 | case ENUM_KEYW: | ||
190 | suppress_type_lookup = 2; | ||
191 | goto fini; | ||
192 | |||
193 | case EXPORT_SYMBOL_KEYW: | ||
194 | goto fini; | ||
195 | } | ||
196 | } | ||
197 | if (!suppress_type_lookup) | ||
198 | { | ||
199 | struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF); | ||
200 | if (sym && sym->type == SYM_TYPEDEF) | ||
201 | token = TYPE; | ||
202 | } | ||
203 | } | ||
204 | break; | ||
205 | |||
206 | case '[': | ||
207 | APP; | ||
208 | lexstate = ST_BRACKET; | ||
209 | count = 1; | ||
210 | goto repeat; | ||
211 | |||
212 | case '{': | ||
213 | APP; | ||
214 | if (dont_want_brace_phrase) | ||
215 | break; | ||
216 | lexstate = ST_BRACE; | ||
217 | count = 1; | ||
218 | goto repeat; | ||
219 | |||
220 | case '=': case ':': | ||
221 | APP; | ||
222 | lexstate = ST_EXPRESSION; | ||
223 | break; | ||
224 | |||
225 | case DOTS: | ||
226 | default: | ||
227 | APP; | ||
228 | break; | ||
229 | } | ||
230 | break; | ||
231 | |||
232 | case ST_ATTRIBUTE: | ||
233 | APP; | ||
234 | switch (token) | ||
235 | { | ||
236 | case '(': | ||
237 | ++count; | ||
238 | goto repeat; | ||
239 | case ')': | ||
240 | if (--count == 0) | ||
241 | { | ||
242 | lexstate = ST_NORMAL; | ||
243 | token = ATTRIBUTE_PHRASE; | ||
244 | break; | ||
245 | } | ||
246 | goto repeat; | ||
247 | default: | ||
248 | goto repeat; | ||
249 | } | ||
250 | break; | ||
251 | |||
252 | case ST_ASM: | ||
253 | APP; | ||
254 | switch (token) | ||
255 | { | ||
256 | case '(': | ||
257 | ++count; | ||
258 | goto repeat; | ||
259 | case ')': | ||
260 | if (--count == 0) | ||
261 | { | ||
262 | lexstate = ST_NORMAL; | ||
263 | token = ASM_PHRASE; | ||
264 | break; | ||
265 | } | ||
266 | goto repeat; | ||
267 | default: | ||
268 | goto repeat; | ||
269 | } | ||
270 | break; | ||
271 | |||
272 | case ST_BRACKET: | ||
273 | APP; | ||
274 | switch (token) | ||
275 | { | ||
276 | case '[': | ||
277 | ++count; | ||
278 | goto repeat; | ||
279 | case ']': | ||
280 | if (--count == 0) | ||
281 | { | ||
282 | lexstate = ST_NORMAL; | ||
283 | token = BRACKET_PHRASE; | ||
284 | break; | ||
285 | } | ||
286 | goto repeat; | ||
287 | default: | ||
288 | goto repeat; | ||
289 | } | ||
290 | break; | ||
291 | |||
292 | case ST_BRACE: | ||
293 | APP; | ||
294 | switch (token) | ||
295 | { | ||
296 | case '{': | ||
297 | ++count; | ||
298 | goto repeat; | ||
299 | case '}': | ||
300 | if (--count == 0) | ||
301 | { | ||
302 | lexstate = ST_NORMAL; | ||
303 | token = BRACE_PHRASE; | ||
304 | break; | ||
305 | } | ||
306 | goto repeat; | ||
307 | default: | ||
308 | goto repeat; | ||
309 | } | ||
310 | break; | ||
311 | |||
312 | case ST_EXPRESSION: | ||
313 | switch (token) | ||
314 | { | ||
315 | case '(': case '[': case '{': | ||
316 | ++count; | ||
317 | APP; | ||
318 | goto repeat; | ||
319 | case ')': case ']': case '}': | ||
320 | --count; | ||
321 | APP; | ||
322 | goto repeat; | ||
323 | case ',': case ';': | ||
324 | if (count == 0) | ||
325 | { | ||
326 | /* Put back the token we just read so's we can find it again | ||
327 | after registering the expression. */ | ||
328 | unput(token); | ||
329 | |||
330 | lexstate = ST_NORMAL; | ||
331 | token = EXPRESSION_PHRASE; | ||
332 | break; | ||
333 | } | ||
334 | APP; | ||
335 | goto repeat; | ||
336 | default: | ||
337 | APP; | ||
338 | goto repeat; | ||
339 | } | ||
340 | break; | ||
341 | |||
342 | case ST_TABLE_1: | ||
343 | goto repeat; | ||
344 | |||
345 | case ST_TABLE_2: | ||
346 | if (token == IDENT && yyleng == 1 && yytext[0] == 'X') | ||
347 | { | ||
348 | token = EXPORT_SYMBOL_KEYW; | ||
349 | lexstate = ST_TABLE_5; | ||
350 | APP; | ||
351 | break; | ||
352 | } | ||
353 | lexstate = ST_TABLE_6; | ||
354 | /* FALLTHRU */ | ||
355 | |||
356 | case ST_TABLE_6: | ||
357 | switch (token) | ||
358 | { | ||
359 | case '{': case '[': case '(': | ||
360 | ++count; | ||
361 | break; | ||
362 | case '}': case ']': case ')': | ||
363 | --count; | ||
364 | break; | ||
365 | case ',': | ||
366 | if (count == 0) | ||
367 | lexstate = ST_TABLE_2; | ||
368 | break; | ||
369 | }; | ||
370 | goto repeat; | ||
371 | |||
372 | case ST_TABLE_3: | ||
373 | goto repeat; | ||
374 | |||
375 | case ST_TABLE_4: | ||
376 | if (token == ';') | ||
377 | lexstate = ST_NORMAL; | ||
378 | goto repeat; | ||
379 | |||
380 | case ST_TABLE_5: | ||
381 | switch (token) | ||
382 | { | ||
383 | case ',': | ||
384 | token = ';'; | ||
385 | lexstate = ST_TABLE_2; | ||
386 | APP; | ||
387 | break; | ||
388 | default: | ||
389 | APP; | ||
390 | break; | ||
391 | } | ||
392 | break; | ||
393 | |||
394 | default: | ||
395 | abort(); | ||
396 | } | ||
397 | fini: | ||
398 | |||
399 | if (suppress_type_lookup > 0) | ||
400 | --suppress_type_lookup; | ||
401 | if (dont_want_brace_phrase > 0) | ||
402 | --dont_want_brace_phrase; | ||
403 | |||
404 | yylval = &next_node->next; | ||
405 | |||
406 | return token; | ||
407 | } | ||