diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/unifdef.c | 998 |
1 files changed, 998 insertions, 0 deletions
diff --git a/scripts/unifdef.c b/scripts/unifdef.c new file mode 100644 index 000000000000..5384b4377333 --- /dev/null +++ b/scripts/unifdef.c | |||
@@ -0,0 +1,998 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2002 - 2005 Tony Finch <dot@dotat.at>. All rights reserved. | ||
3 | * | ||
4 | * This code is derived from software contributed to Berkeley by Dave Yost. | ||
5 | * It was rewritten to support ANSI C by Tony Finch. The original version of | ||
6 | * unifdef carried the following copyright notice. None of its code remains | ||
7 | * in this version (though some of the names remain). | ||
8 | * | ||
9 | * Copyright (c) 1985, 1993 | ||
10 | * The Regents of the University of California. All rights reserved. | ||
11 | * | ||
12 | * Redistribution and use in source and binary forms, with or without | ||
13 | * modification, are permitted provided that the following conditions | ||
14 | * are met: | ||
15 | * 1. Redistributions of source code must retain the above copyright | ||
16 | * notice, this list of conditions and the following disclaimer. | ||
17 | * 2. Redistributions in binary form must reproduce the above copyright | ||
18 | * notice, this list of conditions and the following disclaimer in the | ||
19 | * documentation and/or other materials provided with the distribution. | ||
20 | * | ||
21 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | ||
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
31 | * SUCH DAMAGE. | ||
32 | */ | ||
33 | |||
34 | #include <sys/cdefs.h> | ||
35 | |||
36 | #ifndef lint | ||
37 | #if 0 | ||
38 | static const char copyright[] = | ||
39 | "@(#) Copyright (c) 1985, 1993\n\ | ||
40 | The Regents of the University of California. All rights reserved.\n"; | ||
41 | #endif | ||
42 | #ifdef __IDSTRING | ||
43 | __IDSTRING(Berkeley, "@(#)unifdef.c 8.1 (Berkeley) 6/6/93"); | ||
44 | __IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.8 2000/07/03 02:51:36 matt Exp $"); | ||
45 | __IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.171 2005/03/08 12:38:48 fanf2 Exp $"); | ||
46 | #endif | ||
47 | #endif /* not lint */ | ||
48 | #ifdef __FBSDID | ||
49 | __FBSDID("$FreeBSD: /repoman/r/ncvs/src/usr.bin/unifdef/unifdef.c,v 1.20 2005/05/21 09:55:09 ru Exp $"); | ||
50 | #endif | ||
51 | |||
52 | /* | ||
53 | * unifdef - remove ifdef'ed lines | ||
54 | * | ||
55 | * Wishlist: | ||
56 | * provide an option which will append the name of the | ||
57 | * appropriate symbol after #else's and #endif's | ||
58 | * provide an option which will check symbols after | ||
59 | * #else's and #endif's to see that they match their | ||
60 | * corresponding #ifdef or #ifndef | ||
61 | * | ||
62 | * The first two items above require better buffer handling, which would | ||
63 | * also make it possible to handle all "dodgy" directives correctly. | ||
64 | */ | ||
65 | |||
66 | #include <ctype.h> | ||
67 | #include <err.h> | ||
68 | #include <stdarg.h> | ||
69 | #include <stdbool.h> | ||
70 | #include <stdio.h> | ||
71 | #include <stdlib.h> | ||
72 | #include <string.h> | ||
73 | #include <unistd.h> | ||
74 | |||
75 | size_t strlcpy(char *dst, const char *src, size_t siz); | ||
76 | |||
77 | /* types of input lines: */ | ||
/*
 * Classification of one input line.  The first ten values are the
 * directive kinds; adding LT_DODGY to any of them gives the matching
 * "dodgy" (directive not on one line) variant, which is why the enum
 * reserves the range LT_DODGY .. LT_DODGY_LAST.
 */
typedef enum {
	LT_TRUEI,		/* true #if, symbol has the ignore flag */
	LT_FALSEI,		/* false #if, symbol has the ignore flag */
	LT_IF,			/* #if whose value is unknown */
	LT_TRUE,		/* true #if */
	LT_FALSE,		/* false #if */
	LT_ELIF,		/* #elif whose value is unknown */
	LT_ELTRUE,		/* true #elif */
	LT_ELFALSE,		/* false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* offset flag: directive spans lines */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_COUNT
} Linetype;

/* Printable name of every Linetype value, indexed by that value. */
static char const * const linetype_name[] = {
	[LT_TRUEI]		= "TRUEI",
	[LT_FALSEI]		= "FALSEI",
	[LT_IF]			= "IF",
	[LT_TRUE]		= "TRUE",
	[LT_FALSE]		= "FALSE",
	[LT_ELIF]		= "ELIF",
	[LT_ELTRUE]		= "ELTRUE",
	[LT_ELFALSE]		= "ELFALSE",
	[LT_ELSE]		= "ELSE",
	[LT_ENDIF]		= "ENDIF",
	[LT_DODGY + LT_TRUEI]	= "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI]	= "DODGY FALSEI",
	[LT_DODGY + LT_IF]	= "DODGY IF",
	[LT_DODGY + LT_TRUE]	= "DODGY TRUE",
	[LT_DODGY + LT_FALSE]	= "DODGY FALSE",
	[LT_DODGY + LT_ELIF]	= "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE]	= "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE]	= "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE]	= "DODGY ELSE",
	[LT_DODGY + LT_ENDIF]	= "DODGY ENDIF",
	[LT_PLAIN]		= "PLAIN",
	[LT_EOF]		= "EOF"
};
105 | |||
106 | /* state of #if processing */ | ||
/*
 * Where we are inside the current #if ... #endif group.  One of these
 * is stored per nesting level.
 */
typedef enum {
	IS_OUTSIDE,		/* not inside any group */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable name of every Ifstate value, indexed by that value. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]		= "OUTSIDE",
	[IS_FALSE_PREFIX]	= "FALSE_PREFIX",
	[IS_TRUE_PREFIX]	= "TRUE_PREFIX",
	[IS_PASS_MIDDLE]	= "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]	= "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]	= "TRUE_MIDDLE",
	[IS_PASS_ELSE]		= "PASS_ELSE",
	[IS_FALSE_ELSE]		= "FALSE_ELSE",
	[IS_TRUE_ELSE]		= "TRUE_ELSE",
	[IS_FALSE_TRAILER]	= "FALSE_TRAILER"
};
127 | |||
128 | /* state of comment parser */ | ||
/*
 * What the comment/literal scanner is currently inside.  NO_COMMENT is
 * deliberately equal to false so the state can be tested as a boolean.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;

/* Printable name of every Comment_state value, indexed by that value. */
static char const * const comment_name[] = {
	[NO_COMMENT]		= "NO",
	[C_COMMENT]		= "C",
	[CXX_COMMENT]		= "CXX",
	[STARTING_COMMENT]	= "STARTING",
	[FINISHING_COMMENT]	= "FINISHING",
	[CHAR_LITERAL]		= "CHAR",
	[STRING_LITERAL]	= "STRING"
};
142 | |||
143 | /* state of preprocessor line parser */ | ||
/* How far the current line still qualifies as a preprocessor line. */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable name of every Line_state value, indexed by that value. */
static char const * const linestate_name[] = {
	[LS_START]	= "START",
	[LS_HASH]	= "HASH",
	[LS_DIRTY]	= "DIRTY"
};
153 | |||
154 | /* | ||
155 | * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1 | ||
156 | */ | ||
#define	MAXDEPTH	64		/* deepest #if nesting we track */
#define	MAXLINE		4096		/* longest input line we read */
#define	MAXSYMS		4096		/* most symbols we can be told about */

/*
 * Editing a keyword in place can make the line longer, so the tline
 * buffer is declared this many bytes larger than MAXLINE.
 */
#define	EDITSLOP	10
166 | |||
167 | /* | ||
168 | * Globals. | ||
169 | */ | ||
170 | |||
171 | static bool complement; /* -c: do the complement */ | ||
172 | static bool debugging; /* -d: debugging reports */ | ||
173 | static bool iocccok; /* -e: fewer IOCCC errors */ | ||
174 | static bool killconsts; /* -k: eval constant #ifs */ | ||
175 | static bool lnblank; /* -l: blank deleted lines */ | ||
176 | static bool lnnum; /* -n: add #line directives */ | ||
177 | static bool symlist; /* -s: output symbol list */ | ||
178 | static bool text; /* -t: this is a text file */ | ||
179 | |||
180 | static const char *symname[MAXSYMS]; /* symbol name */ | ||
181 | static const char *value[MAXSYMS]; /* -Dsym=value */ | ||
182 | static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ | ||
183 | static int nsyms; /* number of symbols */ | ||
184 | |||
185 | static FILE *input; /* input file pointer */ | ||
186 | static const char *filename; /* input file name */ | ||
187 | static int linenum; /* current line number */ | ||
188 | |||
189 | static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */ | ||
190 | static char *keyword; /* used for editing #elif's */ | ||
191 | |||
192 | static Comment_state incomment; /* comment parser state */ | ||
193 | static Line_state linestate; /* #if line parser state */ | ||
194 | static Ifstate ifstate[MAXDEPTH]; /* #if processor state */ | ||
195 | static bool ignoring[MAXDEPTH]; /* ignore comments state */ | ||
196 | static int stifline[MAXDEPTH]; /* start of current #if */ | ||
197 | static int depth; /* current #if nesting */ | ||
198 | static int delcount; /* count of deleted lines */ | ||
199 | static bool keepthis; /* don't delete constant #if */ | ||
200 | |||
201 | static int exitstat; /* program exit status */ | ||
202 | |||
203 | static void addsym(bool, bool, char *); | ||
204 | static void debug(const char *, ...); | ||
205 | static void done(void); | ||
206 | static void error(const char *); | ||
207 | static int findsym(const char *); | ||
208 | static void flushline(bool); | ||
209 | static Linetype getline(void); | ||
210 | static Linetype ifeval(const char **); | ||
211 | static void ignoreoff(void); | ||
212 | static void ignoreon(void); | ||
213 | static void keywordedit(const char *); | ||
214 | static void nest(void); | ||
215 | static void process(void); | ||
216 | static const char *skipcomment(const char *); | ||
217 | static const char *skipsym(const char *); | ||
218 | static void state(Ifstate); | ||
219 | static int strlcmp(const char *, const char *, size_t); | ||
220 | static void unnest(void); | ||
221 | static void usage(void); | ||
222 | |||
/*
 * True when the character c cannot be part of an identifier, i.e. it is
 * neither a letter, a digit, nor an underscore.  The argument is fully
 * parenthesized so that any expression may be passed; it is evaluated
 * more than once, so it must not have side effects.
 */
#define endsym(c) (!isalpha((unsigned char)(c)) && \
		   !isdigit((unsigned char)(c)) && (c) != '_')
224 | |||
225 | /* | ||
226 | * The main program. | ||
227 | */ | ||
228 | int | ||
229 | main(int argc, char *argv[]) | ||
230 | { | ||
231 | int opt; | ||
232 | |||
233 | while ((opt = getopt(argc, argv, "i:D:U:I:cdeklnst")) != -1) | ||
234 | switch (opt) { | ||
235 | case 'i': /* treat stuff controlled by these symbols as text */ | ||
236 | /* | ||
237 | * For strict backwards-compatibility the U or D | ||
238 | * should be immediately after the -i but it doesn't | ||
239 | * matter much if we relax that requirement. | ||
240 | */ | ||
241 | opt = *optarg++; | ||
242 | if (opt == 'D') | ||
243 | addsym(true, true, optarg); | ||
244 | else if (opt == 'U') | ||
245 | addsym(true, false, optarg); | ||
246 | else | ||
247 | usage(); | ||
248 | break; | ||
249 | case 'D': /* define a symbol */ | ||
250 | addsym(false, true, optarg); | ||
251 | break; | ||
252 | case 'U': /* undef a symbol */ | ||
253 | addsym(false, false, optarg); | ||
254 | break; | ||
255 | case 'I': | ||
256 | /* no-op for compatibility with cpp */ | ||
257 | break; | ||
258 | case 'c': /* treat -D as -U and vice versa */ | ||
259 | complement = true; | ||
260 | break; | ||
261 | case 'd': | ||
262 | debugging = true; | ||
263 | break; | ||
264 | case 'e': /* fewer errors from dodgy lines */ | ||
265 | iocccok = true; | ||
266 | break; | ||
267 | case 'k': /* process constant #ifs */ | ||
268 | killconsts = true; | ||
269 | break; | ||
270 | case 'l': /* blank deleted lines instead of omitting them */ | ||
271 | lnblank = true; | ||
272 | break; | ||
273 | case 'n': /* add #line directive after deleted lines */ | ||
274 | lnnum = true; | ||
275 | break; | ||
276 | case 's': /* only output list of symbols that control #ifs */ | ||
277 | symlist = true; | ||
278 | break; | ||
279 | case 't': /* don't parse C comments */ | ||
280 | text = true; | ||
281 | break; | ||
282 | default: | ||
283 | usage(); | ||
284 | } | ||
285 | argc -= optind; | ||
286 | argv += optind; | ||
287 | if (argc > 1) { | ||
288 | errx(2, "can only do one file"); | ||
289 | } else if (argc == 1 && strcmp(*argv, "-") != 0) { | ||
290 | filename = *argv; | ||
291 | input = fopen(filename, "r"); | ||
292 | if (input == NULL) | ||
293 | err(2, "can't open %s", filename); | ||
294 | } else { | ||
295 | filename = "[stdin]"; | ||
296 | input = stdin; | ||
297 | } | ||
298 | process(); | ||
299 | abort(); /* bug */ | ||
300 | } | ||
301 | |||
/*
 * Print the command synopsis on stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-cdeklnst] [-Ipath]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
309 | |||
310 | /* | ||
311 | * A state transition function alters the global #if processing state | ||
312 | * in a particular way. The table below is indexed by the current | ||
313 | * processing state and the type of the current line. | ||
314 | * | ||
315 | * Nesting is handled by keeping a stack of states; some transition | ||
316 | * functions increase or decrease the depth. They also maintain the | ||
317 | * ignore state on a stack. In some complicated cases they have to | ||
318 | * alter the preprocessor directive, as follows. | ||
319 | * | ||
320 | * When we have processed a group that starts off with a known-false | ||
321 | * #if/#elif sequence (which has therefore been deleted) followed by a | ||
322 | * #elif that we don't understand and therefore must keep, we edit the | ||
323 | * latter into a #if to keep the nesting correct. | ||
324 | * | ||
325 | * When we find a true #elif in a group, the following block will | ||
326 | * always be kept and the rest of the sequence after the next #elif or | ||
327 | * #else will be discarded. We edit the #elif into a #else and the | ||
328 | * following directive to #endif since this has the desired behaviour. | ||
329 | * | ||
330 | * "Dodgy" directives are split across multiple lines, the most common | ||
331 | * example being a multi-line comment hanging off the right of the | ||
332 | * directive. We can handle them correctly only if there is no change | ||
333 | * from printing to dropping (or vice versa) caused by that directive. | ||
334 | * If the directive is the first of a group we have a choice between | ||
335 | * failing with an error, or passing it through unchanged instead of | ||
336 | * evaluating it. The latter is not the default to avoid questions from | ||
337 | * users about unifdef unexpectedly leaving behind preprocessor directives. | ||
338 | */ | ||
339 | typedef void state_fn(void); | ||
340 | |||
341 | /* report an error */ | ||
342 | static void Eelif (void) { error("Inappropriate #elif"); } | ||
343 | static void Eelse (void) { error("Inappropriate #else"); } | ||
344 | static void Eendif(void) { error("Inappropriate #endif"); } | ||
345 | static void Eeof (void) { error("Premature EOF"); } | ||
346 | static void Eioccc(void) { error("Obfuscated preprocessor control line"); } | ||
347 | /* plain line handling */ | ||
348 | static void print (void) { flushline(true); } | ||
349 | static void drop (void) { flushline(false); } | ||
350 | /* output lacks group's start line */ | ||
351 | static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); } | ||
352 | static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); } | ||
353 | static void Selse (void) { drop(); state(IS_TRUE_ELSE); } | ||
354 | /* print/pass this block */ | ||
355 | static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); } | ||
356 | static void Pelse (void) { print(); state(IS_PASS_ELSE); } | ||
357 | static void Pendif(void) { print(); unnest(); } | ||
358 | /* discard this block */ | ||
359 | static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); } | ||
360 | static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); } | ||
361 | static void Delse (void) { drop(); state(IS_FALSE_ELSE); } | ||
362 | static void Dendif(void) { drop(); unnest(); } | ||
363 | /* first line of group */ | ||
364 | static void Fdrop (void) { nest(); Dfalse(); } | ||
365 | static void Fpass (void) { nest(); Pelif(); } | ||
366 | static void Ftrue (void) { nest(); Strue(); } | ||
367 | static void Ffalse(void) { nest(); Sfalse(); } | ||
368 | /* variable pedantry for obfuscated lines */ | ||
369 | static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); } | ||
370 | static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); } | ||
371 | static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); } | ||
372 | /* ignore comments in this block */ | ||
373 | static void Idrop (void) { Fdrop(); ignoreon(); } | ||
374 | static void Itrue (void) { Ftrue(); ignoreon(); } | ||
375 | static void Ifalse(void) { Ffalse(); ignoreon(); } | ||
376 | /* edit this line */ | ||
377 | static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); } | ||
378 | static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); } | ||
379 | static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); } | ||
380 | static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); } | ||
381 | |||
382 | static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { | ||
383 | /* IS_OUTSIDE */ | ||
384 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, | ||
385 | Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, | ||
386 | print, done }, | ||
387 | /* IS_FALSE_PREFIX */ | ||
388 | { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, | ||
389 | Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, | ||
390 | drop, Eeof }, | ||
391 | /* IS_TRUE_PREFIX */ | ||
392 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, | ||
393 | Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, | ||
394 | print, Eeof }, | ||
395 | /* IS_PASS_MIDDLE */ | ||
396 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, | ||
397 | Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, | ||
398 | print, Eeof }, | ||
399 | /* IS_FALSE_MIDDLE */ | ||
400 | { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif, | ||
401 | Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, | ||
402 | drop, Eeof }, | ||
403 | /* IS_TRUE_MIDDLE */ | ||
404 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, | ||
405 | Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, | ||
406 | print, Eeof }, | ||
407 | /* IS_PASS_ELSE */ | ||
408 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, | ||
409 | Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, | ||
410 | print, Eeof }, | ||
411 | /* IS_FALSE_ELSE */ | ||
412 | { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, | ||
413 | Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, | ||
414 | drop, Eeof }, | ||
415 | /* IS_TRUE_ELSE */ | ||
416 | { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, | ||
417 | Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, | ||
418 | print, Eeof }, | ||
419 | /* IS_FALSE_TRAILER */ | ||
420 | { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, | ||
421 | Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, | ||
422 | drop, Eeof } | ||
423 | /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF | ||
424 | TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) | ||
425 | PLAIN EOF */ | ||
426 | }; | ||
427 | |||
428 | /* | ||
429 | * State machine utility functions | ||
430 | */ | ||
431 | static void | ||
432 | done(void) | ||
433 | { | ||
434 | if (incomment) | ||
435 | error("EOF in comment"); | ||
436 | exit(exitstat); | ||
437 | } | ||
438 | static void | ||
439 | ignoreoff(void) | ||
440 | { | ||
441 | if (depth == 0) | ||
442 | abort(); /* bug */ | ||
443 | ignoring[depth] = ignoring[depth-1]; | ||
444 | } | ||
445 | static void | ||
446 | ignoreon(void) | ||
447 | { | ||
448 | ignoring[depth] = true; | ||
449 | } | ||
450 | static void | ||
451 | keywordedit(const char *replacement) | ||
452 | { | ||
453 | strlcpy(keyword, replacement, tline + sizeof(tline) - keyword); | ||
454 | print(); | ||
455 | } | ||
456 | static void | ||
457 | nest(void) | ||
458 | { | ||
459 | depth += 1; | ||
460 | if (depth >= MAXDEPTH) | ||
461 | error("Too many levels of nesting"); | ||
462 | stifline[depth] = linenum; | ||
463 | } | ||
464 | static void | ||
465 | unnest(void) | ||
466 | { | ||
467 | if (depth == 0) | ||
468 | abort(); /* bug */ | ||
469 | depth -= 1; | ||
470 | } | ||
471 | static void | ||
472 | state(Ifstate is) | ||
473 | { | ||
474 | ifstate[depth] = is; | ||
475 | } | ||
476 | |||
477 | /* | ||
478 | * Write a line to the output or not, according to command line options. | ||
479 | */ | ||
480 | static void | ||
481 | flushline(bool keep) | ||
482 | { | ||
483 | if (symlist) | ||
484 | return; | ||
485 | if (keep ^ complement) { | ||
486 | if (lnnum && delcount > 0) | ||
487 | printf("#line %d\n", linenum); | ||
488 | fputs(tline, stdout); | ||
489 | delcount = 0; | ||
490 | } else { | ||
491 | if (lnblank) | ||
492 | putc('\n', stdout); | ||
493 | exitstat = 1; | ||
494 | delcount += 1; | ||
495 | } | ||
496 | } | ||
497 | |||
498 | /* | ||
499 | * The driver for the state machine. | ||
500 | */ | ||
501 | static void | ||
502 | process(void) | ||
503 | { | ||
504 | Linetype lineval; | ||
505 | |||
506 | for (;;) { | ||
507 | linenum++; | ||
508 | lineval = getline(); | ||
509 | trans_table[ifstate[depth]][lineval](); | ||
510 | debug("process %s -> %s depth %d", | ||
511 | linetype_name[lineval], | ||
512 | ifstate_name[ifstate[depth]], depth); | ||
513 | } | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * Parse a line and determine its type. We keep the preprocessor line | ||
518 | * parser state between calls in the global variable linestate, with | ||
519 | * help from skipcomment(). | ||
520 | */ | ||
521 | static Linetype | ||
522 | getline(void) | ||
523 | { | ||
524 | const char *cp; | ||
525 | int cursym; | ||
526 | int kwlen; | ||
527 | Linetype retval; | ||
528 | Comment_state wascomment; | ||
529 | |||
530 | if (fgets(tline, MAXLINE, input) == NULL) | ||
531 | return (LT_EOF); | ||
532 | retval = LT_PLAIN; | ||
533 | wascomment = incomment; | ||
534 | cp = skipcomment(tline); | ||
535 | if (linestate == LS_START) { | ||
536 | if (*cp == '#') { | ||
537 | linestate = LS_HASH; | ||
538 | cp = skipcomment(cp + 1); | ||
539 | } else if (*cp != '\0') | ||
540 | linestate = LS_DIRTY; | ||
541 | } | ||
542 | if (!incomment && linestate == LS_HASH) { | ||
543 | keyword = tline + (cp - tline); | ||
544 | cp = skipsym(cp); | ||
545 | kwlen = cp - keyword; | ||
546 | /* no way can we deal with a continuation inside a keyword */ | ||
547 | if (strncmp(cp, "\\\n", 2) == 0) | ||
548 | Eioccc(); | ||
549 | if (strlcmp("ifdef", keyword, kwlen) == 0 || | ||
550 | strlcmp("ifndef", keyword, kwlen) == 0) { | ||
551 | cp = skipcomment(cp); | ||
552 | if ((cursym = findsym(cp)) < 0) | ||
553 | retval = LT_IF; | ||
554 | else { | ||
555 | retval = (keyword[2] == 'n') | ||
556 | ? LT_FALSE : LT_TRUE; | ||
557 | if (value[cursym] == NULL) | ||
558 | retval = (retval == LT_TRUE) | ||
559 | ? LT_FALSE : LT_TRUE; | ||
560 | if (ignore[cursym]) | ||
561 | retval = (retval == LT_TRUE) | ||
562 | ? LT_TRUEI : LT_FALSEI; | ||
563 | } | ||
564 | cp = skipsym(cp); | ||
565 | } else if (strlcmp("if", keyword, kwlen) == 0) | ||
566 | retval = ifeval(&cp); | ||
567 | else if (strlcmp("elif", keyword, kwlen) == 0) | ||
568 | retval = ifeval(&cp) - LT_IF + LT_ELIF; | ||
569 | else if (strlcmp("else", keyword, kwlen) == 0) | ||
570 | retval = LT_ELSE; | ||
571 | else if (strlcmp("endif", keyword, kwlen) == 0) | ||
572 | retval = LT_ENDIF; | ||
573 | else { | ||
574 | linestate = LS_DIRTY; | ||
575 | retval = LT_PLAIN; | ||
576 | } | ||
577 | cp = skipcomment(cp); | ||
578 | if (*cp != '\0') { | ||
579 | linestate = LS_DIRTY; | ||
580 | if (retval == LT_TRUE || retval == LT_FALSE || | ||
581 | retval == LT_TRUEI || retval == LT_FALSEI) | ||
582 | retval = LT_IF; | ||
583 | if (retval == LT_ELTRUE || retval == LT_ELFALSE) | ||
584 | retval = LT_ELIF; | ||
585 | } | ||
586 | if (retval != LT_PLAIN && (wascomment || incomment)) { | ||
587 | retval += LT_DODGY; | ||
588 | if (incomment) | ||
589 | linestate = LS_DIRTY; | ||
590 | } | ||
591 | /* skipcomment should have changed the state */ | ||
592 | if (linestate == LS_HASH) | ||
593 | abort(); /* bug */ | ||
594 | } | ||
595 | if (linestate == LS_DIRTY) { | ||
596 | while (*cp != '\0') | ||
597 | cp = skipcomment(cp + 1); | ||
598 | } | ||
599 | debug("parser %s comment %s line", | ||
600 | comment_name[incomment], linestate_name[linestate]); | ||
601 | return (retval); | ||
602 | } | ||
603 | |||
604 | /* | ||
605 | * These are the binary operators that are supported by the expression | ||
606 | * evaluator. Note that if support for division is added then we also | ||
607 | * need short-circuiting booleans because of divide-by-zero. | ||
608 | */ | ||
/* a < b */
static int
op_lt(int a, int b)
{
	return (a < b);
}

/* a > b */
static int
op_gt(int a, int b)
{
	return (a > b);
}

/* a <= b */
static int
op_le(int a, int b)
{
	return (a <= b);
}

/* a >= b */
static int
op_ge(int a, int b)
{
	return (a >= b);
}

/* a == b */
static int
op_eq(int a, int b)
{
	return (a == b);
}

/* a != b */
static int
op_ne(int a, int b)
{
	return (a != b);
}

/* logical or; both operands have already been evaluated */
static int
op_or(int a, int b)
{
	return (a || b);
}

/* logical and; both operands have already been evaluated */
static int
op_and(int a, int b)
{
	return (a && b);
}
617 | |||
618 | /* | ||
619 | * An evaluation function takes three arguments, as follows: (1) a pointer to | ||
620 | * an element of the precedence table which lists the operators at the current | ||
621 | * level of precedence; (2) a pointer to an integer which will receive the | ||
622 | * value of the expression; and (3) a pointer to a char* that points to the | ||
623 | * expression to be evaluated and that is updated to the end of the expression | ||
624 | * when evaluation is complete. The function returns LT_FALSE if the value of | ||
625 | * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the | ||
626 | * expression could not be evaluated. | ||
627 | */ | ||
628 | struct ops; | ||
629 | |||
630 | typedef Linetype eval_fn(const struct ops *, int *, const char **); | ||
631 | |||
632 | static eval_fn eval_table, eval_unary; | ||
633 | |||
634 | /* | ||
635 | * The precedence table. Expressions involving binary operators are evaluated | ||
636 | * in a table-driven way by eval_table. When it evaluates a subexpression it | ||
637 | * calls the inner function with its first argument pointing to the next | ||
638 | * element of the table. Innermost expressions have special non-table-driven | ||
639 | * handling. | ||
640 | */ | ||
641 | static const struct ops { | ||
642 | eval_fn *inner; | ||
643 | struct op { | ||
644 | const char *str; | ||
645 | int (*fn)(int, int); | ||
646 | } op[5]; | ||
647 | } eval_ops[] = { | ||
648 | { eval_table, { { "||", op_or } } }, | ||
649 | { eval_table, { { "&&", op_and } } }, | ||
650 | { eval_table, { { "==", op_eq }, | ||
651 | { "!=", op_ne } } }, | ||
652 | { eval_unary, { { "<=", op_le }, | ||
653 | { ">=", op_ge }, | ||
654 | { "<", op_lt }, | ||
655 | { ">", op_gt } } } | ||
656 | }; | ||
657 | |||
658 | /* | ||
659 | * Function for evaluating the innermost parts of expressions, | ||
660 | * viz. !expr (expr) defined(symbol) symbol number | ||
661 | * We reset the keepthis flag when we find a non-constant subexpression. | ||
662 | */ | ||
663 | static Linetype | ||
664 | eval_unary(const struct ops *ops, int *valp, const char **cpp) | ||
665 | { | ||
666 | const char *cp; | ||
667 | char *ep; | ||
668 | int sym; | ||
669 | |||
670 | cp = skipcomment(*cpp); | ||
671 | if (*cp == '!') { | ||
672 | debug("eval%d !", ops - eval_ops); | ||
673 | cp++; | ||
674 | if (eval_unary(ops, valp, &cp) == LT_IF) | ||
675 | return (LT_IF); | ||
676 | *valp = !*valp; | ||
677 | } else if (*cp == '(') { | ||
678 | cp++; | ||
679 | debug("eval%d (", ops - eval_ops); | ||
680 | if (eval_table(eval_ops, valp, &cp) == LT_IF) | ||
681 | return (LT_IF); | ||
682 | cp = skipcomment(cp); | ||
683 | if (*cp++ != ')') | ||
684 | return (LT_IF); | ||
685 | } else if (isdigit((unsigned char)*cp)) { | ||
686 | debug("eval%d number", ops - eval_ops); | ||
687 | *valp = strtol(cp, &ep, 0); | ||
688 | cp = skipsym(cp); | ||
689 | } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { | ||
690 | cp = skipcomment(cp+7); | ||
691 | debug("eval%d defined", ops - eval_ops); | ||
692 | if (*cp++ != '(') | ||
693 | return (LT_IF); | ||
694 | cp = skipcomment(cp); | ||
695 | sym = findsym(cp); | ||
696 | if (sym < 0) | ||
697 | return (LT_IF); | ||
698 | *valp = (value[sym] != NULL); | ||
699 | cp = skipsym(cp); | ||
700 | cp = skipcomment(cp); | ||
701 | if (*cp++ != ')') | ||
702 | return (LT_IF); | ||
703 | keepthis = false; | ||
704 | } else if (!endsym(*cp)) { | ||
705 | debug("eval%d symbol", ops - eval_ops); | ||
706 | sym = findsym(cp); | ||
707 | if (sym < 0) | ||
708 | return (LT_IF); | ||
709 | if (value[sym] == NULL) | ||
710 | *valp = 0; | ||
711 | else { | ||
712 | *valp = strtol(value[sym], &ep, 0); | ||
713 | if (*ep != '\0' || ep == value[sym]) | ||
714 | return (LT_IF); | ||
715 | } | ||
716 | cp = skipsym(cp); | ||
717 | keepthis = false; | ||
718 | } else { | ||
719 | debug("eval%d bad expr", ops - eval_ops); | ||
720 | return (LT_IF); | ||
721 | } | ||
722 | |||
723 | *cpp = cp; | ||
724 | debug("eval%d = %d", ops - eval_ops, *valp); | ||
725 | return (*valp ? LT_TRUE : LT_FALSE); | ||
726 | } | ||
727 | |||
728 | /* | ||
729 | * Table-driven evaluation of binary operators. | ||
730 | */ | ||
731 | static Linetype | ||
732 | eval_table(const struct ops *ops, int *valp, const char **cpp) | ||
733 | { | ||
734 | const struct op *op; | ||
735 | const char *cp; | ||
736 | int val; | ||
737 | |||
738 | debug("eval%d", ops - eval_ops); | ||
739 | cp = *cpp; | ||
740 | if (ops->inner(ops+1, valp, &cp) == LT_IF) | ||
741 | return (LT_IF); | ||
742 | for (;;) { | ||
743 | cp = skipcomment(cp); | ||
744 | for (op = ops->op; op->str != NULL; op++) | ||
745 | if (strncmp(cp, op->str, strlen(op->str)) == 0) | ||
746 | break; | ||
747 | if (op->str == NULL) | ||
748 | break; | ||
749 | cp += strlen(op->str); | ||
750 | debug("eval%d %s", ops - eval_ops, op->str); | ||
751 | if (ops->inner(ops+1, &val, &cp) == LT_IF) | ||
752 | return (LT_IF); | ||
753 | *valp = op->fn(*valp, val); | ||
754 | } | ||
755 | |||
756 | *cpp = cp; | ||
757 | debug("eval%d = %d", ops - eval_ops, *valp); | ||
758 | return (*valp ? LT_TRUE : LT_FALSE); | ||
759 | } | ||
760 | |||
761 | /* | ||
762 | * Evaluate the expression on a #if or #elif line. If we can work out | ||
763 | * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we | ||
764 | * return just a generic LT_IF. | ||
765 | */ | ||
766 | static Linetype | ||
767 | ifeval(const char **cpp) | ||
768 | { | ||
769 | int ret; | ||
770 | int val; | ||
771 | |||
772 | debug("eval %s", *cpp); | ||
773 | keepthis = killconsts ? false : true; | ||
774 | ret = eval_table(eval_ops, &val, cpp); | ||
775 | debug("eval = %d", val); | ||
776 | return (keepthis ? LT_IF : ret); | ||
777 | } | ||
778 | |||
779 | /* | ||
780 | * Skip over comments, strings, and character literals and stop at the | ||
781 | * next character position that is not whitespace. Between calls we keep | ||
782 | * the comment state in the global variable incomment, and we also adjust | ||
783 | * the global variable linestate when we see a newline. | ||
784 | * XXX: doesn't cope with the buffer splitting inside a state transition. | ||
785 | */ | ||
786 | static const char * | ||
787 | skipcomment(const char *cp) | ||
788 | { | ||
789 | if (text || ignoring[depth]) { | ||
790 | for (; isspace((unsigned char)*cp); cp++) | ||
791 | if (*cp == '\n') | ||
792 | linestate = LS_START; | ||
793 | return (cp); | ||
794 | } | ||
795 | while (*cp != '\0') | ||
796 | /* don't reset to LS_START after a line continuation */ | ||
797 | if (strncmp(cp, "\\\n", 2) == 0) | ||
798 | cp += 2; | ||
799 | else switch (incomment) { | ||
800 | case NO_COMMENT: | ||
801 | if (strncmp(cp, "/\\\n", 3) == 0) { | ||
802 | incomment = STARTING_COMMENT; | ||
803 | cp += 3; | ||
804 | } else if (strncmp(cp, "/*", 2) == 0) { | ||
805 | incomment = C_COMMENT; | ||
806 | cp += 2; | ||
807 | } else if (strncmp(cp, "//", 2) == 0) { | ||
808 | incomment = CXX_COMMENT; | ||
809 | cp += 2; | ||
810 | } else if (strncmp(cp, "\'", 1) == 0) { | ||
811 | incomment = CHAR_LITERAL; | ||
812 | linestate = LS_DIRTY; | ||
813 | cp += 1; | ||
814 | } else if (strncmp(cp, "\"", 1) == 0) { | ||
815 | incomment = STRING_LITERAL; | ||
816 | linestate = LS_DIRTY; | ||
817 | cp += 1; | ||
818 | } else if (strncmp(cp, "\n", 1) == 0) { | ||
819 | linestate = LS_START; | ||
820 | cp += 1; | ||
821 | } else if (strchr(" \t", *cp) != NULL) { | ||
822 | cp += 1; | ||
823 | } else | ||
824 | return (cp); | ||
825 | continue; | ||
826 | case CXX_COMMENT: | ||
827 | if (strncmp(cp, "\n", 1) == 0) { | ||
828 | incomment = NO_COMMENT; | ||
829 | linestate = LS_START; | ||
830 | } | ||
831 | cp += 1; | ||
832 | continue; | ||
833 | case CHAR_LITERAL: | ||
834 | case STRING_LITERAL: | ||
835 | if ((incomment == CHAR_LITERAL && cp[0] == '\'') || | ||
836 | (incomment == STRING_LITERAL && cp[0] == '\"')) { | ||
837 | incomment = NO_COMMENT; | ||
838 | cp += 1; | ||
839 | } else if (cp[0] == '\\') { | ||
840 | if (cp[1] == '\0') | ||
841 | cp += 1; | ||
842 | else | ||
843 | cp += 2; | ||
844 | } else if (strncmp(cp, "\n", 1) == 0) { | ||
845 | if (incomment == CHAR_LITERAL) | ||
846 | error("unterminated char literal"); | ||
847 | else | ||
848 | error("unterminated string literal"); | ||
849 | } else | ||
850 | cp += 1; | ||
851 | continue; | ||
852 | case C_COMMENT: | ||
853 | if (strncmp(cp, "*\\\n", 3) == 0) { | ||
854 | incomment = FINISHING_COMMENT; | ||
855 | cp += 3; | ||
856 | } else if (strncmp(cp, "*/", 2) == 0) { | ||
857 | incomment = NO_COMMENT; | ||
858 | cp += 2; | ||
859 | } else | ||
860 | cp += 1; | ||
861 | continue; | ||
862 | case STARTING_COMMENT: | ||
863 | if (*cp == '*') { | ||
864 | incomment = C_COMMENT; | ||
865 | cp += 1; | ||
866 | } else if (*cp == '/') { | ||
867 | incomment = CXX_COMMENT; | ||
868 | cp += 1; | ||
869 | } else { | ||
870 | incomment = NO_COMMENT; | ||
871 | linestate = LS_DIRTY; | ||
872 | } | ||
873 | continue; | ||
874 | case FINISHING_COMMENT: | ||
875 | if (*cp == '/') { | ||
876 | incomment = NO_COMMENT; | ||
877 | cp += 1; | ||
878 | } else | ||
879 | incomment = C_COMMENT; | ||
880 | continue; | ||
881 | default: | ||
882 | abort(); /* bug */ | ||
883 | } | ||
884 | return (cp); | ||
885 | } | ||
886 | |||
/*
 * Step over an identifier: return a pointer to the first character at
 * or after cp for which endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
897 | |||
898 | /* | ||
899 | * Look for the symbol in the symbol table. If is is found, we return | ||
900 | * the symbol table index, else we return -1. | ||
901 | */ | ||
902 | static int | ||
903 | findsym(const char *str) | ||
904 | { | ||
905 | const char *cp; | ||
906 | int symind; | ||
907 | |||
908 | cp = skipsym(str); | ||
909 | if (cp == str) | ||
910 | return (-1); | ||
911 | if (symlist) { | ||
912 | printf("%.*s\n", (int)(cp-str), str); | ||
913 | /* we don't care about the value of the symbol */ | ||
914 | return (0); | ||
915 | } | ||
916 | for (symind = 0; symind < nsyms; ++symind) { | ||
917 | if (strlcmp(symname[symind], str, cp-str) == 0) { | ||
918 | debug("findsym %s %s", symname[symind], | ||
919 | value[symind] ? value[symind] : ""); | ||
920 | return (symind); | ||
921 | } | ||
922 | } | ||
923 | return (-1); | ||
924 | } | ||
925 | |||
926 | /* | ||
927 | * Add a symbol to the symbol table. | ||
928 | */ | ||
929 | static void | ||
930 | addsym(bool ignorethis, bool definethis, char *sym) | ||
931 | { | ||
932 | int symind; | ||
933 | char *val; | ||
934 | |||
935 | symind = findsym(sym); | ||
936 | if (symind < 0) { | ||
937 | if (nsyms >= MAXSYMS) | ||
938 | errx(2, "too many symbols"); | ||
939 | symind = nsyms++; | ||
940 | } | ||
941 | symname[symind] = sym; | ||
942 | ignore[symind] = ignorethis; | ||
943 | val = sym + (skipsym(sym) - sym); | ||
944 | if (definethis) { | ||
945 | if (*val == '=') { | ||
946 | value[symind] = val+1; | ||
947 | *val = '\0'; | ||
948 | } else if (*val == '\0') | ||
949 | value[symind] = ""; | ||
950 | else | ||
951 | usage(); | ||
952 | } else { | ||
953 | if (*val != '\0') | ||
954 | usage(); | ||
955 | value[symind] = NULL; | ||
956 | } | ||
957 | } | ||
958 | |||
/*
 * Compare the string s with the first n characters of t.
 * This is like strncmp() with the extra requirement that s must end
 * exactly there, i.e. that s[n] == '\0'.  The result is zero on a match;
 * otherwise it is the difference of the first pair of characters that
 * differ, or the next character of s if s is the longer one.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return ((unsigned char)s[i] - (unsigned char)t[i]);
	}
	return ((unsigned char)s[i]);
}
973 | |||
974 | /* | ||
975 | * Diagnostics. | ||
976 | */ | ||
977 | static void | ||
978 | debug(const char *msg, ...) | ||
979 | { | ||
980 | va_list ap; | ||
981 | |||
982 | if (debugging) { | ||
983 | va_start(ap, msg); | ||
984 | vwarnx(msg, ap); | ||
985 | va_end(ap); | ||
986 | } | ||
987 | } | ||
988 | |||
989 | static void | ||
990 | error(const char *msg) | ||
991 | { | ||
992 | if (depth == 0) | ||
993 | warnx("%s: %d: %s", filename, linenum, msg); | ||
994 | else | ||
995 | warnx("%s: %d: %s (#if line %d depth %d)", | ||
996 | filename, linenum, msg, stifline[depth], depth); | ||
997 | errx(2, "output may be truncated"); | ||
998 | } | ||