aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/unifdef.c
diff options
context:
space:
mode:
author: Sam Ravnborg <sam@mars.ravnborg.org> 2006-07-23 14:39:59 -0400
committer: Sam Ravnborg <sam@neptun.ravnborg.org> 2006-09-25 03:00:00 -0400
commit: 01f1c8799ad8b23c190d59cf1c9e28e6fed390a4 (patch)
tree: 7619f68a3a123c128b01a60b5386d21829840313 /scripts/unifdef.c
parent: 48f1f0589dd09df6ea07d41c737db3218ad2cb79 (diff)
kbuild: add unifdef
This patch contains a raw copy of unifdef.c. The next patch will modify it and add the infrastructure to use it. Adding unifdef to the kernel is acked by the author. The reason to add unifdef as part of the kernel source is that it is not yet a common utility on most distributions. Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Diffstat (limited to 'scripts/unifdef.c')
-rw-r--r--scripts/unifdef.c998
1 files changed, 998 insertions, 0 deletions
diff --git a/scripts/unifdef.c b/scripts/unifdef.c
new file mode 100644
index 000000000000..5384b4377333
--- /dev/null
+++ b/scripts/unifdef.c
@@ -0,0 +1,998 @@
1/*
2 * Copyright (c) 2002 - 2005 Tony Finch <dot@dotat.at>. All rights reserved.
3 *
4 * This code is derived from software contributed to Berkeley by Dave Yost.
5 * It was rewritten to support ANSI C by Tony Finch. The original version of
6 * unifdef carried the following copyright notice. None of its code remains
7 * in this version (though some of the names remain).
8 *
9 * Copyright (c) 1985, 1993
10 * The Regents of the University of California. All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35
36#ifndef lint
37#if 0
38static const char copyright[] =
39"@(#) Copyright (c) 1985, 1993\n\
40 The Regents of the University of California. All rights reserved.\n";
41#endif
42#ifdef __IDSTRING
43__IDSTRING(Berkeley, "@(#)unifdef.c 8.1 (Berkeley) 6/6/93");
44__IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.8 2000/07/03 02:51:36 matt Exp $");
45__IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.171 2005/03/08 12:38:48 fanf2 Exp $");
46#endif
47#endif /* not lint */
48#ifdef __FBSDID
49__FBSDID("$FreeBSD: /repoman/r/ncvs/src/usr.bin/unifdef/unifdef.c,v 1.20 2005/05/21 09:55:09 ru Exp $");
50#endif
51
52/*
53 * unifdef - remove ifdef'ed lines
54 *
55 * Wishlist:
56 * provide an option which will append the name of the
57 * appropriate symbol after #else's and #endif's
58 * provide an option which will check symbols after
59 * #else's and #endif's to see that they match their
60 * corresponding #ifdef or #ifndef
61 *
62 * The first two items above require better buffer handling, which would
63 * also make it possible to handle all "dodgy" directives correctly.
64 */
65
66#include <ctype.h>
67#include <err.h>
68#include <stdarg.h>
69#include <stdbool.h>
70#include <stdio.h>
71#include <stdlib.h>
72#include <string.h>
73#include <unistd.h>
74
75size_t strlcpy(char *dst, const char *src, size_t siz);
76
77/* types of input lines: */
78typedef enum {
79 LT_TRUEI, /* a true #if with ignore flag */
80 LT_FALSEI, /* a false #if with ignore flag */
81 LT_IF, /* an unknown #if */
82 LT_TRUE, /* a true #if */
83 LT_FALSE, /* a false #if */
84 LT_ELIF, /* an unknown #elif */
85 LT_ELTRUE, /* a true #elif */
86 LT_ELFALSE, /* a false #elif */
87 LT_ELSE, /* #else */
88 LT_ENDIF, /* #endif */
89 LT_DODGY, /* flag: directive is not on one line */
90 LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
91 LT_PLAIN, /* ordinary line */
92 LT_EOF, /* end of file */
93 LT_COUNT
94} Linetype;
95
96static char const * const linetype_name[] = {
97 "TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
98 "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
99 "DODGY TRUEI", "DODGY FALSEI",
100 "DODGY IF", "DODGY TRUE", "DODGY FALSE",
101 "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
102 "DODGY ELSE", "DODGY ENDIF",
103 "PLAIN", "EOF"
104};
105
106/* state of #if processing */
107typedef enum {
108 IS_OUTSIDE,
109 IS_FALSE_PREFIX, /* false #if followed by false #elifs */
110 IS_TRUE_PREFIX, /* first non-false #(el)if is true */
111 IS_PASS_MIDDLE, /* first non-false #(el)if is unknown */
112 IS_FALSE_MIDDLE, /* a false #elif after a pass state */
113 IS_TRUE_MIDDLE, /* a true #elif after a pass state */
114 IS_PASS_ELSE, /* an else after a pass state */
115 IS_FALSE_ELSE, /* an else after a true state */
116 IS_TRUE_ELSE, /* an else after only false states */
117 IS_FALSE_TRAILER, /* #elifs after a true are false */
118 IS_COUNT
119} Ifstate;
120
121static char const * const ifstate_name[] = {
122 "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX",
123 "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE",
124 "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE",
125 "FALSE_TRAILER"
126};
127
128/* state of comment parser */
129typedef enum {
130 NO_COMMENT = false, /* outside a comment */
131 C_COMMENT, /* in a comment like this one */
132 CXX_COMMENT, /* between // and end of line */
133 STARTING_COMMENT, /* just after slash-backslash-newline */
134 FINISHING_COMMENT, /* star-backslash-newline in a C comment */
135 CHAR_LITERAL, /* inside '' */
136 STRING_LITERAL /* inside "" */
137} Comment_state;
138
139static char const * const comment_name[] = {
140 "NO", "C", "CXX", "STARTING", "FINISHING", "CHAR", "STRING"
141};
142
143/* state of preprocessor line parser */
144typedef enum {
145 LS_START, /* only space and comments on this line */
146 LS_HASH, /* only space, comments, and a hash */
147 LS_DIRTY /* this line can't be a preprocessor line */
148} Line_state;
149
150static char const * const linestate_name[] = {
151 "START", "HASH", "DIRTY"
152};
153
154/*
155 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
156 */
157#define MAXDEPTH 64 /* maximum #if nesting */
158#define MAXLINE 4096 /* maximum length of line */
159#define MAXSYMS 4096 /* maximum number of symbols */
160
161/*
162 * Sometimes when editing a keyword the replacement text is longer, so
163 * we leave some space at the end of the tline buffer to accommodate this.
164 */
165#define EDITSLOP 10
166
167/*
168 * Globals.
169 */
170
171static bool complement; /* -c: do the complement */
172static bool debugging; /* -d: debugging reports */
173static bool iocccok; /* -e: fewer IOCCC errors */
174static bool killconsts; /* -k: eval constant #ifs */
175static bool lnblank; /* -l: blank deleted lines */
176static bool lnnum; /* -n: add #line directives */
177static bool symlist; /* -s: output symbol list */
178static bool text; /* -t: this is a text file */
179
180static const char *symname[MAXSYMS]; /* symbol name */
181static const char *value[MAXSYMS]; /* -Dsym=value */
182static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
183static int nsyms; /* number of symbols */
184
185static FILE *input; /* input file pointer */
186static const char *filename; /* input file name */
187static int linenum; /* current line number */
188
189static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
190static char *keyword; /* used for editing #elif's */
191
192static Comment_state incomment; /* comment parser state */
193static Line_state linestate; /* #if line parser state */
194static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
195static bool ignoring[MAXDEPTH]; /* ignore comments state */
196static int stifline[MAXDEPTH]; /* start of current #if */
197static int depth; /* current #if nesting */
198static int delcount; /* count of deleted lines */
199static bool keepthis; /* don't delete constant #if */
200
201static int exitstat; /* program exit status */
202
203static void addsym(bool, bool, char *);
204static void debug(const char *, ...);
205static void done(void);
206static void error(const char *);
207static int findsym(const char *);
208static void flushline(bool);
209static Linetype getline(void);
210static Linetype ifeval(const char **);
211static void ignoreoff(void);
212static void ignoreon(void);
213static void keywordedit(const char *);
214static void nest(void);
215static void process(void);
216static const char *skipcomment(const char *);
217static const char *skipsym(const char *);
218static void state(Ifstate);
219static int strlcmp(const char *, const char *, size_t);
220static void unnest(void);
221static void usage(void);
222
223#define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_')
224
225/*
226 * The main program.
227 */
228int
229main(int argc, char *argv[])
230{
231 int opt;
232
233 while ((opt = getopt(argc, argv, "i:D:U:I:cdeklnst")) != -1)
234 switch (opt) {
235 case 'i': /* treat stuff controlled by these symbols as text */
236 /*
237 * For strict backwards-compatibility the U or D
238 * should be immediately after the -i but it doesn't
239 * matter much if we relax that requirement.
240 */
241 opt = *optarg++;
242 if (opt == 'D')
243 addsym(true, true, optarg);
244 else if (opt == 'U')
245 addsym(true, false, optarg);
246 else
247 usage();
248 break;
249 case 'D': /* define a symbol */
250 addsym(false, true, optarg);
251 break;
252 case 'U': /* undef a symbol */
253 addsym(false, false, optarg);
254 break;
255 case 'I':
256 /* no-op for compatibility with cpp */
257 break;
258 case 'c': /* treat -D as -U and vice versa */
259 complement = true;
260 break;
261 case 'd':
262 debugging = true;
263 break;
264 case 'e': /* fewer errors from dodgy lines */
265 iocccok = true;
266 break;
267 case 'k': /* process constant #ifs */
268 killconsts = true;
269 break;
270 case 'l': /* blank deleted lines instead of omitting them */
271 lnblank = true;
272 break;
273 case 'n': /* add #line directive after deleted lines */
274 lnnum = true;
275 break;
276 case 's': /* only output list of symbols that control #ifs */
277 symlist = true;
278 break;
279 case 't': /* don't parse C comments */
280 text = true;
281 break;
282 default:
283 usage();
284 }
285 argc -= optind;
286 argv += optind;
287 if (argc > 1) {
288 errx(2, "can only do one file");
289 } else if (argc == 1 && strcmp(*argv, "-") != 0) {
290 filename = *argv;
291 input = fopen(filename, "r");
292 if (input == NULL)
293 err(2, "can't open %s", filename);
294 } else {
295 filename = "[stdin]";
296 input = stdin;
297 }
298 process();
299 abort(); /* bug */
300}
301
302static void
303usage(void)
304{
305 fprintf(stderr, "usage: unifdef [-cdeklnst] [-Ipath]"
306 " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n");
307 exit(2);
308}
309
310/*
311 * A state transition function alters the global #if processing state
312 * in a particular way. The table below is indexed by the current
313 * processing state and the type of the current line.
314 *
315 * Nesting is handled by keeping a stack of states; some transition
316 * functions increase or decrease the depth. They also maintain the
317 * ignore state on a stack. In some complicated cases they have to
318 * alter the preprocessor directive, as follows.
319 *
320 * When we have processed a group that starts off with a known-false
321 * #if/#elif sequence (which has therefore been deleted) followed by a
322 * #elif that we don't understand and therefore must keep, we edit the
323 * latter into a #if to keep the nesting correct.
324 *
325 * When we find a true #elif in a group, the following block will
326 * always be kept and the rest of the sequence after the next #elif or
327 * #else will be discarded. We edit the #elif into a #else and the
328 * following directive to #endif since this has the desired behaviour.
329 *
330 * "Dodgy" directives are split across multiple lines, the most common
331 * example being a multi-line comment hanging off the right of the
332 * directive. We can handle them correctly only if there is no change
333 * from printing to dropping (or vice versa) caused by that directive.
334 * If the directive is the first of a group we have a choice between
335 * failing with an error, or passing it through unchanged instead of
336 * evaluating it. The latter is not the default to avoid questions from
337 * users about unifdef unexpectedly leaving behind preprocessor directives.
338 */
339typedef void state_fn(void);
340
341/* report an error */
342static void Eelif (void) { error("Inappropriate #elif"); }
343static void Eelse (void) { error("Inappropriate #else"); }
344static void Eendif(void) { error("Inappropriate #endif"); }
345static void Eeof (void) { error("Premature EOF"); }
346static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
347/* plain line handling */
348static void print (void) { flushline(true); }
349static void drop (void) { flushline(false); }
350/* output lacks group's start line */
351static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); }
352static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); }
353static void Selse (void) { drop(); state(IS_TRUE_ELSE); }
354/* print/pass this block */
355static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
356static void Pelse (void) { print(); state(IS_PASS_ELSE); }
357static void Pendif(void) { print(); unnest(); }
358/* discard this block */
359static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); }
360static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); }
361static void Delse (void) { drop(); state(IS_FALSE_ELSE); }
362static void Dendif(void) { drop(); unnest(); }
363/* first line of group */
364static void Fdrop (void) { nest(); Dfalse(); }
365static void Fpass (void) { nest(); Pelif(); }
366static void Ftrue (void) { nest(); Strue(); }
367static void Ffalse(void) { nest(); Sfalse(); }
368/* variable pedantry for obfuscated lines */
369static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
370static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); }
371static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
372/* ignore comments in this block */
373static void Idrop (void) { Fdrop(); ignoreon(); }
374static void Itrue (void) { Ftrue(); ignoreon(); }
375static void Ifalse(void) { Ffalse(); ignoreon(); }
376/* edit this line */
377static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); }
378static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); }
379static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
380static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
381
382static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
383/* IS_OUTSIDE */
384{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
385 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
386 print, done },
387/* IS_FALSE_PREFIX */
388{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
389 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
390 drop, Eeof },
391/* IS_TRUE_PREFIX */
392{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
393 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
394 print, Eeof },
395/* IS_PASS_MIDDLE */
396{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
397 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
398 print, Eeof },
399/* IS_FALSE_MIDDLE */
400{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
401 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
402 drop, Eeof },
403/* IS_TRUE_MIDDLE */
404{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
405 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
406 print, Eeof },
407/* IS_PASS_ELSE */
408{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
409 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
410 print, Eeof },
411/* IS_FALSE_ELSE */
412{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
413 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
414 drop, Eeof },
415/* IS_TRUE_ELSE */
416{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
417 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
418 print, Eeof },
419/* IS_FALSE_TRAILER */
420{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
421 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
422 drop, Eeof }
423/*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
424 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
425 PLAIN EOF */
426};
427
428/*
429 * State machine utility functions
430 */
431static void
432done(void)
433{
434 if (incomment)
435 error("EOF in comment");
436 exit(exitstat);
437}
438static void
439ignoreoff(void)
440{
441 if (depth == 0)
442 abort(); /* bug */
443 ignoring[depth] = ignoring[depth-1];
444}
445static void
446ignoreon(void)
447{
448 ignoring[depth] = true;
449}
450static void
451keywordedit(const char *replacement)
452{
453 strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
454 print();
455}
456static void
457nest(void)
458{
459 depth += 1;
460 if (depth >= MAXDEPTH)
461 error("Too many levels of nesting");
462 stifline[depth] = linenum;
463}
464static void
465unnest(void)
466{
467 if (depth == 0)
468 abort(); /* bug */
469 depth -= 1;
470}
471static void
472state(Ifstate is)
473{
474 ifstate[depth] = is;
475}
476
477/*
478 * Write a line to the output or not, according to command line options.
479 */
480static void
481flushline(bool keep)
482{
483 if (symlist)
484 return;
485 if (keep ^ complement) {
486 if (lnnum && delcount > 0)
487 printf("#line %d\n", linenum);
488 fputs(tline, stdout);
489 delcount = 0;
490 } else {
491 if (lnblank)
492 putc('\n', stdout);
493 exitstat = 1;
494 delcount += 1;
495 }
496}
497
498/*
499 * The driver for the state machine.
500 */
501static void
502process(void)
503{
504 Linetype lineval;
505
506 for (;;) {
507 linenum++;
508 lineval = getline();
509 trans_table[ifstate[depth]][lineval]();
510 debug("process %s -> %s depth %d",
511 linetype_name[lineval],
512 ifstate_name[ifstate[depth]], depth);
513 }
514}
515
516/*
517 * Parse a line and determine its type. We keep the preprocessor line
518 * parser state between calls in the global variable linestate, with
519 * help from skipcomment().
520 */
521static Linetype
522getline(void)
523{
524 const char *cp;
525 int cursym;
526 int kwlen;
527 Linetype retval;
528 Comment_state wascomment;
529
530 if (fgets(tline, MAXLINE, input) == NULL)
531 return (LT_EOF);
532 retval = LT_PLAIN;
533 wascomment = incomment;
534 cp = skipcomment(tline);
535 if (linestate == LS_START) {
536 if (*cp == '#') {
537 linestate = LS_HASH;
538 cp = skipcomment(cp + 1);
539 } else if (*cp != '\0')
540 linestate = LS_DIRTY;
541 }
542 if (!incomment && linestate == LS_HASH) {
543 keyword = tline + (cp - tline);
544 cp = skipsym(cp);
545 kwlen = cp - keyword;
546 /* no way can we deal with a continuation inside a keyword */
547 if (strncmp(cp, "\\\n", 2) == 0)
548 Eioccc();
549 if (strlcmp("ifdef", keyword, kwlen) == 0 ||
550 strlcmp("ifndef", keyword, kwlen) == 0) {
551 cp = skipcomment(cp);
552 if ((cursym = findsym(cp)) < 0)
553 retval = LT_IF;
554 else {
555 retval = (keyword[2] == 'n')
556 ? LT_FALSE : LT_TRUE;
557 if (value[cursym] == NULL)
558 retval = (retval == LT_TRUE)
559 ? LT_FALSE : LT_TRUE;
560 if (ignore[cursym])
561 retval = (retval == LT_TRUE)
562 ? LT_TRUEI : LT_FALSEI;
563 }
564 cp = skipsym(cp);
565 } else if (strlcmp("if", keyword, kwlen) == 0)
566 retval = ifeval(&cp);
567 else if (strlcmp("elif", keyword, kwlen) == 0)
568 retval = ifeval(&cp) - LT_IF + LT_ELIF;
569 else if (strlcmp("else", keyword, kwlen) == 0)
570 retval = LT_ELSE;
571 else if (strlcmp("endif", keyword, kwlen) == 0)
572 retval = LT_ENDIF;
573 else {
574 linestate = LS_DIRTY;
575 retval = LT_PLAIN;
576 }
577 cp = skipcomment(cp);
578 if (*cp != '\0') {
579 linestate = LS_DIRTY;
580 if (retval == LT_TRUE || retval == LT_FALSE ||
581 retval == LT_TRUEI || retval == LT_FALSEI)
582 retval = LT_IF;
583 if (retval == LT_ELTRUE || retval == LT_ELFALSE)
584 retval = LT_ELIF;
585 }
586 if (retval != LT_PLAIN && (wascomment || incomment)) {
587 retval += LT_DODGY;
588 if (incomment)
589 linestate = LS_DIRTY;
590 }
591 /* skipcomment should have changed the state */
592 if (linestate == LS_HASH)
593 abort(); /* bug */
594 }
595 if (linestate == LS_DIRTY) {
596 while (*cp != '\0')
597 cp = skipcomment(cp + 1);
598 }
599 debug("parser %s comment %s line",
600 comment_name[incomment], linestate_name[linestate]);
601 return (retval);
602}
603
604/*
605 * These are the binary operators that are supported by the expression
606 * evaluator. Note that if support for division is added then we also
607 * need short-circuiting booleans because of divide-by-zero.
608 */
609static int op_lt(int a, int b) { return (a < b); }
610static int op_gt(int a, int b) { return (a > b); }
611static int op_le(int a, int b) { return (a <= b); }
612static int op_ge(int a, int b) { return (a >= b); }
613static int op_eq(int a, int b) { return (a == b); }
614static int op_ne(int a, int b) { return (a != b); }
615static int op_or(int a, int b) { return (a || b); }
616static int op_and(int a, int b) { return (a && b); }
617
618/*
619 * An evaluation function takes three arguments, as follows: (1) a pointer to
620 * an element of the precedence table which lists the operators at the current
621 * level of precedence; (2) a pointer to an integer which will receive the
622 * value of the expression; and (3) a pointer to a char* that points to the
623 * expression to be evaluated and that is updated to the end of the expression
624 * when evaluation is complete. The function returns LT_FALSE if the value of
625 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
626 * expression could not be evaluated.
627 */
628struct ops;
629
630typedef Linetype eval_fn(const struct ops *, int *, const char **);
631
632static eval_fn eval_table, eval_unary;
633
634/*
635 * The precedence table. Expressions involving binary operators are evaluated
636 * in a table-driven way by eval_table. When it evaluates a subexpression it
637 * calls the inner function with its first argument pointing to the next
638 * element of the table. Innermost expressions have special non-table-driven
639 * handling.
640 */
641static const struct ops {
642 eval_fn *inner;
643 struct op {
644 const char *str;
645 int (*fn)(int, int);
646 } op[5];
647} eval_ops[] = {
648 { eval_table, { { "||", op_or } } },
649 { eval_table, { { "&&", op_and } } },
650 { eval_table, { { "==", op_eq },
651 { "!=", op_ne } } },
652 { eval_unary, { { "<=", op_le },
653 { ">=", op_ge },
654 { "<", op_lt },
655 { ">", op_gt } } }
656};
657
658/*
659 * Function for evaluating the innermost parts of expressions,
660 * viz. !expr (expr) defined(symbol) symbol number
661 * We reset the keepthis flag when we find a non-constant subexpression.
662 */
663static Linetype
664eval_unary(const struct ops *ops, int *valp, const char **cpp)
665{
666 const char *cp;
667 char *ep;
668 int sym;
669
670 cp = skipcomment(*cpp);
671 if (*cp == '!') {
672 debug("eval%d !", ops - eval_ops);
673 cp++;
674 if (eval_unary(ops, valp, &cp) == LT_IF)
675 return (LT_IF);
676 *valp = !*valp;
677 } else if (*cp == '(') {
678 cp++;
679 debug("eval%d (", ops - eval_ops);
680 if (eval_table(eval_ops, valp, &cp) == LT_IF)
681 return (LT_IF);
682 cp = skipcomment(cp);
683 if (*cp++ != ')')
684 return (LT_IF);
685 } else if (isdigit((unsigned char)*cp)) {
686 debug("eval%d number", ops - eval_ops);
687 *valp = strtol(cp, &ep, 0);
688 cp = skipsym(cp);
689 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
690 cp = skipcomment(cp+7);
691 debug("eval%d defined", ops - eval_ops);
692 if (*cp++ != '(')
693 return (LT_IF);
694 cp = skipcomment(cp);
695 sym = findsym(cp);
696 if (sym < 0)
697 return (LT_IF);
698 *valp = (value[sym] != NULL);
699 cp = skipsym(cp);
700 cp = skipcomment(cp);
701 if (*cp++ != ')')
702 return (LT_IF);
703 keepthis = false;
704 } else if (!endsym(*cp)) {
705 debug("eval%d symbol", ops - eval_ops);
706 sym = findsym(cp);
707 if (sym < 0)
708 return (LT_IF);
709 if (value[sym] == NULL)
710 *valp = 0;
711 else {
712 *valp = strtol(value[sym], &ep, 0);
713 if (*ep != '\0' || ep == value[sym])
714 return (LT_IF);
715 }
716 cp = skipsym(cp);
717 keepthis = false;
718 } else {
719 debug("eval%d bad expr", ops - eval_ops);
720 return (LT_IF);
721 }
722
723 *cpp = cp;
724 debug("eval%d = %d", ops - eval_ops, *valp);
725 return (*valp ? LT_TRUE : LT_FALSE);
726}
727
728/*
729 * Table-driven evaluation of binary operators.
730 */
731static Linetype
732eval_table(const struct ops *ops, int *valp, const char **cpp)
733{
734 const struct op *op;
735 const char *cp;
736 int val;
737
738 debug("eval%d", ops - eval_ops);
739 cp = *cpp;
740 if (ops->inner(ops+1, valp, &cp) == LT_IF)
741 return (LT_IF);
742 for (;;) {
743 cp = skipcomment(cp);
744 for (op = ops->op; op->str != NULL; op++)
745 if (strncmp(cp, op->str, strlen(op->str)) == 0)
746 break;
747 if (op->str == NULL)
748 break;
749 cp += strlen(op->str);
750 debug("eval%d %s", ops - eval_ops, op->str);
751 if (ops->inner(ops+1, &val, &cp) == LT_IF)
752 return (LT_IF);
753 *valp = op->fn(*valp, val);
754 }
755
756 *cpp = cp;
757 debug("eval%d = %d", ops - eval_ops, *valp);
758 return (*valp ? LT_TRUE : LT_FALSE);
759}
760
761/*
762 * Evaluate the expression on a #if or #elif line. If we can work out
763 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
764 * return just a generic LT_IF.
765 */
766static Linetype
767ifeval(const char **cpp)
768{
769 int ret;
770 int val;
771
772 debug("eval %s", *cpp);
773 keepthis = killconsts ? false : true;
774 ret = eval_table(eval_ops, &val, cpp);
775 debug("eval = %d", val);
776 return (keepthis ? LT_IF : ret);
777}
778
779/*
780 * Skip over comments, strings, and character literals and stop at the
781 * next character position that is not whitespace. Between calls we keep
782 * the comment state in the global variable incomment, and we also adjust
783 * the global variable linestate when we see a newline.
784 * XXX: doesn't cope with the buffer splitting inside a state transition.
785 */
786static const char *
787skipcomment(const char *cp)
788{
789 if (text || ignoring[depth]) {
790 for (; isspace((unsigned char)*cp); cp++)
791 if (*cp == '\n')
792 linestate = LS_START;
793 return (cp);
794 }
795 while (*cp != '\0')
796 /* don't reset to LS_START after a line continuation */
797 if (strncmp(cp, "\\\n", 2) == 0)
798 cp += 2;
799 else switch (incomment) {
800 case NO_COMMENT:
801 if (strncmp(cp, "/\\\n", 3) == 0) {
802 incomment = STARTING_COMMENT;
803 cp += 3;
804 } else if (strncmp(cp, "/*", 2) == 0) {
805 incomment = C_COMMENT;
806 cp += 2;
807 } else if (strncmp(cp, "//", 2) == 0) {
808 incomment = CXX_COMMENT;
809 cp += 2;
810 } else if (strncmp(cp, "\'", 1) == 0) {
811 incomment = CHAR_LITERAL;
812 linestate = LS_DIRTY;
813 cp += 1;
814 } else if (strncmp(cp, "\"", 1) == 0) {
815 incomment = STRING_LITERAL;
816 linestate = LS_DIRTY;
817 cp += 1;
818 } else if (strncmp(cp, "\n", 1) == 0) {
819 linestate = LS_START;
820 cp += 1;
821 } else if (strchr(" \t", *cp) != NULL) {
822 cp += 1;
823 } else
824 return (cp);
825 continue;
826 case CXX_COMMENT:
827 if (strncmp(cp, "\n", 1) == 0) {
828 incomment = NO_COMMENT;
829 linestate = LS_START;
830 }
831 cp += 1;
832 continue;
833 case CHAR_LITERAL:
834 case STRING_LITERAL:
835 if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
836 (incomment == STRING_LITERAL && cp[0] == '\"')) {
837 incomment = NO_COMMENT;
838 cp += 1;
839 } else if (cp[0] == '\\') {
840 if (cp[1] == '\0')
841 cp += 1;
842 else
843 cp += 2;
844 } else if (strncmp(cp, "\n", 1) == 0) {
845 if (incomment == CHAR_LITERAL)
846 error("unterminated char literal");
847 else
848 error("unterminated string literal");
849 } else
850 cp += 1;
851 continue;
852 case C_COMMENT:
853 if (strncmp(cp, "*\\\n", 3) == 0) {
854 incomment = FINISHING_COMMENT;
855 cp += 3;
856 } else if (strncmp(cp, "*/", 2) == 0) {
857 incomment = NO_COMMENT;
858 cp += 2;
859 } else
860 cp += 1;
861 continue;
862 case STARTING_COMMENT:
863 if (*cp == '*') {
864 incomment = C_COMMENT;
865 cp += 1;
866 } else if (*cp == '/') {
867 incomment = CXX_COMMENT;
868 cp += 1;
869 } else {
870 incomment = NO_COMMENT;
871 linestate = LS_DIRTY;
872 }
873 continue;
874 case FINISHING_COMMENT:
875 if (*cp == '/') {
876 incomment = NO_COMMENT;
877 cp += 1;
878 } else
879 incomment = C_COMMENT;
880 continue;
881 default:
882 abort(); /* bug */
883 }
884 return (cp);
885}
886
887/*
888 * Skip over an identifier.
889 */
890static const char *
891skipsym(const char *cp)
892{
893 while (!endsym(*cp))
894 ++cp;
895 return (cp);
896}
897
898/*
899 * Look for the symbol in the symbol table. If is is found, we return
900 * the symbol table index, else we return -1.
901 */
902static int
903findsym(const char *str)
904{
905 const char *cp;
906 int symind;
907
908 cp = skipsym(str);
909 if (cp == str)
910 return (-1);
911 if (symlist) {
912 printf("%.*s\n", (int)(cp-str), str);
913 /* we don't care about the value of the symbol */
914 return (0);
915 }
916 for (symind = 0; symind < nsyms; ++symind) {
917 if (strlcmp(symname[symind], str, cp-str) == 0) {
918 debug("findsym %s %s", symname[symind],
919 value[symind] ? value[symind] : "");
920 return (symind);
921 }
922 }
923 return (-1);
924}
925
926/*
927 * Add a symbol to the symbol table.
928 */
929static void
930addsym(bool ignorethis, bool definethis, char *sym)
931{
932 int symind;
933 char *val;
934
935 symind = findsym(sym);
936 if (symind < 0) {
937 if (nsyms >= MAXSYMS)
938 errx(2, "too many symbols");
939 symind = nsyms++;
940 }
941 symname[symind] = sym;
942 ignore[symind] = ignorethis;
943 val = sym + (skipsym(sym) - sym);
944 if (definethis) {
945 if (*val == '=') {
946 value[symind] = val+1;
947 *val = '\0';
948 } else if (*val == '\0')
949 value[symind] = "";
950 else
951 usage();
952 } else {
953 if (*val != '\0')
954 usage();
955 value[symind] = NULL;
956 }
957}
958
959/*
960 * Compare s with n characters of t.
961 * The same as strncmp() except that it checks that s[n] == '\0'.
962 */
963static int
964strlcmp(const char *s, const char *t, size_t n)
965{
966 while (n-- && *t != '\0')
967 if (*s != *t)
968 return ((unsigned char)*s - (unsigned char)*t);
969 else
970 ++s, ++t;
971 return ((unsigned char)*s);
972}
973
974/*
975 * Diagnostics.
976 */
977static void
978debug(const char *msg, ...)
979{
980 va_list ap;
981
982 if (debugging) {
983 va_start(ap, msg);
984 vwarnx(msg, ap);
985 va_end(ap);
986 }
987}
988
989static void
990error(const char *msg)
991{
992 if (depth == 0)
993 warnx("%s: %d: %s", filename, linenum, msg);
994 else
995 warnx("%s: %d: %s (#if line %d depth %d)",
996 filename, linenum, msg, stifline[depth], depth);
997 errx(2, "output may be truncated");
998}