aboutsummaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
authorPaulo Marques <pmarques@grupopie.com>2005-09-06 18:16:31 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-07 19:57:18 -0400
commitb3dbb4ecd46767b621df3dedd28788da93ee0cac (patch)
treeae0187791a1b1997efadd56461d2e2191af8cf22 /scripts
parente82894f84dbba130ab46c97748c03647f8204f92 (diff)
[PATCH] kallsyms: change compression algorithm
This patch changes the way the compression algorithm works. The base algorithm is similiar to the previous but we force the compressed token size to 2. Having a fixed size compressed token allows for a lot of optimizations, and that in turn allows this code to run over *all* the symbols faster than it did before over just a subset. Having it work over all the symbols will make it behave better when symbols change positions between passes, and the "inconsistent kallsyms" messages should become less frequent. In my tests the compression ratio was degraded by about 0.5%, but the results will depend greatly on the number of symbols to compress. Signed-off-by: Paulo Marques <pmarques@grupopie.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'scripts')
-rw-r--r--scripts/kallsyms.c424
1 files changed, 95 insertions, 329 deletions
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 9be41a9f5aff..1f53d4fc4c1d 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -24,75 +24,37 @@
24 * 24 *
25 */ 25 */
26 26
27#define _GNU_SOURCE
28
27#include <stdio.h> 29#include <stdio.h>
28#include <stdlib.h> 30#include <stdlib.h>
29#include <string.h> 31#include <string.h>
30#include <ctype.h> 32#include <ctype.h>
31 33
32/* maximum token length used. It doesn't pay to increase it a lot, because
33 * very long substrings probably don't repeat themselves too often. */
34#define MAX_TOK_SIZE 11
35#define KSYM_NAME_LEN 127 34#define KSYM_NAME_LEN 127
36 35
37/* we use only a subset of the complete symbol table to gather the token count,
38 * to speed up compression, at the expense of a little compression ratio */
39#define WORKING_SET 1024
40
41/* first find the best token only on the list of tokens that would profit more
42 * than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list.
43 * Increasing this value will put less tokens on the "good" list, so the search
44 * is faster. However, if the good list runs out of tokens, we must painfully
45 * search the bad list. */
46#define GOOD_BAD_THRESHOLD 10
47
48/* token hash parameters */
49#define HASH_BITS 18
50#define HASH_TABLE_SIZE (1 << HASH_BITS)
51#define HASH_MASK (HASH_TABLE_SIZE - 1)
52#define HASH_BASE_OFFSET 2166136261U
53#define HASH_FOLD(a) ((a)&(HASH_MASK))
54
55/* flags to mark symbols */
56#define SYM_FLAG_VALID 1
57#define SYM_FLAG_SAMPLED 2
58 36
59struct sym_entry { 37struct sym_entry {
60 unsigned long long addr; 38 unsigned long long addr;
61 char type; 39 unsigned int len;
62 unsigned char flags;
63 unsigned char len;
64 unsigned char *sym; 40 unsigned char *sym;
65}; 41};
66 42
67 43
68static struct sym_entry *table; 44static struct sym_entry *table;
69static int size, cnt; 45static unsigned int table_size, table_cnt;
70static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext; 46static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
71static int all_symbols = 0; 47static int all_symbols = 0;
72static char symbol_prefix_char = '\0'; 48static char symbol_prefix_char = '\0';
73 49
74struct token { 50int token_profit[0x10000];
75 unsigned char data[MAX_TOK_SIZE];
76 unsigned char len;
77 /* profit: the number of bytes that could be saved by inserting this
78 * token into the table */
79 int profit;
80 struct token *next; /* next token on the hash list */
81 struct token *right; /* next token on the good/bad list */
82 struct token *left; /* previous token on the good/bad list */
83 struct token *smaller; /* token that is less one letter than this one */
84 };
85
86struct token bad_head, good_head;
87struct token *hash_table[HASH_TABLE_SIZE];
88 51
89/* the table that holds the result of the compression */ 52/* the table that holds the result of the compression */
90unsigned char best_table[256][MAX_TOK_SIZE+1]; 53unsigned char best_table[256][2];
91unsigned char best_table_len[256]; 54unsigned char best_table_len[256];
92 55
93 56
94static void 57static void usage(void)
95usage(void)
96{ 58{
97 fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n"); 59 fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
98 exit(1); 60 exit(1);
@@ -102,21 +64,19 @@ usage(void)
102 * This ignores the intensely annoying "mapping symbols" found 64 * This ignores the intensely annoying "mapping symbols" found
103 * in ARM ELF files: $a, $t and $d. 65 * in ARM ELF files: $a, $t and $d.
104 */ 66 */
105static inline int 67static inline int is_arm_mapping_symbol(const char *str)
106is_arm_mapping_symbol(const char *str)
107{ 68{
108 return str[0] == '$' && strchr("atd", str[1]) 69 return str[0] == '$' && strchr("atd", str[1])
109 && (str[2] == '\0' || str[2] == '.'); 70 && (str[2] == '\0' || str[2] == '.');
110} 71}
111 72
112static int 73static int read_symbol(FILE *in, struct sym_entry *s)
113read_symbol(FILE *in, struct sym_entry *s)
114{ 74{
115 char str[500]; 75 char str[500];
116 char *sym; 76 char *sym, stype;
117 int rc; 77 int rc;
118 78
119 rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str); 79 rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
120 if (rc != 3) { 80 if (rc != 3) {
121 if (rc != EOF) { 81 if (rc != EOF) {
122 /* skip line */ 82 /* skip line */
@@ -143,7 +103,7 @@ read_symbol(FILE *in, struct sym_entry *s)
143 _sextratext = s->addr; 103 _sextratext = s->addr;
144 else if (strcmp(sym, "_eextratext") == 0) 104 else if (strcmp(sym, "_eextratext") == 0)
145 _eextratext = s->addr; 105 _eextratext = s->addr;
146 else if (toupper(s->type) == 'A') 106 else if (toupper(stype) == 'A')
147 { 107 {
148 /* Keep these useful absolute symbols */ 108 /* Keep these useful absolute symbols */
149 if (strcmp(sym, "__kernel_syscall_via_break") && 109 if (strcmp(sym, "__kernel_syscall_via_break") &&
@@ -153,22 +113,21 @@ read_symbol(FILE *in, struct sym_entry *s)
153 return -1; 113 return -1;
154 114
155 } 115 }
156 else if (toupper(s->type) == 'U' || 116 else if (toupper(stype) == 'U' ||
157 is_arm_mapping_symbol(sym)) 117 is_arm_mapping_symbol(sym))
158 return -1; 118 return -1;
159 119
160 /* include the type field in the symbol name, so that it gets 120 /* include the type field in the symbol name, so that it gets
161 * compressed together */ 121 * compressed together */
162 s->len = strlen(str) + 1; 122 s->len = strlen(str) + 1;
163 s->sym = (char *) malloc(s->len + 1); 123 s->sym = malloc(s->len + 1);
164 strcpy(s->sym + 1, str); 124 strcpy((char *)s->sym + 1, str);
165 s->sym[0] = s->type; 125 s->sym[0] = stype;
166 126
167 return 0; 127 return 0;
168} 128}
169 129
170static int 130static int symbol_valid(struct sym_entry *s)
171symbol_valid(struct sym_entry *s)
172{ 131{
173 /* Symbols which vary between passes. Passes 1 and 2 must have 132 /* Symbols which vary between passes. Passes 1 and 2 must have
174 * identical symbol lists. The kallsyms_* symbols below are only added 133 * identical symbol lists. The kallsyms_* symbols below are only added
@@ -214,30 +173,29 @@ symbol_valid(struct sym_entry *s)
214 } 173 }
215 174
216 /* Exclude symbols which vary between passes. */ 175 /* Exclude symbols which vary between passes. */
217 if (strstr(s->sym + offset, "_compiled.")) 176 if (strstr((char *)s->sym + offset, "_compiled."))
218 return 0; 177 return 0;
219 178
220 for (i = 0; special_symbols[i]; i++) 179 for (i = 0; special_symbols[i]; i++)
221 if( strcmp(s->sym + offset, special_symbols[i]) == 0 ) 180 if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 )
222 return 0; 181 return 0;
223 182
224 return 1; 183 return 1;
225} 184}
226 185
227static void 186static void read_map(FILE *in)
228read_map(FILE *in)
229{ 187{
230 while (!feof(in)) { 188 while (!feof(in)) {
231 if (cnt >= size) { 189 if (table_cnt >= table_size) {
232 size += 10000; 190 table_size += 10000;
233 table = realloc(table, sizeof(*table) * size); 191 table = realloc(table, sizeof(*table) * table_size);
234 if (!table) { 192 if (!table) {
235 fprintf(stderr, "out of memory\n"); 193 fprintf(stderr, "out of memory\n");
236 exit (1); 194 exit (1);
237 } 195 }
238 } 196 }
239 if (read_symbol(in, &table[cnt]) == 0) 197 if (read_symbol(in, &table[table_cnt]) == 0)
240 cnt++; 198 table_cnt++;
241 } 199 }
242} 200}
243 201
@@ -281,10 +239,9 @@ static int expand_symbol(unsigned char *data, int len, char *result)
281 return total; 239 return total;
282} 240}
283 241
284static void 242static void write_src(void)
285write_src(void)
286{ 243{
287 int i, k, off, valid; 244 unsigned int i, k, off;
288 unsigned int best_idx[256]; 245 unsigned int best_idx[256];
289 unsigned int *markers; 246 unsigned int *markers;
290 char buf[KSYM_NAME_LEN+1]; 247 char buf[KSYM_NAME_LEN+1];
@@ -301,33 +258,24 @@ write_src(void)
301 printf(".data\n"); 258 printf(".data\n");
302 259
303 output_label("kallsyms_addresses"); 260 output_label("kallsyms_addresses");
304 valid = 0; 261 for (i = 0; i < table_cnt; i++) {
305 for (i = 0; i < cnt; i++) { 262 printf("\tPTR\t%#llx\n", table[i].addr);
306 if (table[i].flags & SYM_FLAG_VALID) {
307 printf("\tPTR\t%#llx\n", table[i].addr);
308 valid++;
309 }
310 } 263 }
311 printf("\n"); 264 printf("\n");
312 265
313 output_label("kallsyms_num_syms"); 266 output_label("kallsyms_num_syms");
314 printf("\tPTR\t%d\n", valid); 267 printf("\tPTR\t%d\n", table_cnt);
315 printf("\n"); 268 printf("\n");
316 269
317 /* table of offset markers, that give the offset in the compressed stream 270 /* table of offset markers, that give the offset in the compressed stream
318 * every 256 symbols */ 271 * every 256 symbols */
319 markers = (unsigned int *) malloc(sizeof(unsigned int)*((valid + 255) / 256)); 272 markers = (unsigned int *) malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
320 273
321 output_label("kallsyms_names"); 274 output_label("kallsyms_names");
322 valid = 0;
323 off = 0; 275 off = 0;
324 for (i = 0; i < cnt; i++) { 276 for (i = 0; i < table_cnt; i++) {
325 277 if ((i & 0xFF) == 0)
326 if (!table[i].flags & SYM_FLAG_VALID) 278 markers[i >> 8] = off;
327 continue;
328
329 if ((valid & 0xFF) == 0)
330 markers[valid >> 8] = off;
331 279
332 printf("\t.byte 0x%02x", table[i].len); 280 printf("\t.byte 0x%02x", table[i].len);
333 for (k = 0; k < table[i].len; k++) 281 for (k = 0; k < table[i].len; k++)
@@ -335,12 +283,11 @@ write_src(void)
335 printf("\n"); 283 printf("\n");
336 284
337 off += table[i].len + 1; 285 off += table[i].len + 1;
338 valid++;
339 } 286 }
340 printf("\n"); 287 printf("\n");
341 288
342 output_label("kallsyms_markers"); 289 output_label("kallsyms_markers");
343 for (i = 0; i < ((valid + 255) >> 8); i++) 290 for (i = 0; i < ((table_cnt + 255) >> 8); i++)
344 printf("\tPTR\t%d\n", markers[i]); 291 printf("\tPTR\t%d\n", markers[i]);
345 printf("\n"); 292 printf("\n");
346 293
@@ -350,7 +297,7 @@ write_src(void)
350 off = 0; 297 off = 0;
351 for (i = 0; i < 256; i++) { 298 for (i = 0; i < 256; i++) {
352 best_idx[i] = off; 299 best_idx[i] = off;
353 expand_symbol(best_table[i],best_table_len[i],buf); 300 expand_symbol(best_table[i], best_table_len[i], buf);
354 printf("\t.asciz\t\"%s\"\n", buf); 301 printf("\t.asciz\t\"%s\"\n", buf);
355 off += strlen(buf) + 1; 302 off += strlen(buf) + 1;
356 } 303 }
@@ -365,153 +312,13 @@ write_src(void)
365 312
366/* table lookup compression functions */ 313/* table lookup compression functions */
367 314
368static inline unsigned int rehash_token(unsigned int hash, unsigned char data)
369{
370 return ((hash * 16777619) ^ data);
371}
372
373static unsigned int hash_token(unsigned char *data, int len)
374{
375 unsigned int hash=HASH_BASE_OFFSET;
376 int i;
377
378 for (i = 0; i < len; i++)
379 hash = rehash_token(hash, data[i]);
380
381 return HASH_FOLD(hash);
382}
383
384/* find a token given its data and hash value */
385static struct token *find_token_hash(unsigned char *data, int len, unsigned int hash)
386{
387 struct token *ptr;
388
389 ptr = hash_table[hash];
390
391 while (ptr) {
392 if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0))
393 return ptr;
394 ptr=ptr->next;
395 }
396
397 return NULL;
398}
399
400static inline void insert_token_in_group(struct token *head, struct token *ptr)
401{
402 ptr->right = head->right;
403 ptr->right->left = ptr;
404 head->right = ptr;
405 ptr->left = head;
406}
407
408static inline void remove_token_from_group(struct token *ptr)
409{
410 ptr->left->right = ptr->right;
411 ptr->right->left = ptr->left;
412}
413
414
415/* build the counts for all the tokens that start with "data", and have lenghts
416 * from 2 to "len" */
417static void learn_token(unsigned char *data, int len)
418{
419 struct token *ptr,*last_ptr;
420 int i, newprofit;
421 unsigned int hash = HASH_BASE_OFFSET;
422 unsigned int hashes[MAX_TOK_SIZE + 1];
423
424 if (len > MAX_TOK_SIZE)
425 len = MAX_TOK_SIZE;
426
427 /* calculate and store the hash values for all the sub-tokens */
428 hash = rehash_token(hash, data[0]);
429 for (i = 2; i <= len; i++) {
430 hash = rehash_token(hash, data[i-1]);
431 hashes[i] = HASH_FOLD(hash);
432 }
433
434 last_ptr = NULL;
435 ptr = NULL;
436
437 for (i = len; i >= 2; i--) {
438 hash = hashes[i];
439
440 if (!ptr) ptr = find_token_hash(data, i, hash);
441
442 if (!ptr) {
443 /* create a new token entry */
444 ptr = (struct token *) malloc(sizeof(*ptr));
445
446 memcpy(ptr->data, data, i);
447 ptr->len = i;
448
449 /* when we create an entry, it's profit is 0 because
450 * we also take into account the size of the token on
451 * the compressed table. We then subtract GOOD_BAD_THRESHOLD
452 * so that the test to see if this token belongs to
453 * the good or bad list, is a comparison to zero */
454 ptr->profit = -GOOD_BAD_THRESHOLD;
455
456 ptr->next = hash_table[hash];
457 hash_table[hash] = ptr;
458
459 insert_token_in_group(&bad_head, ptr);
460
461 ptr->smaller = NULL;
462 } else {
463 newprofit = ptr->profit + (ptr->len - 1);
464 /* check to see if this token needs to be moved to a
465 * different list */
466 if((ptr->profit < 0) && (newprofit >= 0)) {
467 remove_token_from_group(ptr);
468 insert_token_in_group(&good_head,ptr);
469 }
470 ptr->profit = newprofit;
471 }
472
473 if (last_ptr) last_ptr->smaller = ptr;
474 last_ptr = ptr;
475
476 ptr = ptr->smaller;
477 }
478}
479
480/* decrease the counts for all the tokens that start with "data", and have lenghts
481 * from 2 to "len". This function is much simpler than learn_token because we have
482 * more guarantees (tho tokens exist, the ->smaller pointer is set, etc.)
483 * The two separate functions exist only because of compression performance */
484static void forget_token(unsigned char *data, int len)
485{
486 struct token *ptr;
487 int i, newprofit;
488 unsigned int hash=0;
489
490 if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE;
491
492 hash = hash_token(data, len);
493 ptr = find_token_hash(data, len, hash);
494
495 for (i = len; i >= 2; i--) {
496
497 newprofit = ptr->profit - (ptr->len - 1);
498 if ((ptr->profit >= 0) && (newprofit < 0)) {
499 remove_token_from_group(ptr);
500 insert_token_in_group(&bad_head, ptr);
501 }
502 ptr->profit=newprofit;
503
504 ptr=ptr->smaller;
505 }
506}
507
508/* count all the possible tokens in a symbol */ 315/* count all the possible tokens in a symbol */
509static void learn_symbol(unsigned char *symbol, int len) 316static void learn_symbol(unsigned char *symbol, int len)
510{ 317{
511 int i; 318 int i;
512 319
513 for (i = 0; i < len - 1; i++) 320 for (i = 0; i < len - 1; i++)
514 learn_token(symbol + i, len - i); 321 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
515} 322}
516 323
517/* decrease the count for all the possible tokens in a symbol */ 324/* decrease the count for all the possible tokens in a symbol */
@@ -520,117 +327,90 @@ static void forget_symbol(unsigned char *symbol, int len)
520 int i; 327 int i;
521 328
522 for (i = 0; i < len - 1; i++) 329 for (i = 0; i < len - 1; i++)
523 forget_token(symbol + i, len - i); 330 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
524} 331}
525 332
526/* set all the symbol flags and do the initial token count */ 333/* remove all the invalid symbols from the table and do the initial token count */
527static void build_initial_tok_table(void) 334static void build_initial_tok_table(void)
528{ 335{
529 int i, use_it, valid; 336 unsigned int i, pos;
530 337
531 valid = 0; 338 pos = 0;
532 for (i = 0; i < cnt; i++) { 339 for (i = 0; i < table_cnt; i++) {
533 table[i].flags = 0;
534 if ( symbol_valid(&table[i]) ) { 340 if ( symbol_valid(&table[i]) ) {
535 table[i].flags |= SYM_FLAG_VALID; 341 if (pos != i)
536 valid++; 342 table[pos] = table[i];
537 } 343 learn_symbol(table[pos].sym, table[pos].len);
538 } 344 pos++;
539
540 use_it = 0;
541 for (i = 0; i < cnt; i++) {
542
543 /* subsample the available symbols. This method is almost like
544 * a Bresenham's algorithm to get uniformly distributed samples
545 * across the symbol table */
546 if (table[i].flags & SYM_FLAG_VALID) {
547
548 use_it += WORKING_SET;
549
550 if (use_it >= valid) {
551 table[i].flags |= SYM_FLAG_SAMPLED;
552 use_it -= valid;
553 }
554 } 345 }
555 if (table[i].flags & SYM_FLAG_SAMPLED)
556 learn_symbol(table[i].sym, table[i].len);
557 } 346 }
347 table_cnt = pos;
558} 348}
559 349
560/* replace a given token in all the valid symbols. Use the sampled symbols 350/* replace a given token in all the valid symbols. Use the sampled symbols
561 * to update the counts */ 351 * to update the counts */
562static void compress_symbols(unsigned char *str, int tlen, int idx) 352static void compress_symbols(unsigned char *str, int idx)
563{ 353{
564 int i, len, learn, size; 354 unsigned int i, len, size;
565 unsigned char *p; 355 unsigned char *p1, *p2;
566 356
567 for (i = 0; i < cnt; i++) { 357 for (i = 0; i < table_cnt; i++) {
568
569 if (!(table[i].flags & SYM_FLAG_VALID)) continue;
570 358
571 len = table[i].len; 359 len = table[i].len;
572 learn = 0; 360 p1 = table[i].sym;
573 p = table[i].sym; 361
362 /* find the token on the symbol */
363 p2 = memmem(p1, len, str, 2);
364 if (!p2) continue;
365
366 /* decrease the counts for this symbol's tokens */
367 forget_symbol(table[i].sym, len);
368
369 size = len;
574 370
575 do { 371 do {
372 *p2 = idx;
373 p2++;
374 size -= (p2 - p1);
375 memmove(p2, p2 + 1, size);
376 p1 = p2;
377 len--;
378
379 if (size < 2) break;
380
576 /* find the token on the symbol */ 381 /* find the token on the symbol */
577 p = (unsigned char *) strstr((char *) p, (char *) str); 382 p2 = memmem(p1, size, str, 2);
578 if (!p) break;
579
580 if (!learn) {
581 /* if this symbol was used to count, decrease it */
582 if (table[i].flags & SYM_FLAG_SAMPLED)
583 forget_symbol(table[i].sym, len);
584 learn = 1;
585 }
586 383
587 *p = idx; 384 } while (p2);
588 size = (len - (p - table[i].sym)) - tlen + 1;
589 memmove(p + 1, p + tlen, size);
590 p++;
591 len -= tlen - 1;
592 385
593 } while (size >= tlen); 386 table[i].len = len;
594 387
595 if(learn) { 388 /* increase the counts for this symbol's new tokens */
596 table[i].len = len; 389 learn_symbol(table[i].sym, len);
597 /* if this symbol was used to count, learn it again */
598 if(table[i].flags & SYM_FLAG_SAMPLED)
599 learn_symbol(table[i].sym, len);
600 }
601 } 390 }
602} 391}
603 392
604/* search the token with the maximum profit */ 393/* search the token with the maximum profit */
605static struct token *find_best_token(void) 394static int find_best_token(void)
606{ 395{
607 struct token *ptr,*best,*head; 396 int i, best, bestprofit;
608 int bestprofit;
609 397
610 bestprofit=-10000; 398 bestprofit=-10000;
399 best = 0;
611 400
612 /* failsafe: if the "good" list is empty search from the "bad" list */ 401 for (i = 0; i < 0x10000; i++) {
613 if(good_head.right == &good_head) head = &bad_head; 402 if (token_profit[i] > bestprofit) {
614 else head = &good_head; 403 best = i;
615 404 bestprofit = token_profit[i];
616 ptr = head->right;
617 best = NULL;
618 while (ptr != head) {
619 if (ptr->profit > bestprofit) {
620 bestprofit = ptr->profit;
621 best = ptr;
622 } 405 }
623 ptr = ptr->right;
624 } 406 }
625
626 return best; 407 return best;
627} 408}
628 409
629/* this is the core of the algorithm: calculate the "best" table */ 410/* this is the core of the algorithm: calculate the "best" table */
630static void optimize_result(void) 411static void optimize_result(void)
631{ 412{
632 struct token *best; 413 int i, best;
633 int i;
634 414
635 /* using the '\0' symbol last allows compress_symbols to use standard 415 /* using the '\0' symbol last allows compress_symbols to use standard
636 * fast string functions */ 416 * fast string functions */
@@ -644,14 +424,12 @@ static void optimize_result(void)
644 best = find_best_token(); 424 best = find_best_token();
645 425
646 /* place it in the "best" table */ 426 /* place it in the "best" table */
647 best_table_len[i] = best->len; 427 best_table_len[i] = 2;
648 memcpy(best_table[i], best->data, best_table_len[i]); 428 best_table[i][0] = best & 0xFF;
649 /* zero terminate the token so that we can use strstr 429 best_table[i][1] = (best >> 8) & 0xFF;
650 in compress_symbols */
651 best_table[i][best_table_len[i]]='\0';
652 430
653 /* replace this token in all the valid symbols */ 431 /* replace this token in all the valid symbols */
654 compress_symbols(best_table[i], best_table_len[i], i); 432 compress_symbols(best_table[i], i);
655 } 433 }
656 } 434 }
657} 435}
@@ -659,39 +437,28 @@ static void optimize_result(void)
659/* start by placing the symbols that are actually used on the table */ 437/* start by placing the symbols that are actually used on the table */
660static void insert_real_symbols_in_table(void) 438static void insert_real_symbols_in_table(void)
661{ 439{
662 int i, j, c; 440 unsigned int i, j, c;
663 441
664 memset(best_table, 0, sizeof(best_table)); 442 memset(best_table, 0, sizeof(best_table));
665 memset(best_table_len, 0, sizeof(best_table_len)); 443 memset(best_table_len, 0, sizeof(best_table_len));
666 444
667 for (i = 0; i < cnt; i++) { 445 for (i = 0; i < table_cnt; i++) {
668 if (table[i].flags & SYM_FLAG_VALID) { 446 for (j = 0; j < table[i].len; j++) {
669 for (j = 0; j < table[i].len; j++) { 447 c = table[i].sym[j];
670 c = table[i].sym[j]; 448 best_table[c][0]=c;
671 best_table[c][0]=c; 449 best_table_len[c]=1;
672 best_table_len[c]=1;
673 }
674 } 450 }
675 } 451 }
676} 452}
677 453
678static void optimize_token_table(void) 454static void optimize_token_table(void)
679{ 455{
680 memset(hash_table, 0, sizeof(hash_table));
681
682 good_head.left = &good_head;
683 good_head.right = &good_head;
684
685 bad_head.left = &bad_head;
686 bad_head.right = &bad_head;
687
688 build_initial_tok_table(); 456 build_initial_tok_table();
689 457
690 insert_real_symbols_in_table(); 458 insert_real_symbols_in_table();
691 459
692 /* When valid symbol is not registered, exit to error */ 460 /* When valid symbol is not registered, exit to error */
693 if (good_head.left == good_head.right && 461 if (!table_cnt) {
694 bad_head.left == bad_head.right) {
695 fprintf(stderr, "No valid symbol.\n"); 462 fprintf(stderr, "No valid symbol.\n");
696 exit(1); 463 exit(1);
697 } 464 }
@@ -700,8 +467,7 @@ static void optimize_token_table(void)
700} 467}
701 468
702 469
703int 470int main(int argc, char **argv)
704main(int argc, char **argv)
705{ 471{
706 if (argc >= 2) { 472 if (argc >= 2) {
707 int i; 473 int i;