1 files changed, 95 insertions, 329 deletions
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 9be41a9f5aff..1f53d4fc4c1d 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -24,75 +24,37 @@
 *
 */
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
-/* maximum token length used. It doesn't pay to increase it a lot, because
- * very long substrings probably don't repeat themselves too often. */
-#define MAX_TOK_SIZE            11
 #define KSYM_NAME_LEN           127
-/* we use only a subset of the complete symbol table to gather the token count,
- * to speed up compression, at the expense of a little compression ratio */
-#define WORKING_SET             1024
-/* first find the best token only on the list of tokens that would profit more
- * than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list.
- * Increasing this value will put less tokens on the "good" list, so the search
- * is faster. However, if the good list runs out of tokens, we must painfully
- * search the bad list. */
-#define GOOD_BAD_THRESHOLD      10
-/* token hash parameters */
-#define HASH_BITS               18
-#define HASH_TABLE_SIZE         (1 << HASH_BITS)
-#define HASH_MASK               (HASH_TABLE_SIZE - 1)
-#define HASH_BASE_OFFSET        2166136261U
-#define HASH_FOLD(a)            ((a)&(HASH_MASK))
-/* flags to mark symbols */
-#define SYM_FLAG_VALID          1
-#define SYM_FLAG_SAMPLED        2
 struct sym_entry {
        unsigned long long addr;
-        char type;
+        unsigned int len;
-        unsigned char flags;
-        unsigned char len;
        unsigned char *sym;
 };
 static struct sym_entry *table;
-static int size, cnt;
+static unsigned int table_size, table_cnt;
 static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
 static int all_symbols = 0;
 static char symbol_prefix_char = '\0';
-struct token {
+int token_profit[0x10000];
-        unsigned char data[MAX_TOK_SIZE];
-        unsigned char len;
-        /* profit: the number of bytes that could be saved by inserting this
-         * token into the table */
-        int profit;
-        struct token *next;     /* next token on the hash list */
-        struct token *right;    /* next token on the good/bad list */
-        struct token *left;    /* previous token on the good/bad list */
-        struct token *smaller; /* token that is less one letter than this one */
-        };
-struct token bad_head, good_head;
-struct token *hash_table[HASH_TABLE_SIZE];
 /* the table that holds the result of the compression */
-unsigned char best_table[256][MAX_TOK_SIZE+1];
+unsigned char best_table[256][2];
 unsigned char best_table_len[256];
-static void
+static void usage(void)
-usage(void)
 {
        fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
        exit(1);
@@ -102,21 +64,19 @@ usage(void)
 * This ignores the intensely annoying "mapping symbols" found
 * in ARM ELF files: $a, $t and $d.
 */
-static inline int
+static inline int is_arm_mapping_symbol(const char *str)
-is_arm_mapping_symbol(const char *str)
 {
        return str[0] == '$' && strchr("atd", str[1])
               && (str[2] == '\0' || str[2] == '.');
 }
-static int
+static int read_symbol(FILE *in, struct sym_entry *s)
-read_symbol(FILE *in, struct sym_entry *s)
 {
        char str[500];
-        char *sym;
+        char *sym, stype;
        int rc;
-        rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str);
+        rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
        if (rc != 3) {
                if (rc != EOF) {
                        /* skip line */
@@ -143,7 +103,7 @@ read_symbol(FILE *in, struct sym_entry *s)
                _sextratext = s->addr;
        else if (strcmp(sym, "_eextratext") == 0)
                _eextratext = s->addr;
-        else if (toupper(s->type) == 'A')
+        else if (toupper(stype) == 'A')
        {
                /* Keep these useful absolute symbols */
                if (strcmp(sym, "__kernel_syscall_via_break") &&
@@ -153,22 +113,21 @@ read_symbol(FILE *in, struct sym_entry *s)
                        return -1;
        }
-        else if (toupper(s->type) == 'U' ||
+        else if (toupper(stype) == 'U' ||
                 is_arm_mapping_symbol(sym))
                return -1;
        /* include the type field in the symbol name, so that it gets
         * compressed together */
        s->len = strlen(str) + 1;
-        s->sym = (char *) malloc(s->len + 1);
+        s->sym = malloc(s->len + 1);
-        strcpy(s->sym + 1, str);
+        strcpy((char *)s->sym + 1, str);
-        s->sym[0] = s->type;
+        s->sym[0] = stype;
        return 0;
 }
-static int
+static int symbol_valid(struct sym_entry *s)
-symbol_valid(struct sym_entry *s)
 {
        /* Symbols which vary between passes.  Passes 1 and 2 must have
         * identical symbol lists.  The kallsyms_* symbols below are only added
@@ -214,30 +173,29 @@ symbol_valid(struct sym_entry *s)
        }
        /* Exclude symbols which vary between passes. */
-        if (strstr(s->sym + offset, "_compiled."))
+        if (strstr((char *)s->sym + offset, "_compiled."))
                return 0;
        for (i = 0; special_symbols[i]; i++)
-                if( strcmp(s->sym + offset, special_symbols[i]) == 0 )
+                if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 )
                        return 0;
        return 1;
 }
-static void
+static void read_map(FILE *in)
-read_map(FILE *in)
 {
        while (!feof(in)) {
-                if (cnt >= size) {
+                if (table_cnt >= table_size) {
-                        size += 10000;
+                        table_size += 10000;
-                        table = realloc(table, sizeof(*table) * size);
+                        table = realloc(table, sizeof(*table) * table_size);
                        if (!table) {
                                fprintf(stderr, "out of memory\n");
                                exit (1);
                        }
                }
-                if (read_symbol(in, &table[cnt]) == 0)
+                if (read_symbol(in, &table[table_cnt]) == 0)
-                        cnt++;
+                        table_cnt++;
        }
 }
@@ -281,10 +239,9 @@ static int expand_symbol(unsigned char *data, int len, char *result)
        return total;
 }
-static void
+static void write_src(void)
-write_src(void)
 {
-        int i, k, off, valid;
+        unsigned int i, k, off;
        unsigned int best_idx[256];
        unsigned int *markers;
        char buf[KSYM_NAME_LEN+1];
@@ -301,33 +258,24 @@ write_src(void)
        printf(".data\n");
        output_label("kallsyms_addresses");
-        valid = 0;
+        for (i = 0; i < table_cnt; i++) {
-        for (i = 0; i < cnt; i++) {
+                printf("\tPTR\t%#llx\n", table[i].addr);
-                if (table[i].flags & SYM_FLAG_VALID) {
-                        printf("\tPTR\t%#llx\n", table[i].addr);
-                        valid++;
-                }
        }
        printf("\n");
        output_label("kallsyms_num_syms");
-        printf("\tPTR\t%d\n", valid);
+        printf("\tPTR\t%d\n", table_cnt);
        printf("\n");
        /* table of offset markers, that give the offset in the compressed stream
         * every 256 symbols */
-        markers = (unsigned int *) malloc(sizeof(unsigned int)*((valid + 255) / 256));
+        markers = (unsigned int *) malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
        output_label("kallsyms_names");
-        valid = 0;
        off = 0;
-        for (i = 0; i < cnt; i++) {
+        for (i = 0; i < table_cnt; i++) {
+                if ((i & 0xFF) == 0)
-                if (!table[i].flags & SYM_FLAG_VALID)
+                        markers[i >> 8] = off;
-                        continue;
-                if ((valid & 0xFF) == 0)
-                        markers[valid >> 8] = off;
                printf("\t.byte 0x%02x", table[i].len);
                for (k = 0; k < table[i].len; k++)
@@ -335,12 +283,11 @@ write_src(void)
                printf("\n");
                off += table[i].len + 1;
-                valid++;
        }
        printf("\n");
        output_label("kallsyms_markers");
-        for (i = 0; i < ((valid + 255) >> 8); i++)
+        for (i = 0; i < ((table_cnt + 255) >> 8); i++)
                printf("\tPTR\t%d\n", markers[i]);
        printf("\n");
@@ -350,7 +297,7 @@ write_src(void)
        off = 0;
        for (i = 0; i < 256; i++) {
                best_idx[i] = off;
-                expand_symbol(best_table[i],best_table_len[i],buf);
+                expand_symbol(best_table[i], best_table_len[i], buf);
                printf("\t.asciz\t\"%s\"\n", buf);
                off += strlen(buf) + 1;
        }
@@ -365,153 +312,13 @@ write_src(void)
 /* table lookup compression functions */
-static inline unsigned int rehash_token(unsigned int hash, unsigned char data)
-{
-        return ((hash * 16777619) ^ data);
-}
-static unsigned int hash_token(unsigned char *data, int len)
-{
-        unsigned int hash=HASH_BASE_OFFSET;
-        int i;
-        for (i = 0; i < len; i++)
-                hash = rehash_token(hash, data[i]);
-        return HASH_FOLD(hash);
-}
-/* find a token given its data and hash value */
-static struct token *find_token_hash(unsigned char *data, int len, unsigned int hash)
-{
-        struct token *ptr;
-        ptr = hash_table[hash];
-        while (ptr) {
-                if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0))
-                        return ptr;
-                ptr=ptr->next;
-        }
-        return NULL;
-}
-static inline void insert_token_in_group(struct token *head, struct token *ptr)
-{
-        ptr->right = head->right;
-        ptr->right->left = ptr;
-        head->right = ptr;
-        ptr->left = head;
-}
-static inline void remove_token_from_group(struct token *ptr)
-{
-        ptr->left->right = ptr->right;
-        ptr->right->left = ptr->left;
-}
-/* build the counts for all the tokens that start with "data", and have lenghts
- * from 2 to "len" */
-static void learn_token(unsigned char *data, int len)
-{
-        struct token *ptr,*last_ptr;
-        int i, newprofit;
-        unsigned int hash = HASH_BASE_OFFSET;
-        unsigned int hashes[MAX_TOK_SIZE + 1];
-        if (len > MAX_TOK_SIZE)
-                len = MAX_TOK_SIZE;
-        /* calculate and store the hash values for all the sub-tokens */
-        hash = rehash_token(hash, data[0]);
-        for (i = 2; i <= len; i++) {
-                hash = rehash_token(hash, data[i-1]);
-                hashes[i] = HASH_FOLD(hash);
-        }
-        last_ptr = NULL;
-        ptr = NULL;
-        for (i = len; i >= 2; i--) {
-                hash = hashes[i];
-                if (!ptr) ptr = find_token_hash(data, i, hash);
-                if (!ptr) {
-                        /* create a new token entry */
-                        ptr = (struct token *) malloc(sizeof(*ptr));
-                        memcpy(ptr->data, data, i);
-                        ptr->len = i;
-                        /* when we create an entry, it's profit is 0 because
-                         * we also take into account the size of the token on
-                         * the compressed table. We then subtract GOOD_BAD_THRESHOLD
-                         * so that the test to see if this token belongs to
-                         * the good or bad list, is a comparison to zero */
-                        ptr->profit = -GOOD_BAD_THRESHOLD;
-                        ptr->next = hash_table[hash];
-                        hash_table[hash] = ptr;
-                        insert_token_in_group(&bad_head, ptr);
-                        ptr->smaller = NULL;
-                } else {
-                        newprofit = ptr->profit + (ptr->len - 1);
-                        /* check to see if this token needs to be moved to a
-                         * different list */
-                        if((ptr->profit < 0) && (newprofit >= 0)) {
-                                remove_token_from_group(ptr);
-                                insert_token_in_group(&good_head,ptr);
-                        }
-                        ptr->profit = newprofit;
-                }
-                if (last_ptr) last_ptr->smaller = ptr;
-                last_ptr = ptr;
-                ptr = ptr->smaller;
-        }
-}
-/* decrease the counts for all the tokens that start with "data", and have lenghts
- * from 2 to "len". This function is much simpler than learn_token because we have
- * more guarantees (tho tokens exist, the ->smaller pointer is set, etc.)
- * The two separate functions exist only because of compression performance */
-static void forget_token(unsigned char *data, int len)
-{
-        struct token *ptr;
-        int i, newprofit;
-        unsigned int hash=0;
-        if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE;
-        hash = hash_token(data, len);
-        ptr = find_token_hash(data, len, hash);
-        for (i = len; i >= 2; i--) {
-                newprofit = ptr->profit - (ptr->len - 1);
-                if ((ptr->profit >= 0) && (newprofit < 0)) {
-                        remove_token_from_group(ptr);
-                        insert_token_in_group(&bad_head, ptr);
-                }
-                ptr->profit=newprofit;
-                ptr=ptr->smaller;
-        }
-}
 /* count all the possible tokens in a symbol */
 static void learn_symbol(unsigned char *symbol, int len)
 {
        int i;
        for (i = 0; i < len - 1; i++)
-                learn_token(symbol + i, len - i);
+                token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
 }
 /* decrease the count for all the possible tokens in a symbol */
@@ -520,117 +327,90 @@ static void forget_symbol(unsigned char *symbol, int len)
        int i;
        for (i = 0; i < len - 1; i++)
-                forget_token(symbol + i, len - i);
+                token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
 }
-/* set all the symbol flags and do the initial token count */
+/* remove all the invalid symbols from the table and do the initial token count */
 static void build_initial_tok_table(void)
 {
-        int i, use_it, valid;
+        unsigned int i, pos;
-        valid = 0;
+        pos = 0;
-        for (i = 0; i < cnt; i++) {
+        for (i = 0; i < table_cnt; i++) {
-                table[i].flags = 0;
                if ( symbol_valid(&table[i]) ) {
-                        table[i].flags |= SYM_FLAG_VALID;
+                        if (pos != i)
-                        valid++;
+                                table[pos] = table[i];
-                }
+                        learn_symbol(table[pos].sym, table[pos].len);
-        }
+                        pos++;
-        use_it = 0;
-        for (i = 0; i < cnt; i++) {
-                /* subsample the available symbols. This method is almost like
-                 * a Bresenham's algorithm to get uniformly distributed samples
-                 * across the symbol table */
-                if (table[i].flags & SYM_FLAG_VALID) {
-                        use_it += WORKING_SET;
-                        if (use_it >= valid) {
-                                table[i].flags |= SYM_FLAG_SAMPLED;
-                                use_it -= valid;
-                        }
                }
-                if (table[i].flags & SYM_FLAG_SAMPLED)
-                        learn_symbol(table[i].sym, table[i].len);
        }
+        table_cnt = pos;
 }
 /* replace a given token in all the valid symbols. Use the sampled symbols
 * to update the counts */
-static void compress_symbols(unsigned char *str, int tlen, int idx)
+static void compress_symbols(unsigned char *str, int idx)
 {
-        int i, len, learn, size;
+        unsigned int i, len, size;
-        unsigned char *p;
+        unsigned char *p1, *p2;
-        for (i = 0; i < cnt; i++) {
+        for (i = 0; i < table_cnt; i++) {
-                if (!(table[i].flags & SYM_FLAG_VALID)) continue;
                len = table[i].len;
-                learn = 0;
+                p1 = table[i].sym;
-                p = table[i].sym;
+                /* find the token on the symbol */
+                p2 = memmem(p1, len, str, 2);
+                if (!p2) continue;
+                /* decrease the counts for this symbol's tokens */
+                forget_symbol(table[i].sym, len);
+                size = len;
                do {
+                        *p2 = idx;
+                        p2++;
+                        size -= (p2 - p1);
+                        memmove(p2, p2 + 1, size);
+                        p1 = p2;
+                        len--;
+                        if (size < 2) break;
                        /* find the token on the symbol */
-                        p = (unsigned char *) strstr((char *) p, (char *) str);
+                        p2 = memmem(p1, size, str, 2);
-                        if (!p) break;
-                        if (!learn) {
-                                /* if this symbol was used to count, decrease it */
-                                if (table[i].flags & SYM_FLAG_SAMPLED)
-                                        forget_symbol(table[i].sym, len);
-                                learn = 1;
-                        }
-                        *p = idx;
+                } while (p2);
-                        size = (len - (p - table[i].sym)) - tlen + 1;
-                        memmove(p + 1, p + tlen, size);
-                        p++;
-                        len -= tlen - 1;
-                } while (size >= tlen);
+                table[i].len = len;
-                if(learn) {
+                /* increase the counts for this symbol's new tokens */
-                        table[i].len = len;
+                learn_symbol(table[i].sym, len);
-                        /* if this symbol was used to count, learn it again */
-                        if(table[i].flags & SYM_FLAG_SAMPLED)
-                                learn_symbol(table[i].sym, len);
-                }
        }
 }
 /* search the token with the maximum profit */
-static struct token *find_best_token(void)
+static int find_best_token(void)
 {
-        struct token *ptr,*best,*head;
+        int i, best, bestprofit;
-        int bestprofit;
        bestprofit=-10000;
+        best = 0;
-        /* failsafe: if the "good" list is empty search from the "bad" list */
+        for (i = 0; i < 0x10000; i++) {
-        if(good_head.right == &good_head) head = &bad_head;
+                if (token_profit[i] > bestprofit) {
-        else head = &good_head;
+                        best = i;
+                        bestprofit = token_profit[i];
-        ptr = head->right;
-        best = NULL;
-        while (ptr != head) {
-                if (ptr->profit > bestprofit) {
-                        bestprofit = ptr->profit;
-                        best = ptr;
                }
-                ptr = ptr->right;
        }
        return best;
 }
 /* this is the core of the algorithm: calculate the "best" table */
 static void optimize_result(void)
 {
-        struct token *best;
+        int i, best;
-        int i;
        /* using the '\0' symbol last allows compress_symbols to use standard
         * fast string functions */
@@ -644,14 +424,12 @@ static void optimize_result(void)
                        best = find_best_token();
                        /* place it in the "best" table */
-                        best_table_len[i] = best->len;
+                        best_table_len[i] = 2;
-                        memcpy(best_table[i], best->data, best_table_len[i]);
+                        best_table[i][0] = best & 0xFF;
-                        /* zero terminate the token so that we can use strstr
+                        best_table[i][1] = (best >> 8) & 0xFF;
-                           in compress_symbols */
-                        best_table[i][best_table_len[i]]='\0';
                        /* replace this token in all the valid symbols */
-                        compress_symbols(best_table[i], best_table_len[i], i);
+                        compress_symbols(best_table[i], i);
                }
        }
 }
@@ -659,39 +437,28 @@ static void optimize_result(void)
 /* start by placing the symbols that are actually used on the table */
 static void insert_real_symbols_in_table(void)
 {
-        int i, j, c;
+        unsigned int i, j, c;
        memset(best_table, 0, sizeof(best_table));
        memset(best_table_len, 0, sizeof(best_table_len));
-        for (i = 0; i < cnt; i++) {
+        for (i = 0; i < table_cnt; i++) {
-                if (table[i].flags & SYM_FLAG_VALID) {
+                for (j = 0; j < table[i].len; j++) {
-                        for (j = 0; j < table[i].len; j++) {
+                        c = table[i].sym[j];
-                                c = table[i].sym[j];
+                        best_table[c][0]=c;
-                                best_table[c][0]=c;
+                        best_table_len[c]=1;
-                                best_table_len[c]=1;
-                        }
                }
        }
 }
 static void optimize_token_table(void)
 {
-        memset(hash_table, 0, sizeof(hash_table));
-        good_head.left = &good_head;
-        good_head.right = &good_head;
-        bad_head.left = &bad_head;
-        bad_head.right = &bad_head;
        build_initial_tok_table();
        insert_real_symbols_in_table();
        /* When valid symbol is not registered, exit to error */
-        if (good_head.left == good_head.right &&
+        if (!table_cnt) {
-            bad_head.left == bad_head.right) {
                fprintf(stderr, "No valid symbol.\n");
                exit(1);
        }
@@ -700,8 +467,7 @@ static void optimize_token_table(void)
 }
-int
+int main(int argc, char **argv)
-main(int argc, char **argv)
 {
        if (argc >= 2) {
                int i;