1 files changed, 686 insertions, 0 deletions
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
new file mode 100644
index 000000000000..090ffda4adbc
--- /dev/null
+++ b/scripts/kallsyms.c
@@ -0,0 +1,686 @@
+/* Generate assembler source containing symbol information
+ *
+ * Copyright 2002       by Kai Germaschewski
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ *
+ * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S
+ *
+ * ChangeLog:
+ *
+ * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
+ *      Changed the compression method from stem compression to "table lookup"
+ *      compression
+ *
+ *      Table compression uses all the unused char codes on the symbols and
+ *  maps these to the most used substrings (tokens). For instance, it might
+ *  map char code 0xF7 to represent "write_" and then in every symbol where
+ *  "write_" appears it can be replaced by 0xF7, saving 5 bytes.
+ *      The used codes themselves are also placed in the table so that the
+ *  decompresion can work without "special cases".
+ *      Applied to kernel symbols, this usually produces a compression ratio
+ *  of about 50%.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+/* maximum token length used. It doesn't pay to increase it a lot, because
+ * very long substrings probably don't repeat themselves too often. */
+#define MAX_TOK_SIZE            11
+#define KSYM_NAME_LEN           127
+/* we use only a subset of the complete symbol table to gather the token count,
+ * to speed up compression, at the expense of a little compression ratio */
+#define WORKING_SET             1024
+/* first find the best token only on the list of tokens that would profit more
+ * than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list.
+ * Increasing this value will put less tokens on the "good" list, so the search
+ * is faster. However, if the good list runs out of tokens, we must painfully
+ * search the bad list. */
+#define GOOD_BAD_THRESHOLD      10
+/* token hash parameters */
+#define HASH_BITS               18
+#define HASH_TABLE_SIZE         (1 << HASH_BITS)
+#define HASH_MASK               (HASH_TABLE_SIZE - 1)
+#define HASH_BASE_OFFSET        2166136261U
+#define HASH_FOLD(a)            ((a)&(HASH_MASK))
+/* flags to mark symbols */
+#define SYM_FLAG_VALID          1
+#define SYM_FLAG_SAMPLED        2
+struct sym_entry {
+        unsigned long long addr;
+        char type;
+        unsigned char flags;
+        unsigned char len;
+        unsigned char *sym;
+};
+static struct sym_entry *table;
+static int size, cnt;
+static unsigned long long _stext, _etext, _sinittext, _einittext;
+static int all_symbols = 0;
+struct token {
+        unsigned char data[MAX_TOK_SIZE];
+        unsigned char len;
+        /* profit: the number of bytes that could be saved by inserting this
+         * token into the table */
+        int profit;
+        struct token *next;     /* next token on the hash list */
+        struct token *right;    /* next token on the good/bad list */
+        struct token *left;    /* previous token on the good/bad list */
+        struct token *smaller; /* token that is less one letter than this one */
+        };
+struct token bad_head, good_head;
+struct token *hash_table[HASH_TABLE_SIZE];
+/* the table that holds the result of the compression */
+unsigned char best_table[256][MAX_TOK_SIZE+1];
+unsigned char best_table_len[256];
+static void
+usage(void)
+{
+        fprintf(stderr, "Usage: kallsyms [--all-symbols] < in.map > out.S\n");
+        exit(1);
+}
+/*
+ * This ignores the intensely annoying "mapping symbols" found
+ * in ARM ELF files: $a, $t and $d.
+ */
+static inline int
+is_arm_mapping_symbol(const char *str)
+{
+        return str[0] == '$' && strchr("atd", str[1])
+               && (str[2] == '\0' || str[2] == '.');
+}
+static int
+read_symbol(FILE *in, struct sym_entry *s)
+{
+        char str[500];
+        int rc;
+        rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str);
+        if (rc != 3) {
+                if (rc != EOF) {
+                        /* skip line */
+                        fgets(str, 500, in);
+                }
+                return -1;
+        }
+        /* Ignore most absolute/undefined (?) symbols. */
+        if (strcmp(str, "_stext") == 0)
+                _stext = s->addr;
+        else if (strcmp(str, "_etext") == 0)
+                _etext = s->addr;
+        else if (strcmp(str, "_sinittext") == 0)
+                _sinittext = s->addr;
+        else if (strcmp(str, "_einittext") == 0)
+                _einittext = s->addr;
+        else if (toupper(s->type) == 'A')
+        {
+                /* Keep these useful absolute symbols */
+                if (strcmp(str, "__kernel_syscall_via_break") &&
+                    strcmp(str, "__kernel_syscall_via_epc") &&
+                    strcmp(str, "__kernel_sigtramp") &&
+                    strcmp(str, "__gp"))
+                        return -1;
+        }
+        else if (toupper(s->type) == 'U' ||
+                 is_arm_mapping_symbol(str))
+                return -1;
+        /* include the type field in the symbol name, so that it gets
+         * compressed together */
+        s->len = strlen(str) + 1;
+        s->sym = (char *) malloc(s->len + 1);
+        strcpy(s->sym + 1, str);
+        s->sym[0] = s->type;
+        return 0;
+}
+static int
+symbol_valid(struct sym_entry *s)
+{
+        /* Symbols which vary between passes.  Passes 1 and 2 must have
+         * identical symbol lists.  The kallsyms_* symbols below are only added
+         * after pass 1, they would be included in pass 2 when --all-symbols is
+         * specified so exclude them to get a stable symbol list.
+         */
+        static char *special_symbols[] = {
+                "kallsyms_addresses",
+                "kallsyms_num_syms",
+                "kallsyms_names",
+                "kallsyms_markers",
+                "kallsyms_token_table",
+                "kallsyms_token_index",
+        /* Exclude linker generated symbols which vary between passes */
+                "_SDA_BASE_",           /* ppc */
+                "_SDA2_BASE_",          /* ppc */
+                NULL };
+        int i;
+        /* if --all-symbols is not specified, then symbols outside the text
+         * and inittext sections are discarded */
+        if (!all_symbols) {
+                if ((s->addr < _stext || s->addr > _etext)
+                    && (s->addr < _sinittext || s->addr > _einittext))
+                        return 0;
+                /* Corner case.  Discard any symbols with the same value as
+                 * _etext or _einittext, they can move between pass 1 and 2
+                 * when the kallsyms data is added.  If these symbols move then
+                 * they may get dropped in pass 2, which breaks the kallsyms
+                 * rules.
+                 */
+                if ((s->addr == _etext && strcmp(s->sym + 1, "_etext")) ||
+                    (s->addr == _einittext && strcmp(s->sym + 1, "_einittext")))
+                        return 0;
+        }
+        /* Exclude symbols which vary between passes. */
+        if (strstr(s->sym + 1, "_compiled."))
+                return 0;
+        for (i = 0; special_symbols[i]; i++)
+                if( strcmp(s->sym + 1, special_symbols[i]) == 0 )
+                        return 0;
+        return 1;
+}
+static void
+read_map(FILE *in)
+{
+        while (!feof(in)) {
+                if (cnt >= size) {
+                        size += 10000;
+                        table = realloc(table, sizeof(*table) * size);
+                        if (!table) {
+                                fprintf(stderr, "out of memory\n");
+                                exit (1);
+                        }
+                }
+                if (read_symbol(in, &table[cnt]) == 0)
+                        cnt++;
+        }
+}
+static void output_label(char *label)
+{
+        printf(".globl %s\n",label);
+        printf("\tALGN\n");
+        printf("%s:\n",label);
+}
+/* uncompress a compressed symbol. When this function is called, the best table
+ * might still be compressed itself, so the function needs to be recursive */
+static int expand_symbol(unsigned char *data, int len, char *result)
+{
+        int c, rlen, total=0;
+        while (len) {
+                c = *data;
+                /* if the table holds a single char that is the same as the one
+                 * we are looking for, then end the search */
+                if (best_table[c][0]==c && best_table_len[c]==1) {
+                        *result++ = c;
+                        total++;
+                } else {
+                        /* if not, recurse and expand */
+                        rlen = expand_symbol(best_table[c], best_table_len[c], result);
+                        total += rlen;
+                        result += rlen;
+                }
+                data++;
+                len--;
+        }
+        *result=0;
+        return total;
+}
+static void
+write_src(void)
+{
+        int i, k, off, valid;
+        unsigned int best_idx[256];
+        unsigned int *markers;
+        char buf[KSYM_NAME_LEN+1];
+        printf("#include <asm/types.h>\n");
+        printf("#if BITS_PER_LONG == 64\n");
+        printf("#define PTR .quad\n");
+        printf("#define ALGN .align 8\n");
+        printf("#else\n");
+        printf("#define PTR .long\n");
+        printf("#define ALGN .align 4\n");
+        printf("#endif\n");
+        printf(".data\n");
+        output_label("kallsyms_addresses");
+        valid = 0;
+        for (i = 0; i < cnt; i++) {
+                if (table[i].flags & SYM_FLAG_VALID) {
+                        printf("\tPTR\t%#llx\n", table[i].addr);
+                        valid++;
+                }
+        }
+        printf("\n");
+        output_label("kallsyms_num_syms");
+        printf("\tPTR\t%d\n", valid);
+        printf("\n");
+        /* table of offset markers, that give the offset in the compressed stream
+         * every 256 symbols */
+        markers = (unsigned int *) malloc(sizeof(unsigned int)*((valid + 255) / 256));
+        output_label("kallsyms_names");
+        valid = 0;
+        off = 0;
+        for (i = 0; i < cnt; i++) {
+                if (!table[i].flags & SYM_FLAG_VALID)
+                        continue;
+                if ((valid & 0xFF) == 0)
+                        markers[valid >> 8] = off;
+                printf("\t.byte 0x%02x", table[i].len);
+                for (k = 0; k < table[i].len; k++)
+                        printf(", 0x%02x", table[i].sym[k]);
+                printf("\n");
+                off += table[i].len + 1;
+                valid++;
+        }
+        printf("\n");
+        output_label("kallsyms_markers");
+        for (i = 0; i < ((valid + 255) >> 8); i++)
+                printf("\tPTR\t%d\n", markers[i]);
+        printf("\n");
+        free(markers);
+        output_label("kallsyms_token_table");
+        off = 0;
+        for (i = 0; i < 256; i++) {
+                best_idx[i] = off;
+                expand_symbol(best_table[i],best_table_len[i],buf);
+                printf("\t.asciz\t\"%s\"\n", buf);
+                off += strlen(buf) + 1;
+        }
+        printf("\n");
+        output_label("kallsyms_token_index");
+        for (i = 0; i < 256; i++)
+                printf("\t.short\t%d\n", best_idx[i]);
+        printf("\n");
+}
+/* table lookup compression functions */
+static inline unsigned int rehash_token(unsigned int hash, unsigned char data)
+{
+        return ((hash * 16777619) ^ data);
+}
+static unsigned int hash_token(unsigned char *data, int len)
+{
+        unsigned int hash=HASH_BASE_OFFSET;
+        int i;
+        for (i = 0; i < len; i++)
+                hash = rehash_token(hash, data[i]);
+        return HASH_FOLD(hash);
+}
+/* find a token given its data and hash value */
+static struct token *find_token_hash(unsigned char *data, int len, unsigned int hash)
+{
+        struct token *ptr;
+        ptr = hash_table[hash];
+        while (ptr) {
+                if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0))
+                        return ptr;
+                ptr=ptr->next;
+        }
+        return NULL;
+}
+static inline void insert_token_in_group(struct token *head, struct token *ptr)
+{
+        ptr->right = head->right;
+        ptr->right->left = ptr;
+        head->right = ptr;
+        ptr->left = head;
+}
+static inline void remove_token_from_group(struct token *ptr)
+{
+        ptr->left->right = ptr->right;
+        ptr->right->left = ptr->left;
+}
+/* build the counts for all the tokens that start with "data", and have lenghts
+ * from 2 to "len" */
+static void learn_token(unsigned char *data, int len)
+{
+        struct token *ptr,*last_ptr;
+        int i, newprofit;
+        unsigned int hash = HASH_BASE_OFFSET;
+        unsigned int hashes[MAX_TOK_SIZE + 1];
+        if (len > MAX_TOK_SIZE)
+                len = MAX_TOK_SIZE;
+        /* calculate and store the hash values for all the sub-tokens */
+        hash = rehash_token(hash, data[0]);
+        for (i = 2; i <= len; i++) {
+                hash = rehash_token(hash, data[i-1]);
+                hashes[i] = HASH_FOLD(hash);
+        }
+        last_ptr = NULL;
+        ptr = NULL;
+        for (i = len; i >= 2; i--) {
+                hash = hashes[i];
+                if (!ptr) ptr = find_token_hash(data, i, hash);
+                if (!ptr) {
+                        /* create a new token entry */
+                        ptr = (struct token *) malloc(sizeof(*ptr));
+                        memcpy(ptr->data, data, i);
+                        ptr->len = i;
+                        /* when we create an entry, it's profit is 0 because
+                         * we also take into account the size of the token on
+                         * the compressed table. We then subtract GOOD_BAD_THRESHOLD
+                         * so that the test to see if this token belongs to
+                         * the good or bad list, is a comparison to zero */
+                        ptr->profit = -GOOD_BAD_THRESHOLD;
+                        ptr->next = hash_table[hash];
+                        hash_table[hash] = ptr;
+                        insert_token_in_group(&bad_head, ptr);
+                        ptr->smaller = NULL;
+                } else {
+                        newprofit = ptr->profit + (ptr->len - 1);
+                        /* check to see if this token needs to be moved to a
+                         * different list */
+                        if((ptr->profit < 0) && (newprofit >= 0)) {
+                                remove_token_from_group(ptr);
+                                insert_token_in_group(&good_head,ptr);
+                        }
+                        ptr->profit = newprofit;
+                }
+                if (last_ptr) last_ptr->smaller = ptr;
+                last_ptr = ptr;
+                ptr = ptr->smaller;
+        }
+}
+/* decrease the counts for all the tokens that start with "data", and have lenghts
+ * from 2 to "len". This function is much simpler than learn_token because we have
+ * more guarantees (tho tokens exist, the ->smaller pointer is set, etc.)
+ * The two separate functions exist only because of compression performance */
+static void forget_token(unsigned char *data, int len)
+{
+        struct token *ptr;
+        int i, newprofit;
+        unsigned int hash=0;
+        if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE;
+        hash = hash_token(data, len);
+        ptr = find_token_hash(data, len, hash);
+        for (i = len; i >= 2; i--) {
+                newprofit = ptr->profit - (ptr->len - 1);
+                if ((ptr->profit >= 0) && (newprofit < 0)) {
+                        remove_token_from_group(ptr);
+                        insert_token_in_group(&bad_head, ptr);
+                }
+                ptr->profit=newprofit;
+                ptr=ptr->smaller;
+        }
+}
+/* count all the possible tokens in a symbol */
+static void learn_symbol(unsigned char *symbol, int len)
+{
+        int i;
+        for (i = 0; i < len - 1; i++)
+                learn_token(symbol + i, len - i);
+}
+/* decrease the count for all the possible tokens in a symbol */
+static void forget_symbol(unsigned char *symbol, int len)
+{
+        int i;
+        for (i = 0; i < len - 1; i++)
+                forget_token(symbol + i, len - i);
+}
+/* set all the symbol flags and do the initial token count */
+static void build_initial_tok_table(void)
+{
+        int i, use_it, valid;
+        valid = 0;
+        for (i = 0; i < cnt; i++) {
+                table[i].flags = 0;
+                if ( symbol_valid(&table[i]) ) {
+                        table[i].flags |= SYM_FLAG_VALID;
+                        valid++;
+                }
+        }
+        use_it = 0;
+        for (i = 0; i < cnt; i++) {
+                /* subsample the available symbols. This method is almost like
+                 * a Bresenham's algorithm to get uniformly distributed samples
+                 * across the symbol table */
+                if (table[i].flags & SYM_FLAG_VALID) {
+                        use_it += WORKING_SET;
+                        if (use_it >= valid) {
+                                table[i].flags |= SYM_FLAG_SAMPLED;
+                                use_it -= valid;
+                        }
+                }
+                if (table[i].flags & SYM_FLAG_SAMPLED)
+                        learn_symbol(table[i].sym, table[i].len);
+        }
+}
+/* replace a given token in all the valid symbols. Use the sampled symbols
+ * to update the counts */
+static void compress_symbols(unsigned char *str, int tlen, int idx)
+{
+        int i, len, learn, size;
+        unsigned char *p;
+        for (i = 0; i < cnt; i++) {
+                if (!(table[i].flags & SYM_FLAG_VALID)) continue;
+                len = table[i].len;
+                learn = 0;
+                p = table[i].sym;
+                do {
+                        /* find the token on the symbol */
+                        p = (unsigned char *) strstr((char *) p, (char *) str);
+                        if (!p) break;
+                        if (!learn) {
+                                /* if this symbol was used to count, decrease it */
+                                if (table[i].flags & SYM_FLAG_SAMPLED)
+                                        forget_symbol(table[i].sym, len);
+                                learn = 1;
+                        }
+                        *p = idx;
+                        size = (len - (p - table[i].sym)) - tlen + 1;
+                        memmove(p + 1, p + tlen, size);
+                        p++;
+                        len -= tlen - 1;
+                } while (size >= tlen);
+                if(learn) {
+                        table[i].len = len;
+                        /* if this symbol was used to count, learn it again */
+                        if(table[i].flags & SYM_FLAG_SAMPLED)
+                                learn_symbol(table[i].sym, len);
+                }
+        }
+}
+/* search the token with the maximum profit */
+static struct token *find_best_token(void)
+{
+        struct token *ptr,*best,*head;
+        int bestprofit;
+        bestprofit=-10000;
+        /* failsafe: if the "good" list is empty search from the "bad" list */
+        if(good_head.right == &good_head) head = &bad_head;
+        else head = &good_head;
+        ptr = head->right;
+        best = NULL;
+        while (ptr != head) {
+                if (ptr->profit > bestprofit) {
+                        bestprofit = ptr->profit;
+                        best = ptr;
+                }
+                ptr = ptr->right;
+        }
+        return best;
+}
+/* this is the core of the algorithm: calculate the "best" table */
+static void optimize_result(void)
+{
+        struct token *best;
+        int i;
+        /* using the '\0' symbol last allows compress_symbols to use standard
+         * fast string functions */
+        for (i = 255; i >= 0; i--) {
+                /* if this table slot is empty (it is not used by an actual
+                 * original char code */
+                if (!best_table_len[i]) {
+                        /* find the token with the breates profit value */
+                        best = find_best_token();
+                        /* place it in the "best" table */
+                        best_table_len[i] = best->len;
+                        memcpy(best_table[i], best->data, best_table_len[i]);
+                        /* zero terminate the token so that we can use strstr
+                           in compress_symbols */
+                        best_table[i][best_table_len[i]]='\0';
+                        /* replace this token in all the valid symbols */
+                        compress_symbols(best_table[i], best_table_len[i], i);
+                }
+        }
+}
+/* start by placing the symbols that are actually used on the table */
+static void insert_real_symbols_in_table(void)
+{
+        int i, j, c;
+        memset(best_table, 0, sizeof(best_table));
+        memset(best_table_len, 0, sizeof(best_table_len));
+        for (i = 0; i < cnt; i++) {
+                if (table[i].flags & SYM_FLAG_VALID) {
+                        for (j = 0; j < table[i].len; j++) {
+                                c = table[i].sym[j];
+                                best_table[c][0]=c;
+                                best_table_len[c]=1;
+                        }
+                }
+        }
+}
+static void optimize_token_table(void)
+{
+        memset(hash_table, 0, sizeof(hash_table));
+        good_head.left = &good_head;
+        good_head.right = &good_head;
+        bad_head.left = &bad_head;
+        bad_head.right = &bad_head;
+        build_initial_tok_table();
+        insert_real_symbols_in_table();
+        optimize_result();
+}
+int
+main(int argc, char **argv)
+{
+        if (argc == 2 && strcmp(argv[1], "--all-symbols") == 0)
+                all_symbols = 1;
+        else if (argc != 1)
+                usage();
+        read_map(stdin);
+        optimize_token_table();
+        write_src();
+        return 0;
+}

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c new file mode 100644 index 000000000000..090ffda4adbc --- /dev/null +++ b/scripts/kallsyms.c
@@ -0,0 +1,686 @@
	1	/* Generate assembler source containing symbol information
	2	*
	3	* Copyright 2002 by Kai Germaschewski
	4	*
	5	* This software may be used and distributed according to the terms
	6	* of the GNU General Public License, incorporated herein by reference.
	7	*
	8	* Usage: nm -n vmlinux \| scripts/kallsyms [--all-symbols] > symbols.S
	9	*
	10	* ChangeLog:
	11	*
	12	* (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
	13	* Changed the compression method from stem compression to "table lookup"
	14	* compression
	15	*
	16	* Table compression uses all the unused char codes on the symbols and
	17	* maps these to the most used substrings (tokens). For instance, it might
	18	* map char code 0xF7 to represent "write_" and then in every symbol where
	19	* "write_" appears it can be replaced by 0xF7, saving 5 bytes.
	20	* The used codes themselves are also placed in the table so that the
	21	* decompresion can work without "special cases".
	22	* Applied to kernel symbols, this usually produces a compression ratio
	23	* of about 50%.
	24	*
	25	*/
	26
	27	#include <stdio.h>
	28	#include <stdlib.h>
	29	#include <string.h>
	30	#include <ctype.h>
	31
	32	/* maximum token length used. It doesn't pay to increase it a lot, because
	33	* very long substrings probably don't repeat themselves too often. */
	34	#define MAX_TOK_SIZE 11
	35	#define KSYM_NAME_LEN 127
	36
	37	/* we use only a subset of the complete symbol table to gather the token count,
	38	* to speed up compression, at the expense of a little compression ratio */
	39	#define WORKING_SET 1024
	40
	41	/* first find the best token only on the list of tokens that would profit more
	42	* than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list.
	43	* Increasing this value will put less tokens on the "good" list, so the search
	44	* is faster. However, if the good list runs out of tokens, we must painfully
	45	* search the bad list. */
	46	#define GOOD_BAD_THRESHOLD 10
	47
	48	/* token hash parameters */
	49	#define HASH_BITS 18
	50	#define HASH_TABLE_SIZE (1 << HASH_BITS)
	51	#define HASH_MASK (HASH_TABLE_SIZE - 1)
	52	#define HASH_BASE_OFFSET 2166136261U
	53	#define HASH_FOLD(a) ((a)&(HASH_MASK))
	54
	55	/* flags to mark symbols */
	56	#define SYM_FLAG_VALID 1
	57	#define SYM_FLAG_SAMPLED 2
	58
	59	struct sym_entry {
	60	unsigned long long addr;
	61	char type;
	62	unsigned char flags;
	63	unsigned char len;
	64	unsigned char *sym;
	65	};
	66
	67
	68	static struct sym_entry *table;
	69	static int size, cnt;
	70	static unsigned long long _stext, _etext, _sinittext, _einittext;
	71	static int all_symbols = 0;
	72
	73	struct token {
	74	unsigned char data[MAX_TOK_SIZE];
	75	unsigned char len;
	76	/* profit: the number of bytes that could be saved by inserting this
	77	* token into the table */
	78	int profit;
	79	struct token next; / next token on the hash list */
	80	struct token right; / next token on the good/bad list */
	81	struct token left; / previous token on the good/bad list */
	82	struct token smaller; / token that is less one letter than this one */
	83	};
	84
	85	struct token bad_head, good_head;
	86	struct token *hash_table[HASH_TABLE_SIZE];
	87
	88	/* the table that holds the result of the compression */
	89	unsigned char best_table[256][MAX_TOK_SIZE+1];
	90	unsigned char best_table_len[256];
	91
	92
	93	static void
	94	usage(void)
	95	{
	96	fprintf(stderr, "Usage: kallsyms [--all-symbols] < in.map > out.S\n");
	97	exit(1);
	98	}
	99
	100	/*
	101	* This ignores the intensely annoying "mapping symbols" found
	102	* in ARM ELF files: $a, $t and $d.
	103	*/
	104	static inline int
	105	is_arm_mapping_symbol(const char *str)
	106	{
	107	return str[0] == '$' && strchr("atd", str[1])
	108	&& (str[2] == '\0' \|\| str[2] == '.');
	109	}
	110
	111	static int
	112	read_symbol(FILE in, struct sym_entry s)
	113	{
	114	char str[500];
	115	int rc;
	116
	117	rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str);
	118	if (rc != 3) {
	119	if (rc != EOF) {
	120	/* skip line */
	121	fgets(str, 500, in);
	122	}
	123	return -1;
	124	}
	125
	126	/* Ignore most absolute/undefined (?) symbols. */
	127	if (strcmp(str, "_stext") == 0)
	128	_stext = s->addr;
	129	else if (strcmp(str, "_etext") == 0)
	130	_etext = s->addr;
	131	else if (strcmp(str, "_sinittext") == 0)
	132	_sinittext = s->addr;
	133	else if (strcmp(str, "_einittext") == 0)
	134	_einittext = s->addr;
	135	else if (toupper(s->type) == 'A')
	136	{
	137	/* Keep these useful absolute symbols */
	138	if (strcmp(str, "__kernel_syscall_via_break") &&
	139	strcmp(str, "__kernel_syscall_via_epc") &&
	140	strcmp(str, "__kernel_sigtramp") &&
	141	strcmp(str, "__gp"))
	142	return -1;
	143
	144	}
	145	else if (toupper(s->type) == 'U' \|\|
	146	is_arm_mapping_symbol(str))
	147	return -1;
	148
	149	/* include the type field in the symbol name, so that it gets
	150	* compressed together */
	151	s->len = strlen(str) + 1;
	152	s->sym = (char *) malloc(s->len + 1);
	153	strcpy(s->sym + 1, str);
	154	s->sym[0] = s->type;
	155
	156	return 0;
	157	}
	158
	159	static int
	160	symbol_valid(struct sym_entry *s)
	161	{
	162	/* Symbols which vary between passes. Passes 1 and 2 must have
	163	* identical symbol lists. The kallsyms_* symbols below are only added
	164	* after pass 1, they would be included in pass 2 when --all-symbols is
	165	* specified so exclude them to get a stable symbol list.
	166	*/
	167	static char *special_symbols[] = {
	168	"kallsyms_addresses",
	169	"kallsyms_num_syms",
	170	"kallsyms_names",
	171	"kallsyms_markers",
	172	"kallsyms_token_table",
	173	"kallsyms_token_index",
	174
	175	/* Exclude linker generated symbols which vary between passes */
	176	"_SDA_BASE_", /* ppc */
	177	"_SDA2_BASE_", /* ppc */
	178	NULL };
	179	int i;
	180
	181	/* if --all-symbols is not specified, then symbols outside the text
	182	* and inittext sections are discarded */
	183	if (!all_symbols) {
	184	if ((s->addr < _stext \|\| s->addr > _etext)
	185	&& (s->addr < _sinittext \|\| s->addr > _einittext))
	186	return 0;
	187	/* Corner case. Discard any symbols with the same value as
	188	* _etext or _einittext, they can move between pass 1 and 2
	189	* when the kallsyms data is added. If these symbols move then
	190	* they may get dropped in pass 2, which breaks the kallsyms
	191	* rules.
	192	*/
	193	if ((s->addr == _etext && strcmp(s->sym + 1, "_etext")) \|\|
	194	(s->addr == _einittext && strcmp(s->sym + 1, "_einittext")))
	195	return 0;
	196	}
	197
	198	/* Exclude symbols which vary between passes. */
	199	if (strstr(s->sym + 1, "_compiled."))
	200	return 0;
	201
	202	for (i = 0; special_symbols[i]; i++)
	203	if( strcmp(s->sym + 1, special_symbols[i]) == 0 )
	204	return 0;
	205
	206	return 1;
	207	}
	208
	209	static void
	210	read_map(FILE *in)
	211	{
	212	while (!feof(in)) {
	213	if (cnt >= size) {
	214	size += 10000;
	215	table = realloc(table, sizeof(table) size);
	216	if (!table) {
	217	fprintf(stderr, "out of memory\n");
	218	exit (1);
	219	}
	220	}
	221	if (read_symbol(in, &table[cnt]) == 0)
	222	cnt++;
	223	}
	224	}
	225
	226	static void output_label(char *label)
	227	{
	228	printf(".globl %s\n",label);
	229	printf("\tALGN\n");
	230	printf("%s:\n",label);
	231	}
	232
	233	/* uncompress a compressed symbol. When this function is called, the best table
	234	* might still be compressed itself, so the function needs to be recursive */
	235	static int expand_symbol(unsigned char data, int len, char result)
	236	{
	237	int c, rlen, total=0;
	238
	239	while (len) {
	240	c = *data;
	241	/* if the table holds a single char that is the same as the one
	242	* we are looking for, then end the search */
	243	if (best_table[c][0]==c && best_table_len[c]==1) {
	244	*result++ = c;
	245	total++;
	246	} else {
	247	/* if not, recurse and expand */
	248	rlen = expand_symbol(best_table[c], best_table_len[c], result);
	249	total += rlen;
	250	result += rlen;
	251	}
	252	data++;
	253	len--;
	254	}
	255	*result=0;
	256
	257	return total;
	258	}
	259
	260	static void
	261	write_src(void)
	262	{
	263	int i, k, off, valid;
	264	unsigned int best_idx[256];
	265	unsigned int *markers;
	266	char buf[KSYM_NAME_LEN+1];
	267
	268	printf("#include <asm/types.h>\n");
	269	printf("#if BITS_PER_LONG == 64\n");
	270	printf("#define PTR .quad\n");
	271	printf("#define ALGN .align 8\n");
	272	printf("#else\n");
	273	printf("#define PTR .long\n");
	274	printf("#define ALGN .align 4\n");
	275	printf("#endif\n");
	276
	277	printf(".data\n");
	278
	279	output_label("kallsyms_addresses");
	280	valid = 0;
	281	for (i = 0; i < cnt; i++) {
	282	if (table[i].flags & SYM_FLAG_VALID) {
	283	printf("\tPTR\t%#llx\n", table[i].addr);
	284	valid++;
	285	}
	286	}
	287	printf("\n");
	288
	289	output_label("kallsyms_num_syms");
	290	printf("\tPTR\t%d\n", valid);
	291	printf("\n");
	292
	293	/* table of offset markers, that give the offset in the compressed stream
	294	* every 256 symbols */
	295	markers = (unsigned int ) malloc(sizeof(unsigned int)((valid + 255) / 256));
	296
	297	output_label("kallsyms_names");
	298	valid = 0;
	299	off = 0;
	300	for (i = 0; i < cnt; i++) {
	301
	302	if (!table[i].flags & SYM_FLAG_VALID)
	303	continue;
	304
	305	if ((valid & 0xFF) == 0)
	306	markers[valid >> 8] = off;
	307
	308	printf("\t.byte 0x%02x", table[i].len);
	309	for (k = 0; k < table[i].len; k++)
	310	printf(", 0x%02x", table[i].sym[k]);
	311	printf("\n");
	312
	313	off += table[i].len + 1;
	314	valid++;
	315	}
	316	printf("\n");
	317
	318	output_label("kallsyms_markers");
	319	for (i = 0; i < ((valid + 255) >> 8); i++)
	320	printf("\tPTR\t%d\n", markers[i]);
	321	printf("\n");
	322
	323	free(markers);
	324
	325	output_label("kallsyms_token_table");
	326	off = 0;
	327	for (i = 0; i < 256; i++) {
	328	best_idx[i] = off;
	329	expand_symbol(best_table[i],best_table_len[i],buf);
	330	printf("\t.asciz\t\"%s\"\n", buf);
	331	off += strlen(buf) + 1;
	332	}
	333	printf("\n");
	334
	335	output_label("kallsyms_token_index");
	336	for (i = 0; i < 256; i++)
	337	printf("\t.short\t%d\n", best_idx[i]);
	338	printf("\n");
	339	}
	340
	341
	342	/* table lookup compression functions */
	343
	344	static inline unsigned int rehash_token(unsigned int hash, unsigned char data)
	345	{
	346	return ((hash * 16777619) ^ data);
	347	}
	348
	349	static unsigned int hash_token(unsigned char *data, int len)
	350	{
	351	unsigned int hash=HASH_BASE_OFFSET;
	352	int i;
	353
	354	for (i = 0; i < len; i++)
	355	hash = rehash_token(hash, data[i]);
	356
	357	return HASH_FOLD(hash);
	358	}
	359
	360	/* find a token given its data and hash value */
	361	static struct token find_token_hash(unsigned char data, int len, unsigned int hash)
	362	{
	363	struct token *ptr;
	364
	365	ptr = hash_table[hash];
	366
	367	while (ptr) {
	368	if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0))
	369	return ptr;
	370	ptr=ptr->next;
	371	}
	372
	373	return NULL;
	374	}
	375
	376	static inline void insert_token_in_group(struct token head, struct token ptr)
	377	{
	378	ptr->right = head->right;
	379	ptr->right->left = ptr;
	380	head->right = ptr;
	381	ptr->left = head;
	382	}
	383
	384	static inline void remove_token_from_group(struct token *ptr)
	385	{
	386	ptr->left->right = ptr->right;
	387	ptr->right->left = ptr->left;
	388	}
	389
	390
	391	/* build the counts for all the tokens that start with "data", and have lenghts
	392	* from 2 to "len" */
	393	static void learn_token(unsigned char *data, int len)
	394	{
	395	struct token ptr,last_ptr;
	396	int i, newprofit;
	397	unsigned int hash = HASH_BASE_OFFSET;
	398	unsigned int hashes[MAX_TOK_SIZE + 1];
	399
	400	if (len > MAX_TOK_SIZE)
	401	len = MAX_TOK_SIZE;
	402
	403	/* calculate and store the hash values for all the sub-tokens */
	404	hash = rehash_token(hash, data[0]);
	405	for (i = 2; i <= len; i++) {
	406	hash = rehash_token(hash, data[i-1]);
	407	hashes[i] = HASH_FOLD(hash);
	408	}
	409
	410	last_ptr = NULL;
	411	ptr = NULL;
	412
	413	for (i = len; i >= 2; i--) {
	414	hash = hashes[i];
	415
	416	if (!ptr) ptr = find_token_hash(data, i, hash);
	417
	418	if (!ptr) {
	419	/* create a new token entry */
	420	ptr = (struct token ) malloc(sizeof(ptr));
	421
	422	memcpy(ptr->data, data, i);
	423	ptr->len = i;
	424
	425	/* when we create an entry, it's profit is 0 because
	426	* we also take into account the size of the token on
	427	* the compressed table. We then subtract GOOD_BAD_THRESHOLD
	428	* so that the test to see if this token belongs to
	429	* the good or bad list, is a comparison to zero */
	430	ptr->profit = -GOOD_BAD_THRESHOLD;
	431
	432	ptr->next = hash_table[hash];
	433	hash_table[hash] = ptr;
	434
	435	insert_token_in_group(&bad_head, ptr);
	436
	437	ptr->smaller = NULL;
	438	} else {
	439	newprofit = ptr->profit + (ptr->len - 1);
	440	/* check to see if this token needs to be moved to a
	441	* different list */
	442	if((ptr->profit < 0) && (newprofit >= 0)) {
	443	remove_token_from_group(ptr);
	444	insert_token_in_group(&good_head,ptr);
	445	}
	446	ptr->profit = newprofit;
	447	}
	448
	449	if (last_ptr) last_ptr->smaller = ptr;
	450	last_ptr = ptr;
	451
	452	ptr = ptr->smaller;
	453	}
	454	}
	455
	456	/* decrease the counts for all the tokens that start with "data", and have lenghts
	457	* from 2 to "len". This function is much simpler than learn_token because we have
	458	* more guarantees (tho tokens exist, the ->smaller pointer is set, etc.)
	459	* The two separate functions exist only because of compression performance */
	460	static void forget_token(unsigned char *data, int len)
	461	{
	462	struct token *ptr;
	463	int i, newprofit;
	464	unsigned int hash=0;
	465
	466	if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE;
	467
	468	hash = hash_token(data, len);
	469	ptr = find_token_hash(data, len, hash);
	470
	471	for (i = len; i >= 2; i--) {
	472
	473	newprofit = ptr->profit - (ptr->len - 1);
	474	if ((ptr->profit >= 0) && (newprofit < 0)) {
	475	remove_token_from_group(ptr);
	476	insert_token_in_group(&bad_head, ptr);
	477	}
	478	ptr->profit=newprofit;
	479
	480	ptr=ptr->smaller;
	481	}
	482	}
	483
	484	/* count all the possible tokens in a symbol */
	485	static void learn_symbol(unsigned char *symbol, int len)
	486	{
	487	int i;
	488
	489	for (i = 0; i < len - 1; i++)
	490	learn_token(symbol + i, len - i);
	491	}
	492
	493	/* decrease the count for all the possible tokens in a symbol */
	494	static void forget_symbol(unsigned char *symbol, int len)
	495	{
	496	int i;
	497
	498	for (i = 0; i < len - 1; i++)
	499	forget_token(symbol + i, len - i);
	500	}
	501
	502	/* set all the symbol flags and do the initial token count */
	503	static void build_initial_tok_table(void)
	504	{
	505	int i, use_it, valid;
	506
	507	valid = 0;
	508	for (i = 0; i < cnt; i++) {
	509	table[i].flags = 0;
	510	if ( symbol_valid(&table[i]) ) {
	511	table[i].flags \|= SYM_FLAG_VALID;
	512	valid++;
	513	}
	514	}
	515
	516	use_it = 0;
	517	for (i = 0; i < cnt; i++) {
	518
	519	/* subsample the available symbols. This method is almost like
	520	* a Bresenham's algorithm to get uniformly distributed samples
	521	* across the symbol table */
	522	if (table[i].flags & SYM_FLAG_VALID) {
	523
	524	use_it += WORKING_SET;
	525
	526	if (use_it >= valid) {
	527	table[i].flags \|= SYM_FLAG_SAMPLED;
	528	use_it -= valid;
	529	}
	530	}
	531	if (table[i].flags & SYM_FLAG_SAMPLED)
	532	learn_symbol(table[i].sym, table[i].len);
	533	}
	534	}
	535
	536	/* replace a given token in all the valid symbols. Use the sampled symbols
	537	* to update the counts */
	538	static void compress_symbols(unsigned char *str, int tlen, int idx)
	539	{
	540	int i, len, learn, size;
	541	unsigned char *p;
	542
	543	for (i = 0; i < cnt; i++) {
	544
	545	if (!(table[i].flags & SYM_FLAG_VALID)) continue;
	546
	547	len = table[i].len;
	548	learn = 0;
	549	p = table[i].sym;
	550
	551	do {
	552	/* find the token on the symbol */
	553	p = (unsigned char ) strstr((char ) p, (char *) str);
	554	if (!p) break;
	555
	556	if (!learn) {
	557	/* if this symbol was used to count, decrease it */
	558	if (table[i].flags & SYM_FLAG_SAMPLED)
	559	forget_symbol(table[i].sym, len);
	560	learn = 1;
	561	}
	562
	563	*p = idx;
	564	size = (len - (p - table[i].sym)) - tlen + 1;
	565	memmove(p + 1, p + tlen, size);
	566	p++;
	567	len -= tlen - 1;
	568
	569	} while (size >= tlen);
	570
	571	if(learn) {
	572	table[i].len = len;
	573	/* if this symbol was used to count, learn it again */
	574	if(table[i].flags & SYM_FLAG_SAMPLED)
	575	learn_symbol(table[i].sym, len);
	576	}
	577	}
	578	}
	579
	580	/* search the token with the maximum profit */
	581	static struct token *find_best_token(void)
	582	{
	583	struct token ptr,best,*head;
	584	int bestprofit;
	585
	586	bestprofit=-10000;
	587
	588	/* failsafe: if the "good" list is empty search from the "bad" list */
	589	if(good_head.right == &good_head) head = &bad_head;
	590	else head = &good_head;
	591
	592	ptr = head->right;
	593	best = NULL;
	594	while (ptr != head) {
	595	if (ptr->profit > bestprofit) {
	596	bestprofit = ptr->profit;
	597	best = ptr;
	598	}
	599	ptr = ptr->right;
	600	}
	601
	602	return best;
	603	}
	604
	605	/* this is the core of the algorithm: calculate the "best" table */
	606	static void optimize_result(void)
	607	{
	608	struct token *best;
	609	int i;
	610
	611	/* using the '\0' symbol last allows compress_symbols to use standard
	612	* fast string functions */
	613	for (i = 255; i >= 0; i--) {
	614
	615	/* if this table slot is empty (it is not used by an actual
	616	* original char code */
	617	if (!best_table_len[i]) {
	618
	619	/* find the token with the breates profit value */
	620	best = find_best_token();
	621
	622	/* place it in the "best" table */
	623	best_table_len[i] = best->len;
	624	memcpy(best_table[i], best->data, best_table_len[i]);
	625	/* zero terminate the token so that we can use strstr
	626	in compress_symbols */
	627	best_table[i][best_table_len[i]]='\0';
	628
	629	/* replace this token in all the valid symbols */
	630	compress_symbols(best_table[i], best_table_len[i], i);
	631	}
	632	}
	633	}
	634
	635	/* start by placing the symbols that are actually used on the table */
	636	static void insert_real_symbols_in_table(void)
	637	{
	638	int i, j, c;
	639
	640	memset(best_table, 0, sizeof(best_table));
	641	memset(best_table_len, 0, sizeof(best_table_len));
	642
	643	for (i = 0; i < cnt; i++) {
	644	if (table[i].flags & SYM_FLAG_VALID) {
	645	for (j = 0; j < table[i].len; j++) {
	646	c = table[i].sym[j];
	647	best_table[c][0]=c;
	648	best_table_len[c]=1;
	649	}
	650	}
	651	}
	652	}
	653
	654	static void optimize_token_table(void)
	655	{
	656	memset(hash_table, 0, sizeof(hash_table));
	657
	658	good_head.left = &good_head;
	659	good_head.right = &good_head;
	660
	661	bad_head.left = &bad_head;
	662	bad_head.right = &bad_head;
	663
	664	build_initial_tok_table();
	665
	666	insert_real_symbols_in_table();
	667
	668	optimize_result();
	669	}
	670
	671
	672	int
	673	main(int argc, char **argv)
	674	{
	675	if (argc == 2 && strcmp(argv[1], "--all-symbols") == 0)
	676	all_symbols = 1;
	677	else if (argc != 1)
	678	usage();
	679
	680	read_map(stdin);
	681	optimize_token_table();
	682	write_src();
	683
	684	return 0;
	685	}
	686