diff options
Diffstat (limited to 'scripts/kallsyms.c')
| -rw-r--r-- | scripts/kallsyms.c | 587 |
1 files changed, 334 insertions, 253 deletions
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 487ac6f37ca2..4b0234e4b12f 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -5,7 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S + * Usage: kallsyms [--all-symbols] in.map > out.S * * Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might @@ -18,190 +18,198 @@ * */ +#include <errno.h> +#include <getopt.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> +#include <limits.h> + +#include <xalloc.h> -#ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) -#endif -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 struct sym_entry { unsigned long long addr; unsigned int len; - unsigned int start_pos; - unsigned char *sym; + unsigned int seq; + unsigned char sym[]; }; -struct text_range { - const char *stext, *etext; +struct addr_range { + const char *start_sym, *end_sym; unsigned long long start, end; }; static unsigned long long _text; -static struct text_range text_ranges[] = { +static unsigned long long relative_base; +static struct addr_range text_ranges[] = { { "_stext", "_etext" }, { "_sinittext", "_einittext" }, - { "_stext_l1", "_etext_l1" }, /* Blackfin on-chip L1 inst SRAM */ - { "_stext_l2", "_etext_l2" }, /* Blackfin on-chip L2 SRAM */ }; #define text_range_text (&text_ranges[0]) #define text_range_inittext (&text_ranges[1]) -static struct sym_entry *table; +static struct sym_entry **table; static unsigned int table_size, table_cnt; -static int all_symbols = 0; -static char symbol_prefix_char = '\0'; +static int all_symbols; -int token_profit[0x10000]; +static int token_profit[0x10000]; /* the table that holds the result of the compression */ -unsigned char best_table[256][2]; -unsigned char best_table_len[256]; +static unsigned char best_table[256][2]; +static unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] in.map > out.S\n"); exit(1); } -/* - * This ignores the intensely annoying "mapping symbols" found - * in ARM ELF files: $a, $t and $d. - */ -static inline int is_arm_mapping_symbol(const char *str) +static char *sym_name(const struct sym_entry *s) +{ + return (char *)s->sym + 1; +} + +static bool is_ignored_symbol(const char *name, char type) { - return str[0] == '$' && strchr("atd", str[1]) - && (str[2] == '\0' || str[2] == '.'); + if (type == 'u' || type == 'n') + return true; + + if (toupper(type) == 'A') { + /* Keep these useful absolute symbols */ + if (strcmp(name, "__kernel_syscall_via_break") && + strcmp(name, "__kernel_syscall_via_epc") && + strcmp(name, "__kernel_sigtramp") && + strcmp(name, "__gp")) + return true; + } + + return false; } -static int read_symbol_tr(const char *sym, unsigned long long addr) +static void check_symbol_range(const char *sym, unsigned long long addr, + struct addr_range *ranges, int entries) { size_t i; - struct text_range *tr; + struct addr_range *ar; - for (i = 0; i < ARRAY_SIZE(text_ranges); ++i) { - tr = &text_ranges[i]; + for (i = 0; i < entries; ++i) { + ar = &ranges[i]; - if (strcmp(sym, tr->stext) == 0) { - tr->start = addr; - return 0; - } else if (strcmp(sym, tr->etext) == 0) { - tr->end = addr; - return 0; + if (strcmp(sym, ar->start_sym) == 0) { + ar->start = addr; + return; + } else if (strcmp(sym, ar->end_sym) == 0) { + ar->end = addr; + return; } } - - return 1; } -static int read_symbol(FILE *in, struct sym_entry *s) +static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len) { - char str[500]; - char *sym, stype; - int rc; - - rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str); - if (rc != 3) { - if (rc != EOF && fgets(str, 500, in) == NULL) - fprintf(stderr, "Read error or end of file.\n"); - return -1; + char *name, type, *p; + unsigned long long addr; + size_t len; + ssize_t readlen; + struct sym_entry *sym; + + errno = 0; + readlen = getline(buf, buf_len, in); + if (readlen < 0) { + if (errno) { + perror("read_symbol"); + exit(EXIT_FAILURE); + } + return NULL; } - sym = str; - /* skip prefix char */ - if (symbol_prefix_char && str[0] == symbol_prefix_char) - sym++; + if ((*buf)[readlen - 1] == '\n') + (*buf)[readlen - 1] = 0; - /* Ignore most absolute/undefined (?) symbols. */ - if (strcmp(sym, "_text") == 0) - _text = s->addr; - else if (read_symbol_tr(sym, s->addr) == 0) - /* nothing to do */; - else if (toupper(stype) == 'A') - { - /* Keep these useful absolute symbols */ - if (strcmp(sym, "__kernel_syscall_via_break") && - strcmp(sym, "__kernel_syscall_via_epc") && - strcmp(sym, "__kernel_sigtramp") && - strcmp(sym, "__gp")) - return -1; + addr = strtoull(*buf, &p, 16); + if (*buf == p || *p++ != ' ' || !isascii((type = *p++)) || *p++ != ' ') { + fprintf(stderr, "line format error\n"); + exit(EXIT_FAILURE); + } + + name = p; + len = strlen(name); + + if (len >= KSYM_NAME_LEN) { + fprintf(stderr, "Symbol %s too long for kallsyms (%zu >= %d).\n" + "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n", + name, len, KSYM_NAME_LEN); + return NULL; } - else if (toupper(stype) == 'U' || - is_arm_mapping_symbol(sym)) - return -1; - /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */ - else if (str[0] == '$') - return -1; - /* exclude debugging symbols */ - else if (stype == 'N') - return -1; + + if (strcmp(name, "_text") == 0) + _text = addr; + + /* Ignore most absolute/undefined (?) symbols. */ + if (is_ignored_symbol(name, type)) + return NULL; + + check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges)); /* include the type field in the symbol name, so that it gets * compressed together */ - s->len = strlen(str) + 1; - s->sym = malloc(s->len + 1); - if (!s->sym) { - fprintf(stderr, "kallsyms failure: " - "unable to allocate required amount of memory\n"); - exit(EXIT_FAILURE); - } - strcpy((char *)s->sym + 1, str); - s->sym[0] = stype; + len++; - return 0; + sym = xmalloc(sizeof(*sym) + len + 1); + sym->addr = addr; + sym->len = len; + sym->sym[0] = type; + strcpy(sym_name(sym), name); + + return sym; } -static int symbol_valid_tr(struct sym_entry *s) +static int symbol_in_range(const struct sym_entry *s, + const struct addr_range *ranges, int entries) { size_t i; - struct text_range *tr; + const struct addr_range *ar; - for (i = 0; i < ARRAY_SIZE(text_ranges); ++i) { - tr = &text_ranges[i]; + for (i = 0; i < entries; ++i) { + ar = &ranges[i]; - if (s->addr >= tr->start && s->addr <= tr->end) + if (s->addr >= ar->start && s->addr <= ar->end) return 1; } return 0; } -static int symbol_valid(struct sym_entry *s) +static bool string_starts_with(const char *s, const char *prefix) { - /* Symbols which vary between passes. Passes 1 and 2 must have - * identical symbol lists. The kallsyms_* symbols below are only added - * after pass 1, they would be included in pass 2 when --all-symbols is - * specified so exclude them to get a stable symbol list. - */ - static char *special_symbols[] = { - "kallsyms_addresses", - "kallsyms_num_syms", - "kallsyms_names", - "kallsyms_markers", - "kallsyms_token_table", - "kallsyms_token_index", - - /* Exclude linker generated symbols which vary between passes */ - "_SDA_BASE_", /* ppc */ - "_SDA2_BASE_", /* ppc */ - NULL }; - int i; - int offset = 1; + return strncmp(s, prefix, strlen(prefix)) == 0; +} - /* skip prefix char */ - if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char) - offset++; +static int symbol_valid(const struct sym_entry *s) +{ + const char *name = sym_name(s); /* if --all-symbols is not specified, then symbols outside the text * and inittext sections are discarded */ if (!all_symbols) { - if (symbol_valid_tr(s) == 0) + /* + * Symbols starting with __start and __stop are used to denote + * section boundaries, and should always be included: + */ + if (string_starts_with(name, "__start_") || + string_starts_with(name, "__stop_")) + return 1; + + if (symbol_in_range(s, text_ranges, + ARRAY_SIZE(text_ranges)) == 0) return 0; /* Corner case. Discard any symbols with the same value as * _etext _einittext; they can move between pass 1 and 2 when @@ -210,57 +218,75 @@ static int symbol_valid(struct sym_entry *s) * rules. */ if ((s->addr == text_range_text->end && - strcmp((char *)s->sym + offset, text_range_text->etext)) || + strcmp(name, text_range_text->end_sym)) || (s->addr == text_range_inittext->end && - strcmp((char *)s->sym + offset, text_range_inittext->etext))) + strcmp(name, text_range_inittext->end_sym))) return 0; } - /* Exclude symbols which vary between passes. */ - if (strstr((char *)s->sym + offset, "_compiled.")) - return 0; + return 1; +} - for (i = 0; special_symbols[i]; i++) - if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 ) - return 0; +/* remove all the invalid symbols from the table */ +static void shrink_table(void) +{ + unsigned int i, pos; - return 1; + pos = 0; + for (i = 0; i < table_cnt; i++) { + if (symbol_valid(table[i])) { + if (pos != i) + table[pos] = table[i]; + pos++; + } else { + free(table[i]); + } + } + table_cnt = pos; } -static void read_map(FILE *in) +static void read_map(const char *in) { - while (!feof(in)) { + FILE *fp; + struct sym_entry *sym; + char *buf = NULL; + size_t buflen = 0; + + fp = fopen(in, "r"); + if (!fp) { + perror(in); + exit(1); + } + + while (!feof(fp)) { + sym = read_symbol(fp, &buf, &buflen); + if (!sym) + continue; + + sym->seq = table_cnt; + if (table_cnt >= table_size) { table_size += 10000; - table = realloc(table, sizeof(*table) * table_size); - if (!table) { - fprintf(stderr, "out of memory\n"); - exit (1); - } - } - if (read_symbol(in, &table[table_cnt]) == 0) { - table[table_cnt].start_pos = table_cnt; - table_cnt++; + table = xrealloc(table, sizeof(*table) * table_size); } + + table[table_cnt++] = sym; } + + free(buf); + fclose(fp); } -static void output_label(char *label) +static void output_label(const char *label) { - if (symbol_prefix_char) - printf(".globl %c%s\n", symbol_prefix_char, label); - else - printf(".globl %s\n", label); + printf(".globl %s\n", label); printf("\tALGN\n"); - if (symbol_prefix_char) - printf("%c%s:\n", symbol_prefix_char, label); - else - printf("%s:\n", label); + printf("%s:\n", label); } /* uncompress a compressed symbol. When this function is called, the best table * might still be compressed itself, so the function needs to be recursive */ -static int expand_symbol(unsigned char *data, int len, char *result) +static int expand_symbol(const unsigned char *data, int len, char *result) { int c, rlen, total=0; @@ -285,77 +311,107 @@ static int expand_symbol(unsigned char *data, int len, char *result) return total; } +static int compare_names(const void *a, const void *b) +{ + int ret; + const struct sym_entry *sa = *(const struct sym_entry **)a; + const struct sym_entry *sb = *(const struct sym_entry **)b; + + ret = strcmp(sym_name(sa), sym_name(sb)); + if (!ret) { + if (sa->addr > sb->addr) + return 1; + else if (sa->addr < sb->addr) + return -1; + + /* keep old order */ + return (int)(sa->seq - sb->seq); + } + + return ret; +} + +static void sort_symbols_by_name(void) +{ + qsort(table, table_cnt, sizeof(table[0]), compare_names); +} + static void write_src(void) { unsigned int i, k, off; unsigned int best_idx[256]; - unsigned int *markers; + unsigned int *markers, markers_cnt; char buf[KSYM_NAME_LEN]; - printf("#include <asm/types.h>\n"); + printf("#include <asm/bitsperlong.h>\n"); printf("#if BITS_PER_LONG == 64\n"); printf("#define PTR .quad\n"); - printf("#define ALGN .align 8\n"); + printf("#define ALGN .balign 8\n"); printf("#else\n"); printf("#define PTR .long\n"); - printf("#define ALGN .align 4\n"); + printf("#define ALGN .balign 4\n"); printf("#endif\n"); printf("\t.section .rodata, \"a\"\n"); - /* Provide proper symbols relocatability by their '_text' - * relativeness. The symbol names cannot be used to construct - * normal symbol references as the list of symbols contains - * symbols that are declared static and are private to their - * .o files. This prevents .tmp_kallsyms.o or any other - * object from referencing them. - */ - output_label("kallsyms_addresses"); - for (i = 0; i < table_cnt; i++) { - if (toupper(table[i].sym[0]) != 'A') { - if (_text <= table[i].addr) - printf("\tPTR\t_text + %#llx\n", - table[i].addr - _text); - else - printf("\tPTR\t_text - %#llx\n", - _text - table[i].addr); - } else { - printf("\tPTR\t%#llx\n", table[i].addr); - } - } - printf("\n"); - output_label("kallsyms_num_syms"); - printf("\tPTR\t%d\n", table_cnt); + printf("\t.long\t%u\n", table_cnt); printf("\n"); /* table of offset markers, that give the offset in the compressed stream * every 256 symbols */ - markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256)); - if (!markers) { - fprintf(stderr, "kallsyms failure: " - "unable to allocate required memory\n"); - exit(EXIT_FAILURE); - } + markers_cnt = (table_cnt + 255) / 256; + markers = xmalloc(sizeof(*markers) * markers_cnt); output_label("kallsyms_names"); off = 0; for (i = 0; i < table_cnt; i++) { if ((i & 0xFF) == 0) markers[i >> 8] = off; + table[i]->seq = i; + + /* There cannot be any symbol of length zero. */ + if (table[i]->len == 0) { + fprintf(stderr, "kallsyms failure: " + "unexpected zero symbol length\n"); + exit(EXIT_FAILURE); + } - printf("\t.byte 0x%02x", table[i].len); - for (k = 0; k < table[i].len; k++) - printf(", 0x%02x", table[i].sym[k]); - printf("\n"); + /* Only lengths that fit in up-to-two-byte ULEB128 are supported. */ + if (table[i]->len > 0x3FFF) { + fprintf(stderr, "kallsyms failure: " + "unexpected huge symbol length\n"); + exit(EXIT_FAILURE); + } + + /* Encode length with ULEB128. */ + if (table[i]->len <= 0x7F) { + /* Most symbols use a single byte for the length. */ + printf("\t.byte 0x%02x", table[i]->len); + off += table[i]->len + 1; + } else { + /* "Big" symbols use two bytes. */ + printf("\t.byte 0x%02x, 0x%02x", + (table[i]->len & 0x7F) | 0x80, + (table[i]->len >> 7) & 0x7F); + off += table[i]->len + 2; + } + for (k = 0; k < table[i]->len; k++) + printf(", 0x%02x", table[i]->sym[k]); - off += table[i].len + 1; + /* + * Now that we wrote out the compressed symbol name, restore the + * original name and print it in the comment. + */ + expand_symbol(table[i]->sym, table[i]->len, buf); + strcpy((char *)table[i]->sym, buf); + printf("\t/* %s */\n", table[i]->sym); } printf("\n"); output_label("kallsyms_markers"); - for (i = 0; i < ((table_cnt + 255) >> 8); i++) - printf("\tPTR\t%d\n", markers[i]); + for (i = 0; i < markers_cnt; i++) + printf("\t.long\t%u\n", markers[i]); printf("\n"); free(markers); @@ -374,13 +430,54 @@ static void write_src(void) for (i = 0; i < 256; i++) printf("\t.short\t%d\n", best_idx[i]); printf("\n"); + + output_label("kallsyms_offsets"); + + for (i = 0; i < table_cnt; i++) { + /* + * Use the offset relative to the lowest value + * encountered of all relative symbols, and emit + * non-relocatable fixed offsets that will be fixed + * up at runtime. + */ + + long long offset; + + offset = table[i]->addr - relative_base; + if (offset < 0 || offset > UINT_MAX) { + fprintf(stderr, "kallsyms failure: " + "relative symbol value %#llx out of range\n", + table[i]->addr); + exit(EXIT_FAILURE); + } + printf("\t.long\t%#x\t/* %s */\n", (int)offset, table[i]->sym); + } + printf("\n"); + + output_label("kallsyms_relative_base"); + /* Provide proper symbols relocatability by their '_text' relativeness. */ + if (_text <= relative_base) + printf("\tPTR\t_text + %#llx\n", relative_base - _text); + else + printf("\tPTR\t_text - %#llx\n", _text - relative_base); + printf("\n"); + + sort_symbols_by_name(); + output_label("kallsyms_seqs_of_names"); + for (i = 0; i < table_cnt; i++) + printf("\t.byte 0x%02x, 0x%02x, 0x%02x\t/* %s */\n", + (unsigned char)(table[i]->seq >> 16), + (unsigned char)(table[i]->seq >> 8), + (unsigned char)(table[i]->seq >> 0), + table[i]->sym); + printf("\n"); } /* table lookup compression functions */ /* count all the possible tokens in a symbol */ -static void learn_symbol(unsigned char *symbol, int len) +static void learn_symbol(const unsigned char *symbol, int len) { int i; @@ -389,7 +486,7 @@ static void learn_symbol(unsigned char *symbol, int len) } /* decrease the count for all the possible tokens in a symbol */ -static void forget_symbol(unsigned char *symbol, int len) +static void forget_symbol(const unsigned char *symbol, int len) { int i; @@ -397,24 +494,17 @@ static void forget_symbol(unsigned char *symbol, int len) token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--; } -/* remove all the invalid symbols from the table and do the initial token count */ -static void build_initial_tok_table(void) +/* do the initial token count */ +static void build_initial_token_table(void) { - unsigned int i, pos; + unsigned int i; - pos = 0; - for (i = 0; i < table_cnt; i++) { - if ( symbol_valid(&table[i]) ) { - if (pos != i) - table[pos] = table[i]; - learn_symbol(table[pos].sym, table[pos].len); - pos++; - } - } - table_cnt = pos; + for (i = 0; i < table_cnt; i++) + learn_symbol(table[i]->sym, table[i]->len); } -static void *find_token(unsigned char *str, int len, unsigned char *token) +static unsigned char *find_token(unsigned char *str, int len, + const unsigned char *token) { int i; @@ -427,22 +517,22 @@ static void *find_token(unsigned char *str, int len, unsigned char *token) /* replace a given token in all the valid symbols. Use the sampled symbols * to update the counts */ -static void compress_symbols(unsigned char *str, int idx) +static void compress_symbols(const unsigned char *str, int idx) { unsigned int i, len, size; unsigned char *p1, *p2; for (i = 0; i < table_cnt; i++) { - len = table[i].len; - p1 = table[i].sym; + len = table[i]->len; + p1 = table[i]->sym; /* find the token on the symbol */ p2 = find_token(p1, len, str); if (!p2) continue; /* decrease the counts for this symbol's tokens */ - forget_symbol(table[i].sym, len); + forget_symbol(table[i]->sym, len); size = len; @@ -461,10 +551,10 @@ static void compress_symbols(unsigned char *str, int idx) } while (p2); - table[i].len = len; + table[i]->len = len; /* increase the counts for this symbol's new tokens */ - learn_symbol(table[i].sym, len); + learn_symbol(table[i]->sym, len); } } @@ -498,7 +588,7 @@ static void optimize_result(void) * original char code */ if (!best_table_len[i]) { - /* find the token with the breates profit value */ + /* find the token with the best profit value */ best = find_best_token(); if (token_profit[best] == 0) break; @@ -519,12 +609,9 @@ static void insert_real_symbols_in_table(void) { unsigned int i, j, c; - memset(best_table, 0, sizeof(best_table)); - memset(best_table_len, 0, sizeof(best_table_len)); - for (i = 0; i < table_cnt; i++) { - for (j = 0; j < table[i].len; j++) { - c = table[i].sym[j]; + for (j = 0; j < table[i]->len; j++) { + c = table[i]->sym[j]; best_table[c][0]=c; best_table_len[c]=1; } @@ -533,23 +620,17 @@ static void insert_real_symbols_in_table(void) static void optimize_token_table(void) { - build_initial_tok_table(); + build_initial_token_table(); insert_real_symbols_in_table(); - /* When valid symbol is not registered, exit to error */ - if (!table_cnt) { - fprintf(stderr, "No valid symbol.\n"); - exit(1); - } - optimize_result(); } /* guess for "linker script provide" symbol */ static int may_be_linker_script_provide_symbol(const struct sym_entry *se) { - const char *symbol = (char *)se->sym + 1; + const char *symbol = sym_name(se); int len = se->len - 1; if (len < 8) @@ -581,25 +662,12 @@ static int may_be_linker_script_provide_symbol(const struct sym_entry *se) return 0; } -static int prefix_underscores_count(const char *str) -{ - const char *tail = str; - - while (*tail == '_') - tail++; - - return tail - str; -} - static int compare_symbols(const void *a, const void *b) { - const struct sym_entry *sa; - const struct sym_entry *sb; + const struct sym_entry *sa = *(const struct sym_entry **)a; + const struct sym_entry *sb = *(const struct sym_entry **)b; int wa, wb; - sa = a; - sb = b; - /* sort by address first */ if (sa->addr > sb->addr) return 1; @@ -619,41 +687,54 @@ static int compare_symbols(const void *a, const void *b) return wa - wb; /* sort by the number of prefix underscores */ - wa = prefix_underscores_count((const char *)sa->sym + 1); - wb = prefix_underscores_count((const char *)sb->sym + 1); + wa = strspn(sym_name(sa), "_"); + wb = strspn(sym_name(sb), "_"); if (wa != wb) return wa - wb; /* sort by initial order, so that other symbols are left undisturbed */ - return sa->start_pos - sb->start_pos; + return sa->seq - sb->seq; } static void sort_symbols(void) { - qsort(table, table_cnt, sizeof(struct sym_entry), compare_symbols); + qsort(table, table_cnt, sizeof(table[0]), compare_symbols); +} + +/* find the minimum non-absolute symbol address */ +static void record_relative_base(void) +{ + /* + * The table is sorted by address. + * Take the first symbol value. + */ + if (table_cnt) + relative_base = table[0]->addr; } int main(int argc, char **argv) { - if (argc >= 2) { - int i; - for (i = 1; i < argc; i++) { - if(strcmp(argv[i], "--all-symbols") == 0) - all_symbols = 1; - else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) { - char *p = &argv[i][16]; - /* skip quote */ - if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\'')) - p++; - symbol_prefix_char = *p; - } else - usage(); - } - } else if (argc != 1) + while (1) { + static const struct option long_options[] = { + {"all-symbols", no_argument, &all_symbols, 1}, + {}, + }; + + int c = getopt_long(argc, argv, "", long_options, NULL); + + if (c == -1) + break; + if (c != 0) + usage(); + } + + if (optind >= argc) usage(); - read_map(stdin); + read_map(argv[optind]); + shrink_table(); sort_symbols(); + record_relative_base(); optimize_token_table(); write_src(); |
