ia64/xen-unstable

view xen/tools/symbols.c @ 6109:37ac3cf33506

Fix where "!" operator used in Bitwise operation.

In IBM we have an internal source code scanner called BEAM. We have run
it against Xen and here are some of the results.

Signed-off-by: Jerone Young <jyoung5@us.ibm.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Aug 11 16:23:54 2005 +0000 (2005-08-11)
parents 41ceeb6828b5
children 8799d14bef77 9312a3e8a6f8 c589ca6d292b 40b887fa79d0 29aab159846c 1ae656509f02 23979fb12c49 84ee014ebd41 99914b54f7bf 81576d3d1ca8 3a8f27c6d56c cc5f88b719d0 fa0754a9f64f 112d44270733
line source
1 /* Generate assembler source containing symbol information
2 *
3 * Copyright 2002 by Kai Germaschewski
4 *
5 * This software may be used and distributed according to the terms
6 * of the GNU General Public License, incorporated herein by reference.
7 *
8 * Usage: nm -n <object-file> | scripts/symbols [--all-symbols] > symbols.S
9 *
10 * ChangeLog:
11 *
12 * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
13 * Changed the compression method from stem compression to "table lookup"
14 * compression
15 *
16 * Table compression uses all the unused char codes on the symbols and
17 * maps these to the most used substrings (tokens). For instance, it might
18 * map char code 0xF7 to represent "write_" and then in every symbol where
19 * "write_" appears it can be replaced by 0xF7, saving 5 bytes.
20 * The used codes themselves are also placed in the table so that the
21 * decompresion can work without "special cases".
22 * Applied to kernel symbols, this usually produces a compression ratio
23 * of about 50%.
24 *
25 */
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <ctype.h>
32 /* maximum token length used. It doesn't pay to increase it a lot, because
33 * very long substrings probably don't repeat themselves too often. */
34 #define MAX_TOK_SIZE 11
35 #define KSYM_NAME_LEN 127
37 /* we use only a subset of the complete symbol table to gather the token count,
38 * to speed up compression, at the expense of a little compression ratio */
39 #define WORKING_SET 1024
41 /* first find the best token only on the list of tokens that would profit more
42 * than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list.
43 * Increasing this value will put less tokens on the "good" list, so the search
44 * is faster. However, if the good list runs out of tokens, we must painfully
45 * search the bad list. */
46 #define GOOD_BAD_THRESHOLD 10
48 /* token hash parameters */
49 #define HASH_BITS 18
50 #define HASH_TABLE_SIZE (1 << HASH_BITS)
51 #define HASH_MASK (HASH_TABLE_SIZE - 1)
52 #define HASH_BASE_OFFSET 2166136261U
53 #define HASH_FOLD(a) ((a)&(HASH_MASK))
55 /* flags to mark symbols */
56 #define SYM_FLAG_VALID 1
57 #define SYM_FLAG_SAMPLED 2
59 struct sym_entry {
60 unsigned long long addr;
61 char type;
62 unsigned char flags;
63 unsigned char len;
64 unsigned char *sym;
65 };
68 static struct sym_entry *table;
69 static int size, cnt;
70 static unsigned long long _stext, _etext;
71 static int all_symbols = 0;
72 static char symbol_prefix_char = '\0';
74 struct token {
75 unsigned char data[MAX_TOK_SIZE];
76 unsigned char len;
77 /* profit: the number of bytes that could be saved by inserting this
78 * token into the table */
79 int profit;
80 struct token *next; /* next token on the hash list */
81 struct token *right; /* next token on the good/bad list */
82 struct token *left; /* previous token on the good/bad list */
83 struct token *smaller; /* token that is less one letter than this one */
84 };
86 struct token bad_head, good_head;
87 struct token *hash_table[HASH_TABLE_SIZE];
89 /* the table that holds the result of the compression */
90 unsigned char best_table[256][MAX_TOK_SIZE+1];
91 unsigned char best_table_len[256];
94 static void
95 usage(void)
96 {
97 fprintf(stderr, "Usage: symbols [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
98 exit(1);
99 }
101 /*
102 * This ignores the intensely annoying "mapping symbols" found
103 * in ARM ELF files: $a, $t and $d.
104 */
105 static inline int
106 is_arm_mapping_symbol(const char *str)
107 {
108 return str[0] == '$' && strchr("atd", str[1])
109 && (str[2] == '\0' || str[2] == '.');
110 }
112 static int
113 read_symbol(FILE *in, struct sym_entry *s)
114 {
115 char str[500];
116 char *sym;
117 int rc;
119 rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str);
120 if (rc != 3) {
121 if (rc != EOF) {
122 /* skip line */
123 fgets(str, 500, in);
124 }
125 return -1;
126 }
128 sym = str;
129 /* skip prefix char */
130 if (symbol_prefix_char && str[0] == symbol_prefix_char)
131 sym++;
133 /* Ignore most absolute/undefined (?) symbols. */
134 if (strcmp(sym, "_stext") == 0)
135 _stext = s->addr;
136 else if (strcmp(sym, "_etext") == 0)
137 _etext = s->addr;
138 else if (toupper(s->type) == 'A')
139 {
140 /* Keep these useful absolute symbols */
141 if (strcmp(sym, "__kernel_syscall_via_break") &&
142 strcmp(sym, "__kernel_syscall_via_epc") &&
143 strcmp(sym, "__kernel_sigtramp") &&
144 strcmp(sym, "__gp"))
145 return -1;
147 }
148 else if (toupper(s->type) == 'U' ||
149 is_arm_mapping_symbol(sym))
150 return -1;
152 /* include the type field in the symbol name, so that it gets
153 * compressed together */
154 s->len = strlen(str) + 1;
155 s->sym = (unsigned char *) malloc(s->len + 1);
156 strcpy((char *)s->sym + 1, str);
157 s->sym[0] = s->type;
159 return 0;
160 }
162 static int
163 symbol_valid(struct sym_entry *s)
164 {
165 /* Symbols which vary between passes. Passes 1 and 2 must have
166 * identical symbol lists. The symbols_* symbols below are only added
167 * after pass 1, they would be included in pass 2 when --all-symbols is
168 * specified so exclude them to get a stable symbol list.
169 */
170 static char *special_symbols[] = {
171 "symbols_addresses",
172 "symbols_num_syms",
173 "symbols_names",
174 "symbols_markers",
175 "symbols_token_table",
176 "symbols_token_index",
178 /* Exclude linker generated symbols which vary between passes */
179 "_SDA_BASE_", /* ppc */
180 "_SDA2_BASE_", /* ppc */
181 NULL };
182 int i;
183 int offset = 1;
185 /* skip prefix char */
186 if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
187 offset++;
189 /* if --all-symbols is not specified, then symbols outside the text
190 * and inittext sections are discarded */
191 if (!all_symbols) {
192 if (s->addr < _stext || s->addr > _etext)
193 return 0;
194 /* Corner case. Discard any symbols with the same value as
195 * _etext _einittext or _eextratext; they can move between pass
196 * 1 and 2 when the symbols data are added. If these symbols
197 * move then they may get dropped in pass 2, which breaks the
198 * symbols rules.
199 */
200 if (s->addr == _etext && strcmp((char *)s->sym + offset, "_etext"))
201 return 0;
202 }
204 /* Exclude symbols which vary between passes. */
205 if (strstr((char *)s->sym + offset, "_compiled."))
206 return 0;
208 for (i = 0; special_symbols[i]; i++)
209 if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 )
210 return 0;
212 return 1;
213 }
215 static void
216 read_map(FILE *in)
217 {
218 while (!feof(in)) {
219 if (cnt >= size) {
220 size += 10000;
221 table = realloc(table, sizeof(*table) * size);
222 if (!table) {
223 fprintf(stderr, "out of memory\n");
224 exit (1);
225 }
226 }
227 if (read_symbol(in, &table[cnt]) == 0)
228 cnt++;
229 }
230 }
232 static void output_label(char *label)
233 {
234 if (symbol_prefix_char)
235 printf(".globl %c%s\n", symbol_prefix_char, label);
236 else
237 printf(".globl %s\n", label);
238 printf("\tALGN\n");
239 if (symbol_prefix_char)
240 printf("%c%s:\n", symbol_prefix_char, label);
241 else
242 printf("%s:\n", label);
243 }
245 /* uncompress a compressed symbol. When this function is called, the best table
246 * might still be compressed itself, so the function needs to be recursive */
247 static int expand_symbol(unsigned char *data, int len, char *result)
248 {
249 int c, rlen, total=0;
251 while (len) {
252 c = *data;
253 /* if the table holds a single char that is the same as the one
254 * we are looking for, then end the search */
255 if (best_table[c][0]==c && best_table_len[c]==1) {
256 *result++ = c;
257 total++;
258 } else {
259 /* if not, recurse and expand */
260 rlen = expand_symbol(best_table[c], best_table_len[c], result);
261 total += rlen;
262 result += rlen;
263 }
264 data++;
265 len--;
266 }
267 *result=0;
269 return total;
270 }
272 static void
273 write_src(void)
274 {
275 int i, k, off, valid;
276 unsigned int best_idx[256];
277 unsigned int *markers;
278 char buf[KSYM_NAME_LEN+1];
280 printf("#include <asm/types.h>\n");
281 printf("#if BITS_PER_LONG == 64\n");
282 printf("#define PTR .quad\n");
283 printf("#define ALGN .align 8\n");
284 printf("#else\n");
285 printf("#define PTR .long\n");
286 printf("#define ALGN .align 4\n");
287 printf("#endif\n");
289 printf(".data\n");
291 output_label("symbols_addresses");
292 valid = 0;
293 for (i = 0; i < cnt; i++) {
294 if (table[i].flags & SYM_FLAG_VALID) {
295 printf("\tPTR\t%#llx\n", table[i].addr);
296 valid++;
297 }
298 }
299 printf("\n");
301 output_label("symbols_num_syms");
302 printf("\tPTR\t%d\n", valid);
303 printf("\n");
305 /* table of offset markers, that give the offset in the compressed stream
306 * every 256 symbols */
307 markers = (unsigned int *) malloc(sizeof(unsigned int)*((valid + 255) / 256));
309 output_label("symbols_names");
310 valid = 0;
311 off = 0;
312 for (i = 0; i < cnt; i++) {
314 if (!(table[i].flags & SYM_FLAG_VALID))
315 continue;
317 if ((valid & 0xFF) == 0)
318 markers[valid >> 8] = off;
320 printf("\t.byte 0x%02x", table[i].len);
321 for (k = 0; k < table[i].len; k++)
322 printf(", 0x%02x", table[i].sym[k]);
323 printf("\n");
325 off += table[i].len + 1;
326 valid++;
327 }
328 printf("\n");
330 output_label("symbols_markers");
331 for (i = 0; i < ((valid + 255) >> 8); i++)
332 printf("\tPTR\t%d\n", markers[i]);
333 printf("\n");
335 free(markers);
337 output_label("symbols_token_table");
338 off = 0;
339 for (i = 0; i < 256; i++) {
340 best_idx[i] = off;
341 expand_symbol(best_table[i],best_table_len[i],buf);
342 printf("\t.asciz\t\"%s\"\n", buf);
343 off += strlen(buf) + 1;
344 }
345 printf("\n");
347 output_label("symbols_token_index");
348 for (i = 0; i < 256; i++)
349 printf("\t.short\t%d\n", best_idx[i]);
350 printf("\n");
351 }
354 /* table lookup compression functions */
356 static inline unsigned int rehash_token(unsigned int hash, unsigned char data)
357 {
358 return ((hash * 16777619) ^ data);
359 }
361 static unsigned int hash_token(unsigned char *data, int len)
362 {
363 unsigned int hash=HASH_BASE_OFFSET;
364 int i;
366 for (i = 0; i < len; i++)
367 hash = rehash_token(hash, data[i]);
369 return HASH_FOLD(hash);
370 }
372 /* find a token given its data and hash value */
373 static struct token *find_token_hash(unsigned char *data, int len, unsigned int hash)
374 {
375 struct token *ptr;
377 ptr = hash_table[hash];
379 while (ptr) {
380 if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0))
381 return ptr;
382 ptr=ptr->next;
383 }
385 return NULL;
386 }
388 static inline void insert_token_in_group(struct token *head, struct token *ptr)
389 {
390 ptr->right = head->right;
391 ptr->right->left = ptr;
392 head->right = ptr;
393 ptr->left = head;
394 }
396 static inline void remove_token_from_group(struct token *ptr)
397 {
398 ptr->left->right = ptr->right;
399 ptr->right->left = ptr->left;
400 }
403 /* build the counts for all the tokens that start with "data", and have lenghts
404 * from 2 to "len" */
405 static void learn_token(unsigned char *data, int len)
406 {
407 struct token *ptr,*last_ptr;
408 int i, newprofit;
409 unsigned int hash = HASH_BASE_OFFSET;
410 unsigned int hashes[MAX_TOK_SIZE + 1];
412 if (len > MAX_TOK_SIZE)
413 len = MAX_TOK_SIZE;
415 /* calculate and store the hash values for all the sub-tokens */
416 hash = rehash_token(hash, data[0]);
417 for (i = 2; i <= len; i++) {
418 hash = rehash_token(hash, data[i-1]);
419 hashes[i] = HASH_FOLD(hash);
420 }
422 last_ptr = NULL;
423 ptr = NULL;
425 for (i = len; i >= 2; i--) {
426 hash = hashes[i];
428 if (!ptr) ptr = find_token_hash(data, i, hash);
430 if (!ptr) {
431 /* create a new token entry */
432 ptr = (struct token *) malloc(sizeof(*ptr));
434 memcpy(ptr->data, data, i);
435 ptr->len = i;
437 /* when we create an entry, it's profit is 0 because
438 * we also take into account the size of the token on
439 * the compressed table. We then subtract GOOD_BAD_THRESHOLD
440 * so that the test to see if this token belongs to
441 * the good or bad list, is a comparison to zero */
442 ptr->profit = -GOOD_BAD_THRESHOLD;
444 ptr->next = hash_table[hash];
445 hash_table[hash] = ptr;
447 insert_token_in_group(&bad_head, ptr);
449 ptr->smaller = NULL;
450 } else {
451 newprofit = ptr->profit + (ptr->len - 1);
452 /* check to see if this token needs to be moved to a
453 * different list */
454 if((ptr->profit < 0) && (newprofit >= 0)) {
455 remove_token_from_group(ptr);
456 insert_token_in_group(&good_head,ptr);
457 }
458 ptr->profit = newprofit;
459 }
461 if (last_ptr) last_ptr->smaller = ptr;
462 last_ptr = ptr;
464 ptr = ptr->smaller;
465 }
466 }
468 /* decrease the counts for all the tokens that start with "data", and have lenghts
469 * from 2 to "len". This function is much simpler than learn_token because we have
470 * more guarantees (tho tokens exist, the ->smaller pointer is set, etc.)
471 * The two separate functions exist only because of compression performance */
472 static void forget_token(unsigned char *data, int len)
473 {
474 struct token *ptr;
475 int i, newprofit;
476 unsigned int hash=0;
478 if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE;
480 hash = hash_token(data, len);
481 ptr = find_token_hash(data, len, hash);
483 for (i = len; i >= 2; i--) {
485 newprofit = ptr->profit - (ptr->len - 1);
486 if ((ptr->profit >= 0) && (newprofit < 0)) {
487 remove_token_from_group(ptr);
488 insert_token_in_group(&bad_head, ptr);
489 }
490 ptr->profit=newprofit;
492 ptr=ptr->smaller;
493 }
494 }
496 /* count all the possible tokens in a symbol */
497 static void learn_symbol(unsigned char *symbol, int len)
498 {
499 int i;
501 for (i = 0; i < len - 1; i++)
502 learn_token(symbol + i, len - i);
503 }
505 /* decrease the count for all the possible tokens in a symbol */
506 static void forget_symbol(unsigned char *symbol, int len)
507 {
508 int i;
510 for (i = 0; i < len - 1; i++)
511 forget_token(symbol + i, len - i);
512 }
514 /* set all the symbol flags and do the initial token count */
515 static void build_initial_tok_table(void)
516 {
517 int i, use_it, valid;
519 valid = 0;
520 for (i = 0; i < cnt; i++) {
521 table[i].flags = 0;
522 if ( symbol_valid(&table[i]) ) {
523 table[i].flags |= SYM_FLAG_VALID;
524 valid++;
525 }
526 }
528 use_it = 0;
529 for (i = 0; i < cnt; i++) {
531 /* subsample the available symbols. This method is almost like
532 * a Bresenham's algorithm to get uniformly distributed samples
533 * across the symbol table */
534 if (table[i].flags & SYM_FLAG_VALID) {
536 use_it += WORKING_SET;
538 if (use_it >= valid) {
539 table[i].flags |= SYM_FLAG_SAMPLED;
540 use_it -= valid;
541 }
542 }
543 if (table[i].flags & SYM_FLAG_SAMPLED)
544 learn_symbol(table[i].sym, table[i].len);
545 }
546 }
548 /* replace a given token in all the valid symbols. Use the sampled symbols
549 * to update the counts */
550 static void compress_symbols(unsigned char *str, int tlen, int idx)
551 {
552 int i, len, learn, size;
553 unsigned char *p;
555 for (i = 0; i < cnt; i++) {
557 if (!(table[i].flags & SYM_FLAG_VALID)) continue;
559 len = table[i].len;
560 learn = 0;
561 p = table[i].sym;
563 do {
564 /* find the token on the symbol */
565 p = (unsigned char *) strstr((char *) p, (char *) str);
566 if (!p) break;
568 if (!learn) {
569 /* if this symbol was used to count, decrease it */
570 if (table[i].flags & SYM_FLAG_SAMPLED)
571 forget_symbol(table[i].sym, len);
572 learn = 1;
573 }
575 *p = idx;
576 size = (len - (p - table[i].sym)) - tlen + 1;
577 memmove(p + 1, p + tlen, size);
578 p++;
579 len -= tlen - 1;
581 } while (size >= tlen);
583 if(learn) {
584 table[i].len = len;
585 /* if this symbol was used to count, learn it again */
586 if(table[i].flags & SYM_FLAG_SAMPLED)
587 learn_symbol(table[i].sym, len);
588 }
589 }
590 }
592 /* search the token with the maximum profit */
593 static struct token *find_best_token(void)
594 {
595 struct token *ptr,*best,*head;
596 int bestprofit;
598 bestprofit=-10000;
600 /* failsafe: if the "good" list is empty search from the "bad" list */
601 if(good_head.right == &good_head) head = &bad_head;
602 else head = &good_head;
604 ptr = head->right;
605 best = NULL;
606 while (ptr != head) {
607 if (ptr->profit > bestprofit) {
608 bestprofit = ptr->profit;
609 best = ptr;
610 }
611 ptr = ptr->right;
612 }
614 return best;
615 }
617 /* this is the core of the algorithm: calculate the "best" table */
618 static void optimize_result(void)
619 {
620 struct token *best;
621 int i;
623 /* using the '\0' symbol last allows compress_symbols to use standard
624 * fast string functions */
625 for (i = 255; i >= 0; i--) {
627 /* if this table slot is empty (it is not used by an actual
628 * original char code */
629 if (!best_table_len[i]) {
631 /* find the token with the breates profit value */
632 best = find_best_token();
634 /* place it in the "best" table */
635 best_table_len[i] = best->len;
636 memcpy(best_table[i], best->data, best_table_len[i]);
637 /* zero terminate the token so that we can use strstr
638 in compress_symbols */
639 best_table[i][best_table_len[i]]='\0';
641 /* replace this token in all the valid symbols */
642 compress_symbols(best_table[i], best_table_len[i], i);
643 }
644 }
645 }
647 /* start by placing the symbols that are actually used on the table */
648 static void insert_real_symbols_in_table(void)
649 {
650 int i, j, c;
652 memset(best_table, 0, sizeof(best_table));
653 memset(best_table_len, 0, sizeof(best_table_len));
655 for (i = 0; i < cnt; i++) {
656 if (table[i].flags & SYM_FLAG_VALID) {
657 for (j = 0; j < table[i].len; j++) {
658 c = table[i].sym[j];
659 best_table[c][0]=c;
660 best_table_len[c]=1;
661 }
662 }
663 }
664 }
666 static void optimize_token_table(void)
667 {
668 memset(hash_table, 0, sizeof(hash_table));
670 good_head.left = &good_head;
671 good_head.right = &good_head;
673 bad_head.left = &bad_head;
674 bad_head.right = &bad_head;
676 build_initial_tok_table();
678 insert_real_symbols_in_table();
680 /* When valid symbol is not registered, exit to error */
681 if (good_head.left == good_head.right &&
682 bad_head.left == bad_head.right) {
683 fprintf(stderr, "No valid symbol.\n");
684 exit(1);
685 }
687 optimize_result();
688 }
691 int
692 main(int argc, char **argv)
693 {
694 if (argc >= 2) {
695 int i;
696 for (i = 1; i < argc; i++) {
697 if(strcmp(argv[i], "--all-symbols") == 0)
698 all_symbols = 1;
699 else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
700 char *p = &argv[i][16];
701 /* skip quote */
702 if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
703 p++;
704 symbol_prefix_char = *p;
705 } else
706 usage();
707 }
708 } else if (argc != 1)
709 usage();
711 read_map(stdin);
712 optimize_token_table();
713 write_src();
715 return 0;
716 }