ia64/xen-unstable

changeset 5840:48aed1403fe3

Port kallsyms to Xen, as 'symbols'.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Jul 22 16:44:33 2005 +0000 (2005-07-22)
parents 7627476544b5
children dc7c14e533c2
files .hgignore xen/Makefile xen/arch/ia64/Makefile xen/arch/x86/Makefile xen/arch/x86/traps.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/traps.c xen/common/symbols.c xen/include/asm-x86/types.h xen/include/xen/symbols.h xen/tools/Makefile xen/tools/symbols.c
line diff
     1.1 --- a/.hgignore	Fri Jul 22 14:25:10 2005 +0000
     1.2 +++ b/.hgignore	Fri Jul 22 16:44:33 2005 +0000
     1.3 @@ -162,10 +162,6 @@
     1.4  ^xen/arch/x86/asm-offsets\.s$
     1.5  ^xen/arch/x86/boot/mkelf32$
     1.6  ^xen/ddb/.*$
     1.7 -^xen/drivers/pci/classlist\.h$
     1.8 -^xen/drivers/pci/devlist\.h$
     1.9 -^xen/drivers/pci/gen-devlist$
    1.10 -^xen/figlet/figlet$
    1.11  ^xen/include/asm$
    1.12  ^xen/include/asm-.*/asm-offsets\.h$
    1.13  ^xen/include/hypervisor-ifs/arch$
    1.14 @@ -175,8 +171,8 @@
    1.15  ^xen/include/xen/banner\.h$
    1.16  ^xen/include/xen/compile\.h$
    1.17  ^xen/tags$
    1.18 -^xen/tools/elf-reloc$
    1.19  ^xen/tools/figlet/figlet$
    1.20 +^xen/tools/symbols$
    1.21  ^xen/xen$
    1.22  ^xen/xen-syms$
    1.23  ^xen/xen\..*$
     2.1 --- a/xen/Makefile	Fri Jul 22 14:25:10 2005 +0000
     2.2 +++ b/xen/Makefile	Fri Jul 22 16:44:33 2005 +0000
     2.3 @@ -50,10 +50,10 @@ clean: delete-unfresh-files
     2.4  	$(MAKE) -C arch/$(TARGET_ARCH) clean
     2.5  	rm -f include/asm *.o $(TARGET)* *~ core
     2.6  	rm -f include/asm-*/asm-offsets.h
     2.7 -	rm -f tools/figlet/*.o tools/figlet/figlet
     2.8  	rm -f include/xen/acm_policy.h
     2.9  
    2.10  $(TARGET): delete-unfresh-files
    2.11 +	$(MAKE) -C tools
    2.12  	$(MAKE) include/xen/compile.h
    2.13  	$(MAKE) include/xen/acm_policy.h
    2.14  	[ -e include/asm ] || ln -sf asm-$(TARGET_ARCH) include/asm
    2.15 @@ -71,7 +71,6 @@ endif
    2.16  delete-unfresh-files:
    2.17  	@if [ ! -r include/xen/compile.h -o -O include/xen/compile.h ]; then \
    2.18  		rm -f include/xen/{banner,compile}.h; \
    2.19 -		$(MAKE) -C arch/$(TARGET_ARCH) delete-unfresh-files; \
    2.20  	fi
    2.21  
    2.22  # acm_policy.h contains security policy for Xen
    2.23 @@ -105,12 +104,7 @@ include/xen/compile.h: include/xen/compi
    2.24  	@cat include/xen/banner.h >> $@.new
    2.25  	@mv -f $@.new $@
    2.26  
    2.27 -tools/figlet/figlet: tools/figlet/figlet.o
    2.28 -	$(HOSTCC) -o $@ $<
    2.29 -tools/figlet/figlet.o: tools/figlet/figlet.c
    2.30 -	$(HOSTCC) -o $@ -c $<
    2.31 -
    2.32 -include/xen/banner.h: tools/figlet/figlet tools/figlet/xen.flf
    2.33 +include/xen/banner.h:
    2.34  	tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) > $@.new
    2.35  	@mv -f $@.new $@
    2.36  
     3.1 --- a/xen/arch/ia64/Makefile	Fri Jul 22 14:25:10 2005 +0000
     3.2 +++ b/xen/arch/ia64/Makefile	Fri Jul 22 16:44:33 2005 +0000
     3.3 @@ -62,10 +62,4 @@ clean:
     3.4  	rm -f *.o *~ core  xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s
     3.5  	rm -f lib/*.o
     3.6  
     3.7 -# setup.o contains bits of compile.h so it must be blown away
     3.8 -delete-unfresh-files:
     3.9 -	echo any unfresh-files to delete for ia64\?
    3.10 -#	rm -f setup.o
    3.11 -
    3.12 -.PHONY: default clean delete-unfresh-files
    3.13 -
    3.14 +.PHONY: default clean
     4.1 --- a/xen/arch/x86/Makefile	Fri Jul 22 14:25:10 2005 +0000
     4.2 +++ b/xen/arch/x86/Makefile	Fri Jul 22 16:44:33 2005 +0000
     4.3 @@ -37,6 +37,15 @@ default: $(TARGET)
     4.4  $(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(TARGET_SUBARCH)/xen.lds
     4.5  	$(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
     4.6  	    boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@
     4.7 +	nm -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
     4.8 +	$(MAKE) $(BASEDIR)/xen-syms.o
     4.9 +	$(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
    4.10 +	    boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
    4.11 +	nm -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
    4.12 +	$(MAKE) $(BASEDIR)/xen-syms.o
    4.13 +	$(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
    4.14 +	    boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
    4.15 +	rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
    4.16  
    4.17  asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
    4.18  	$(CC) $(CFLAGS) -S -o $@ $<
    4.19 @@ -53,7 +62,4 @@ clean:
    4.20  	rm -f genapic/*.o genapic/*~ genapic/core
    4.21  	rm -f cpu/*.o cpu/*~ cpu/core
    4.22  
    4.23 -delete-unfresh-files:
    4.24 -	# nothing
    4.25 -
    4.26 -.PHONY: default clean delete-unfresh-files
    4.27 +.PHONY: default clean
     5.1 --- a/xen/arch/x86/traps.c	Fri Jul 22 14:25:10 2005 +0000
     5.2 +++ b/xen/arch/x86/traps.c	Fri Jul 22 16:44:33 2005 +0000
     5.3 @@ -40,6 +40,7 @@
     5.4  #include <xen/perfc.h>
     5.5  #include <xen/softirq.h>
     5.6  #include <xen/domain_page.h>
     5.7 +#include <xen/symbols.h>
     5.8  #include <asm/shadow.h>
     5.9  #include <asm/system.h>
    5.10  #include <asm/io.h>
    5.11 @@ -100,7 +101,7 @@ unsigned long do_get_debugreg(int reg);
    5.12  static int debug_stack_lines = 20;
    5.13  integer_param("debug_stack_lines", debug_stack_lines);
    5.14  
    5.15 -static inline int kernel_text_address(unsigned long addr)
    5.16 +int is_kernel_text(unsigned long addr)
    5.17  {
    5.18      extern char _stext, _etext;
    5.19      if (addr >= (unsigned long) &_stext &&
    5.20 @@ -110,6 +111,12 @@ static inline int kernel_text_address(un
    5.21  
    5.22  }
    5.23  
    5.24 +unsigned long kernel_text_end(void)
    5.25 +{
    5.26 +    extern char _etext;
    5.27 +    return (unsigned long) &_etext;
    5.28 +}
    5.29 +
    5.30  void show_guest_stack(void)
    5.31  {
    5.32      int i;
    5.33 @@ -150,11 +157,12 @@ void show_trace(unsigned long *esp)
    5.34      while ( ((long) stack & (STACK_SIZE-1)) != 0 )
    5.35      {
    5.36          addr = *stack++;
    5.37 -        if ( kernel_text_address(addr) )
    5.38 +        if ( is_kernel_text(addr) )
    5.39          {
    5.40              if ( (i != 0) && ((i % 6) == 0) )
    5.41                  printk("\n   ");
    5.42 -            printk("[<%p>] ", _p(addr));
    5.43 +            printk("[<%p>]", _p(addr));
    5.44 +            print_symbol(" %s\n", addr);
    5.45              i++;
    5.46          }
    5.47      }
    5.48 @@ -177,10 +185,7 @@ void show_stack(unsigned long *esp)
    5.49          if ( (i != 0) && ((i % 8) == 0) )
    5.50              printk("\n   ");
    5.51          addr = *stack++;
    5.52 -        if ( kernel_text_address(addr) )
    5.53 -            printk("[%p] ", _p(addr));
    5.54 -        else
    5.55 -            printk("%p ", _p(addr));
    5.56 +        printk("%p ", _p(addr));
    5.57      }
    5.58      if ( i == 0 )
    5.59          printk("Stack empty.");
     6.1 --- a/xen/arch/x86/x86_32/traps.c	Fri Jul 22 14:25:10 2005 +0000
     6.2 +++ b/xen/arch/x86/x86_32/traps.c	Fri Jul 22 16:44:33 2005 +0000
     6.3 @@ -6,6 +6,7 @@
     6.4  #include <xen/console.h>
     6.5  #include <xen/mm.h>
     6.6  #include <xen/irq.h>
     6.7 +#include <xen/symbols.h>
     6.8  #include <asm/current.h>
     6.9  #include <asm/flushtlb.h>
    6.10  #include <asm/vmx.h>
    6.11 @@ -63,10 +64,10 @@ void show_registers(struct cpu_user_regs
    6.12          }
    6.13      }
    6.14  
    6.15 -    printk("CPU:    %d\nEIP:    %04lx:[<%08lx>]      \nEFLAGS: %08lx   "
    6.16 -           "CONTEXT: %s\n",
    6.17 -           smp_processor_id(), (unsigned long)0xffff & regs->cs,
    6.18 -           eip, eflags, context);
    6.19 +    printk("CPU:    %d\nEIP:    %04lx:[<%08lx>]",
    6.20 +           smp_processor_id(), (unsigned long)0xffff & regs->cs, eip);
    6.21 +    print_symbol(" %s\n", eip);
    6.22 +    printk("EFLAGS: %08lx   CONTEXT: %s\n", eflags, context);
    6.23      printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
    6.24             regs->eax, regs->ebx, regs->ecx, regs->edx);
    6.25      printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08lx\n",
    6.26 @@ -119,8 +120,10 @@ asmlinkage void do_double_fault(void)
    6.27  
    6.28      /* Find information saved during fault and dump it to the console. */
    6.29      tss = &init_tss[cpu];
    6.30 -    printk("CPU:    %d\nEIP:    %04x:[<%08x>]      \nEFLAGS: %08x\n",
    6.31 -           cpu, tss->cs, tss->eip, tss->eflags);
    6.32 +    printk("CPU:    %d\nEIP:    %04x:[<%08x>]",
    6.33 +           cpu, tss->cs, tss->eip);
    6.34 +    print_symbol(" %s\n", tss->eip);
    6.35 +    printk("EFLAGS: %08x\n", tss->eflags);
    6.36      printk("CR3:    %08x\n", tss->__cr3);
    6.37      printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
    6.38             tss->eax, tss->ebx, tss->ecx, tss->edx);
     7.1 --- a/xen/arch/x86/x86_64/traps.c	Fri Jul 22 14:25:10 2005 +0000
     7.2 +++ b/xen/arch/x86/x86_64/traps.c	Fri Jul 22 16:44:33 2005 +0000
     7.3 @@ -6,6 +6,7 @@
     7.4  #include <xen/errno.h>
     7.5  #include <xen/mm.h>
     7.6  #include <xen/irq.h>
     7.7 +#include <xen/symbols.h>
     7.8  #include <xen/console.h>
     7.9  #include <xen/sched.h>
    7.10  #include <asm/current.h>
    7.11 @@ -14,8 +15,10 @@
    7.12  
    7.13  void show_registers(struct cpu_user_regs *regs)
    7.14  {
    7.15 -    printk("CPU:    %d\nEIP:    %04x:[<%016lx>]      \nEFLAGS: %016lx\n",
    7.16 -           smp_processor_id(), 0xffff & regs->cs, regs->rip, regs->eflags);
    7.17 +    printk("CPU:    %d\nEIP:    %04x:[<%016lx>]",
    7.18 +           smp_processor_id(), 0xffff & regs->cs, regs->rip);
    7.19 +    print_symbol(" %s\n", regs->rip);
    7.20 +    printk("EFLAGS: %016lx\n", regs->eflags);
    7.21      printk("rax: %016lx   rbx: %016lx   rcx: %016lx   rdx: %016lx\n",
    7.22             regs->rax, regs->rbx, regs->rcx, regs->rdx);
    7.23      printk("rsi: %016lx   rdi: %016lx   rbp: %016lx   rsp: %016lx\n",
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/xen/common/symbols.c	Fri Jul 22 16:44:33 2005 +0000
     8.3 @@ -0,0 +1,158 @@
     8.4 +/*
     8.5 + * symbols.c: in-kernel printing of symbolic oopses and stack traces.
     8.6 + *
     8.7 + * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
     8.8 + *
     8.9 + * ChangeLog:
    8.10 + *
    8.11 + * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
    8.12 + *      Changed the compression method from stem compression to "table lookup"
    8.13 + *      compression (see tools/symbols.c for a more complete description)
    8.14 + */
    8.15 +
    8.16 +#include <xen/config.h>
    8.17 +#include <xen/symbols.h>
    8.18 +#include <xen/init.h>
    8.19 +#include <xen/lib.h>
    8.20 +#include <xen/string.h>
    8.21 +
    8.22 +/* These will be re-linked against their real values during the second link stage */
    8.23 +extern unsigned long symbols_addresses[] __attribute__((weak));
    8.24 +extern unsigned long symbols_num_syms __attribute__((weak,section("data")));
    8.25 +extern u8 symbols_names[] __attribute__((weak));
    8.26 +
    8.27 +extern u8 symbols_token_table[] __attribute__((weak));
    8.28 +extern u16 symbols_token_index[] __attribute__((weak));
    8.29 +
    8.30 +extern unsigned long symbols_markers[] __attribute__((weak));
    8.31 +
    8.32 +/* expand a compressed symbol data into the resulting uncompressed string,
    8.33 +   given the offset to where the symbol is in the compressed stream */
    8.34 +static unsigned int symbols_expand_symbol(unsigned int off, char *result)
    8.35 +{
    8.36 +    int len, skipped_first = 0;
    8.37 +    u8 *tptr, *data;
    8.38 +
    8.39 +    /* get the compressed symbol length from the first symbol byte */
    8.40 +    data = &symbols_names[off];
    8.41 +    len = *data;
    8.42 +    data++;
    8.43 +
    8.44 +    /* update the offset to return the offset for the next symbol on
    8.45 +     * the compressed stream */
    8.46 +    off += len + 1;
    8.47 +
    8.48 +    /* for every byte on the compressed symbol data, copy the table
    8.49 +       entry for that byte */
    8.50 +    while(len) {
    8.51 +        tptr = &symbols_token_table[ symbols_token_index[*data] ];
    8.52 +        data++;
    8.53 +        len--;
    8.54 +
    8.55 +        while (*tptr) {
    8.56 +            if(skipped_first) {
    8.57 +                *result = *tptr;
    8.58 +                result++;
    8.59 +            } else
    8.60 +                skipped_first = 1;
    8.61 +            tptr++;
    8.62 +        }
    8.63 +    }
    8.64 +
    8.65 +    *result = '\0';
    8.66 +
    8.67 +    /* return to offset to the next symbol */
    8.68 +    return off;
    8.69 +}
    8.70 +
    8.71 +/* find the offset on the compressed stream given and index in the
    8.72 + * symbols array */
    8.73 +static unsigned int get_symbol_offset(unsigned long pos)
    8.74 +{
    8.75 +    u8 *name;
    8.76 +    int i;
    8.77 +
    8.78 +    /* use the closest marker we have. We have markers every 256 positions,
    8.79 +     * so that should be close enough */
    8.80 +    name = &symbols_names[ symbols_markers[pos>>8] ];
    8.81 +
    8.82 +    /* sequentially scan all the symbols up to the point we're searching for.
    8.83 +     * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
    8.84 +     * just need to add the len to the current pointer for every symbol we
    8.85 +     * wish to skip */
    8.86 +    for(i = 0; i < (pos&0xFF); i++)
    8.87 +        name = name + (*name) + 1;
    8.88 +
    8.89 +    return name - symbols_names;
    8.90 +}
    8.91 +
    8.92 +const char *symbols_lookup(unsigned long addr,
    8.93 +                           unsigned long *symbolsize,
    8.94 +                           unsigned long *offset,
    8.95 +                           char *namebuf)
    8.96 +{
    8.97 +    unsigned long i, low, high, mid;
    8.98 +    unsigned long symbol_end = 0;
    8.99 +
   8.100 +    /* This kernel should never had been booted. */
   8.101 +    BUG_ON(!symbols_addresses);
   8.102 +
   8.103 +    namebuf[KSYM_NAME_LEN] = 0;
   8.104 +    namebuf[0] = 0;
   8.105 +
   8.106 +    if (!is_kernel_text(addr))
   8.107 +        return NULL;
   8.108 +
   8.109 +        /* do a binary search on the sorted symbols_addresses array */
   8.110 +    low = 0;
   8.111 +    high = symbols_num_syms;
   8.112 +
   8.113 +    while (high-low > 1) {
   8.114 +        mid = (low + high) / 2;
   8.115 +        if (symbols_addresses[mid] <= addr) low = mid;
   8.116 +        else high = mid;
   8.117 +    }
   8.118 +
   8.119 +    /* search for the first aliased symbol. Aliased symbols are
   8.120 +           symbols with the same address */
   8.121 +    while (low && symbols_addresses[low - 1] == symbols_addresses[low])
   8.122 +        --low;
   8.123 +
   8.124 +        /* Grab name */
   8.125 +    symbols_expand_symbol(get_symbol_offset(low), namebuf);
   8.126 +
   8.127 +    /* Search for next non-aliased symbol */
   8.128 +    for (i = low + 1; i < symbols_num_syms; i++) {
   8.129 +        if (symbols_addresses[i] > symbols_addresses[low]) {
   8.130 +            symbol_end = symbols_addresses[i];
   8.131 +            break;
   8.132 +        }
   8.133 +    }
   8.134 +
   8.135 +    /* if we found no next symbol, we use the end of the section */
   8.136 +    if (!symbol_end)
   8.137 +        symbol_end = kernel_text_end();
   8.138 +
   8.139 +    *symbolsize = symbol_end - symbols_addresses[low];
   8.140 +    *offset = addr - symbols_addresses[low];
   8.141 +    return namebuf;
   8.142 +}
   8.143 +
   8.144 +/* Replace "%s" in format with address, or returns -errno. */
   8.145 +void __print_symbol(const char *fmt, unsigned long address)
   8.146 +{
   8.147 +    const char *name;
   8.148 +    unsigned long offset, size;
   8.149 +    char namebuf[KSYM_NAME_LEN+1];
   8.150 +    char buffer[sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN +
   8.151 +               2*(BITS_PER_LONG*3/10) + 1];
   8.152 +
   8.153 +    name = symbols_lookup(address, &size, &offset, namebuf);
   8.154 +
   8.155 +    if (!name)
   8.156 +        sprintf(buffer, "???");
   8.157 +    else
   8.158 +        sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
   8.159 +
   8.160 +    printk(fmt, buffer);
   8.161 +}
     9.1 --- a/xen/include/asm-x86/types.h	Fri Jul 22 14:25:10 2005 +0000
     9.2 +++ b/xen/include/asm-x86/types.h	Fri Jul 22 16:44:33 2005 +0000
     9.3 @@ -1,10 +1,9 @@
     9.4  #ifndef __X86_TYPES_H__
     9.5  #define __X86_TYPES_H__
     9.6  
     9.7 -/*
     9.8 - * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
     9.9 - * header files exported to user space
    9.10 - */
    9.11 +#ifndef __ASSEMBLY__
    9.12 +
    9.13 +#include <xen/config.h>
    9.14  
    9.15  typedef __signed__ char __s8;
    9.16  typedef unsigned char __u8;
    9.17 @@ -25,8 +24,6 @@ typedef unsigned long __u64;
    9.18  #endif
    9.19  #endif
    9.20  
    9.21 -#include <xen/config.h>
    9.22 -
    9.23  typedef signed char s8;
    9.24  typedef unsigned char u8;
    9.25  
    9.26 @@ -39,9 +36,6 @@ typedef unsigned int u32;
    9.27  #if defined(__i386__)
    9.28  typedef signed long long s64;
    9.29  typedef unsigned long long u64;
    9.30 -#define BITS_PER_LONG 32
    9.31 -#define BYTES_PER_LONG 4
    9.32 -#define LONG_BYTEORDER 2
    9.33  #if defined(CONFIG_X86_PAE)
    9.34  typedef u64 physaddr_t;
    9.35  #else
    9.36 @@ -50,12 +44,21 @@ typedef u32 physaddr_t;
    9.37  #elif defined(__x86_64__)
    9.38  typedef signed long s64;
    9.39  typedef unsigned long u64;
    9.40 -#define BITS_PER_LONG 64
    9.41 -#define BYTES_PER_LONG 8
    9.42 -#define LONG_BYTEORDER 3
    9.43  typedef u64 physaddr_t;
    9.44  #endif
    9.45  
    9.46  typedef unsigned long size_t;
    9.47  
    9.48 +#endif /* __ASSEMBLY__ */
    9.49 +
    9.50 +#if defined(__i386__)
    9.51 +#define BITS_PER_LONG 32
    9.52 +#define BYTES_PER_LONG 4
    9.53 +#define LONG_BYTEORDER 2
    9.54 +#elif defined(__x86_64__)
    9.55 +#define BITS_PER_LONG 64
    9.56 +#define BYTES_PER_LONG 8
    9.57 +#define LONG_BYTEORDER 3
    9.58 +#endif
    9.59 +
    9.60  #endif /* __X86_TYPES_H__ */
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/xen/include/xen/symbols.h	Fri Jul 22 16:44:33 2005 +0000
    10.3 @@ -0,0 +1,45 @@
    10.4 +#ifndef _XEN_SYMBOLS_H
    10.5 +#define _XEN_SYMBOLS_H
    10.6 +
    10.7 +#include <xen/config.h>
    10.8 +#include <xen/types.h>
    10.9 +
   10.10 +#define KSYM_NAME_LEN 127
   10.11 +
   10.12 +extern int is_kernel_text(unsigned long addr);
   10.13 +extern unsigned long kernel_text_end(void);
   10.14 +
   10.15 +/* Lookup an address. */
   10.16 +const char *symbols_lookup(unsigned long addr,
   10.17 +                           unsigned long *symbolsize,
   10.18 +                           unsigned long *offset,
   10.19 +                           char *namebuf);
   10.20 +
   10.21 +/* Replace "%s" in format with address, if found */
   10.22 +extern void __print_symbol(const char *fmt, unsigned long address);
   10.23 +
   10.24 +/* This macro allows us to keep printk typechecking */
   10.25 +static void __check_printsym_format(const char *fmt, ...)
   10.26 +    __attribute__((format(printf,1,2)));
   10.27 +    static inline void __check_printsym_format(const char *fmt, ...)
   10.28 +{
   10.29 +}
   10.30 +
   10.31 +/* ia64 and ppc64 use function descriptors, which contain the real address */
   10.32 +#if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
   10.33 +#define print_fn_descriptor_symbol(fmt, addr)		\
   10.34 +do {						\
   10.35 +	unsigned long *__faddr = (unsigned long*) addr;		\
   10.36 +	print_symbol(fmt, __faddr[0]);		\
   10.37 +} while (0)
   10.38 +#else
   10.39 +#define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr)
   10.40 +#endif
   10.41 +
   10.42 +#define print_symbol(fmt, addr)			\
   10.43 +do {						\
   10.44 +	__check_printsym_format(fmt, "");	\
   10.45 +	__print_symbol(fmt, addr);		\
   10.46 +} while(0)
   10.47 +
   10.48 +#endif /*_XEN_SYMBOLS_H*/
    11.1 --- a/xen/tools/Makefile	Fri Jul 22 14:25:10 2005 +0000
    11.2 +++ b/xen/tools/Makefile	Fri Jul 22 16:44:33 2005 +0000
    11.3 @@ -1,6 +1,13 @@
    11.4 +
    11.5 +include $(BASEDIR)/../Config.mk
    11.6  
    11.7  default:
    11.8  	$(MAKE) -C figlet
    11.9 +	$(MAKE) symbols
   11.10  
   11.11  clean:
   11.12 -	$(MAKE) -C figlet clean
   11.13 \ No newline at end of file
   11.14 +	$(MAKE) -C figlet clean
   11.15 +	rm -f *.o symbols
   11.16 +
   11.17 +symbols: symbols.c
   11.18 +	$(HOSTCC) -o $@ $<
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/xen/tools/symbols.c	Fri Jul 22 16:44:33 2005 +0000
    12.3 @@ -0,0 +1,716 @@
    12.4 +/* Generate assembler source containing symbol information
    12.5 + *
    12.6 + * Copyright 2002       by Kai Germaschewski
    12.7 + *
    12.8 + * This software may be used and distributed according to the terms
    12.9 + * of the GNU General Public License, incorporated herein by reference.
   12.10 + *
   12.11 + * Usage: nm -n <object-file> | scripts/symbols [--all-symbols] > symbols.S
   12.12 + *
   12.13 + * ChangeLog:
   12.14 + *
   12.15 + * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
   12.16 + *      Changed the compression method from stem compression to "table lookup"
   12.17 + *      compression
   12.18 + *
   12.19 + *      Table compression uses all the unused char codes on the symbols and
   12.20 + *  maps these to the most used substrings (tokens). For instance, it might
   12.21 + *  map char code 0xF7 to represent "write_" and then in every symbol where
   12.22 + *  "write_" appears it can be replaced by 0xF7, saving 5 bytes.
   12.23 + *      The used codes themselves are also placed in the table so that the
   12.24 + *  decompresion can work without "special cases".
   12.25 + *      Applied to kernel symbols, this usually produces a compression ratio
   12.26 + *  of about 50%.
   12.27 + *
   12.28 + */
   12.29 +
   12.30 +#include <stdio.h>
   12.31 +#include <stdlib.h>
   12.32 +#include <string.h>
   12.33 +#include <ctype.h>
   12.34 +
   12.35 +/* maximum token length used. It doesn't pay to increase it a lot, because
   12.36 + * very long substrings probably don't repeat themselves too often. */
   12.37 +#define MAX_TOK_SIZE		11
   12.38 +#define KSYM_NAME_LEN		127
   12.39 +
   12.40 +/* we use only a subset of the complete symbol table to gather the token count,
   12.41 + * to speed up compression, at the expense of a little compression ratio */
   12.42 +#define WORKING_SET		1024
   12.43 +
   12.44 +/* first find the best token only on the list of tokens that would profit more
   12.45 + * than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list.
   12.46 + * Increasing this value will put less tokens on the "good" list, so the search
   12.47 + * is faster. However, if the good list runs out of tokens, we must painfully
   12.48 + * search the bad list. */
   12.49 +#define GOOD_BAD_THRESHOLD	10
   12.50 +
   12.51 +/* token hash parameters */
   12.52 +#define HASH_BITS		18
   12.53 +#define HASH_TABLE_SIZE		(1 << HASH_BITS)
   12.54 +#define HASH_MASK		(HASH_TABLE_SIZE - 1)
   12.55 +#define HASH_BASE_OFFSET	2166136261U
   12.56 +#define HASH_FOLD(a)		((a)&(HASH_MASK))
   12.57 +
   12.58 +/* flags to mark symbols */
   12.59 +#define SYM_FLAG_VALID		1
   12.60 +#define SYM_FLAG_SAMPLED	2
   12.61 +
   12.62 +struct sym_entry {
   12.63 +	unsigned long long addr;
   12.64 +	char type;
   12.65 +	unsigned char flags;
   12.66 +	unsigned char len;
   12.67 +	unsigned char *sym;
   12.68 +};
   12.69 +
   12.70 +
   12.71 +static struct sym_entry *table;
   12.72 +static int size, cnt;
   12.73 +static unsigned long long _stext, _etext;
   12.74 +static int all_symbols = 0;
   12.75 +static char symbol_prefix_char = '\0';
   12.76 +
   12.77 +struct token {
   12.78 +	unsigned char data[MAX_TOK_SIZE];
   12.79 +	unsigned char len;
   12.80 +	/* profit: the number of bytes that could be saved by inserting this
   12.81 +	 * token into the table */
   12.82 +	int profit;
   12.83 +	struct token *next;	/* next token on the hash list */
   12.84 +	struct token *right;	/* next token on the good/bad list */
   12.85 +	struct token *left;    /* previous token on the good/bad list */
   12.86 +	struct token *smaller; /* token that is less one letter than this one */
   12.87 +	};
   12.88 +
   12.89 +struct token bad_head, good_head;
   12.90 +struct token *hash_table[HASH_TABLE_SIZE];
   12.91 +
   12.92 +/* the table that holds the result of the compression */
   12.93 +unsigned char best_table[256][MAX_TOK_SIZE+1];
   12.94 +unsigned char best_table_len[256];
   12.95 +
   12.96 +
   12.97 +static void
   12.98 +usage(void)
   12.99 +{
  12.100 +	fprintf(stderr, "Usage: symbols [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
  12.101 +	exit(1);
  12.102 +}
  12.103 +
  12.104 +/*
  12.105 + * This ignores the intensely annoying "mapping symbols" found
  12.106 + * in ARM ELF files: $a, $t and $d.
  12.107 + */
  12.108 +static inline int
  12.109 +is_arm_mapping_symbol(const char *str)
  12.110 +{
  12.111 +	return str[0] == '$' && strchr("atd", str[1])
  12.112 +	       && (str[2] == '\0' || str[2] == '.');
  12.113 +}
  12.114 +
  12.115 +static int
  12.116 +read_symbol(FILE *in, struct sym_entry *s)
  12.117 +{
  12.118 +	char str[500];
  12.119 +	char *sym;
  12.120 +	int rc;
  12.121 +
  12.122 +	rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str);
  12.123 +	if (rc != 3) {
  12.124 +		if (rc != EOF) {
  12.125 +			/* skip line */
  12.126 +			fgets(str, 500, in);
  12.127 +		}
  12.128 +		return -1;
  12.129 +	}
  12.130 +
  12.131 +	sym = str;
  12.132 +	/* skip prefix char */
  12.133 +	if (symbol_prefix_char && str[0] == symbol_prefix_char)
  12.134 +		sym++;
  12.135 +
  12.136 +	/* Ignore most absolute/undefined (?) symbols. */
  12.137 +	if (strcmp(sym, "_stext") == 0)
  12.138 +		_stext = s->addr;
  12.139 +	else if (strcmp(sym, "_etext") == 0)
  12.140 +		_etext = s->addr;
  12.141 +	else if (toupper(s->type) == 'A')
  12.142 +	{
  12.143 +		/* Keep these useful absolute symbols */
  12.144 +		if (strcmp(sym, "__kernel_syscall_via_break") &&
  12.145 +		    strcmp(sym, "__kernel_syscall_via_epc") &&
  12.146 +		    strcmp(sym, "__kernel_sigtramp") &&
  12.147 +		    strcmp(sym, "__gp"))
  12.148 +			return -1;
  12.149 +
  12.150 +	}
  12.151 +	else if (toupper(s->type) == 'U' ||
  12.152 +		 is_arm_mapping_symbol(sym))
  12.153 +		return -1;
  12.154 +
  12.155 +	/* include the type field in the symbol name, so that it gets
  12.156 +	 * compressed together */
  12.157 +	s->len = strlen(str) + 1;
  12.158 +	s->sym = (char *) malloc(s->len + 1);
  12.159 +	strcpy(s->sym + 1, str);
  12.160 +	s->sym[0] = s->type;
  12.161 +
  12.162 +	return 0;
  12.163 +}
  12.164 +
  12.165 +static int
  12.166 +symbol_valid(struct sym_entry *s)
  12.167 +{
  12.168 +	/* Symbols which vary between passes.  Passes 1 and 2 must have
  12.169 +	 * identical symbol lists.  The symbols_* symbols below are only added
  12.170 +	 * after pass 1, they would be included in pass 2 when --all-symbols is
  12.171 +	 * specified so exclude them to get a stable symbol list.
  12.172 +	 */
  12.173 +	static char *special_symbols[] = {
  12.174 +		"symbols_addresses",
  12.175 +		"symbols_num_syms",
  12.176 +		"symbols_names",
  12.177 +		"symbols_markers",
  12.178 +		"symbols_token_table",
  12.179 +		"symbols_token_index",
  12.180 +
  12.181 +	/* Exclude linker generated symbols which vary between passes */
  12.182 +		"_SDA_BASE_",		/* ppc */
  12.183 +		"_SDA2_BASE_",		/* ppc */
  12.184 +		NULL };
  12.185 +	int i;
  12.186 +	int offset = 1;
  12.187 +
  12.188 +	/* skip prefix char */
  12.189 +	if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
  12.190 +		offset++;
  12.191 +
  12.192 +	/* if --all-symbols is not specified, then symbols outside the text
  12.193 +	 * and inittext sections are discarded */
  12.194 +	if (!all_symbols) {
  12.195 +		if (s->addr < _stext || s->addr > _etext)
  12.196 +			return 0;
  12.197 +		/* Corner case.  Discard any symbols with the same value as
  12.198 +		 * _etext _einittext or _eextratext; they can move between pass
  12.199 +		 * 1 and 2 when the symbols data are added.  If these symbols
  12.200 +		 * move then they may get dropped in pass 2, which breaks the
  12.201 +		 * symbols rules.
  12.202 +		 */
  12.203 +		if (s->addr == _etext && strcmp(s->sym + offset, "_etext"))
  12.204 +			return 0;
  12.205 +	}
  12.206 +
  12.207 +	/* Exclude symbols which vary between passes. */
  12.208 +	if (strstr(s->sym + offset, "_compiled."))
  12.209 +		return 0;
  12.210 +
  12.211 +	for (i = 0; special_symbols[i]; i++)
  12.212 +		if( strcmp(s->sym + offset, special_symbols[i]) == 0 )
  12.213 +			return 0;
  12.214 +
  12.215 +	return 1;
  12.216 +}
  12.217 +
  12.218 +static void
  12.219 +read_map(FILE *in)
  12.220 +{
  12.221 +	while (!feof(in)) {
  12.222 +		if (cnt >= size) {
  12.223 +			size += 10000;
  12.224 +			table = realloc(table, sizeof(*table) * size);
  12.225 +			if (!table) {
  12.226 +				fprintf(stderr, "out of memory\n");
  12.227 +				exit (1);
  12.228 +			}
  12.229 +		}
  12.230 +		if (read_symbol(in, &table[cnt]) == 0)
  12.231 +			cnt++;
  12.232 +	}
  12.233 +}
  12.234 +
  12.235 +static void output_label(char *label)
  12.236 +{
  12.237 +	if (symbol_prefix_char)
  12.238 +		printf(".globl %c%s\n", symbol_prefix_char, label);
  12.239 +	else
  12.240 +		printf(".globl %s\n", label);
  12.241 +	printf("\tALGN\n");
  12.242 +	if (symbol_prefix_char)
  12.243 +		printf("%c%s:\n", symbol_prefix_char, label);
  12.244 +	else
  12.245 +		printf("%s:\n", label);
  12.246 +}
  12.247 +
  12.248 +/* uncompress a compressed symbol. When this function is called, the best table
  12.249 + * might still be compressed itself, so the function needs to be recursive */
  12.250 +static int expand_symbol(unsigned char *data, int len, char *result)
  12.251 +{
  12.252 +	int c, rlen, total=0;
  12.253 +
  12.254 +	while (len) {
  12.255 +		c = *data;
  12.256 +		/* if the table holds a single char that is the same as the one
  12.257 +		 * we are looking for, then end the search */
  12.258 +		if (best_table[c][0]==c && best_table_len[c]==1) {
  12.259 +			*result++ = c;
  12.260 +			total++;
  12.261 +		} else {
  12.262 +			/* if not, recurse and expand */
  12.263 +			rlen = expand_symbol(best_table[c], best_table_len[c], result);
  12.264 +			total += rlen;
  12.265 +			result += rlen;
  12.266 +		}
  12.267 +		data++;
  12.268 +		len--;
  12.269 +	}
  12.270 +	*result=0;
  12.271 +
  12.272 +	return total;
  12.273 +}
  12.274 +
  12.275 +static void
  12.276 +write_src(void)
  12.277 +{
  12.278 +	int i, k, off, valid;
  12.279 +	unsigned int best_idx[256];
  12.280 +	unsigned int *markers;
  12.281 +	char buf[KSYM_NAME_LEN+1];
  12.282 +
  12.283 +	printf("#include <asm/types.h>\n");
  12.284 +	printf("#if BITS_PER_LONG == 64\n");
  12.285 +	printf("#define PTR .quad\n");
  12.286 +	printf("#define ALGN .align 8\n");
  12.287 +	printf("#else\n");
  12.288 +	printf("#define PTR .long\n");
  12.289 +	printf("#define ALGN .align 4\n");
  12.290 +	printf("#endif\n");
  12.291 +
  12.292 +	printf(".data\n");
  12.293 +
  12.294 +	output_label("symbols_addresses");
  12.295 +	valid = 0;
  12.296 +	for (i = 0; i < cnt; i++) {
  12.297 +		if (table[i].flags & SYM_FLAG_VALID) {
  12.298 +			printf("\tPTR\t%#llx\n", table[i].addr);
  12.299 +			valid++;
  12.300 +		}
  12.301 +	}
  12.302 +	printf("\n");
  12.303 +
  12.304 +	output_label("symbols_num_syms");
  12.305 +	printf("\tPTR\t%d\n", valid);
  12.306 +	printf("\n");
  12.307 +
  12.308 +	/* table of offset markers, that give the offset in the compressed stream
  12.309 +	 * every 256 symbols */
  12.310 +	markers = (unsigned int *) malloc(sizeof(unsigned int)*((valid + 255) / 256));
  12.311 +
  12.312 +	output_label("symbols_names");
  12.313 +	valid = 0;
  12.314 +	off = 0;
  12.315 +	for (i = 0; i < cnt; i++) {
  12.316 +
  12.317 +		if (!table[i].flags & SYM_FLAG_VALID)
  12.318 +			continue;
  12.319 +
  12.320 +		if ((valid & 0xFF) == 0)
  12.321 +			markers[valid >> 8] = off;
  12.322 +
  12.323 +		printf("\t.byte 0x%02x", table[i].len);
  12.324 +		for (k = 0; k < table[i].len; k++)
  12.325 +			printf(", 0x%02x", table[i].sym[k]);
  12.326 +		printf("\n");
  12.327 +
  12.328 +		off += table[i].len + 1;
  12.329 +		valid++;
  12.330 +	}
  12.331 +	printf("\n");
  12.332 +
  12.333 +	output_label("symbols_markers");
  12.334 +	for (i = 0; i < ((valid + 255) >> 8); i++)
  12.335 +		printf("\tPTR\t%d\n", markers[i]);
  12.336 +	printf("\n");
  12.337 +
  12.338 +	free(markers);
  12.339 +
  12.340 +	output_label("symbols_token_table");
  12.341 +	off = 0;
  12.342 +	for (i = 0; i < 256; i++) {
  12.343 +		best_idx[i] = off;
  12.344 +		expand_symbol(best_table[i],best_table_len[i],buf);
  12.345 +		printf("\t.asciz\t\"%s\"\n", buf);
  12.346 +		off += strlen(buf) + 1;
  12.347 +	}
  12.348 +	printf("\n");
  12.349 +
  12.350 +	output_label("symbols_token_index");
  12.351 +	for (i = 0; i < 256; i++)
  12.352 +		printf("\t.short\t%d\n", best_idx[i]);
  12.353 +	printf("\n");
  12.354 +}
  12.355 +
  12.356 +
  12.357 +/* table lookup compression functions */
  12.358 +
  12.359 +static inline unsigned int rehash_token(unsigned int hash, unsigned char data)
  12.360 +{
  12.361 +	return ((hash * 16777619) ^ data);
  12.362 +}
  12.363 +
  12.364 +static unsigned int hash_token(unsigned char *data, int len)
  12.365 +{
  12.366 +	unsigned int hash=HASH_BASE_OFFSET;
  12.367 +	int i;
  12.368 +
  12.369 +	for (i = 0; i < len; i++)
  12.370 +		hash = rehash_token(hash, data[i]);
  12.371 +
  12.372 +	return HASH_FOLD(hash);
  12.373 +}
  12.374 +
  12.375 +/* find a token given its data and hash value */
  12.376 +static struct token *find_token_hash(unsigned char *data, int len, unsigned int hash)
  12.377 +{
  12.378 +	struct token *ptr;
  12.379 +
  12.380 +	ptr = hash_table[hash];
  12.381 +
  12.382 +	while (ptr) {
  12.383 +		if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0))
  12.384 +			return ptr;
  12.385 +		ptr=ptr->next;
  12.386 +	}
  12.387 +
  12.388 +	return NULL;
  12.389 +}
  12.390 +
  12.391 +static inline void insert_token_in_group(struct token *head, struct token *ptr)
  12.392 +{
  12.393 +	ptr->right = head->right;
  12.394 +	ptr->right->left = ptr;
  12.395 +	head->right = ptr;
  12.396 +	ptr->left = head;
  12.397 +}
  12.398 +
  12.399 +static inline void remove_token_from_group(struct token *ptr)
  12.400 +{
  12.401 +	ptr->left->right = ptr->right;
  12.402 +	ptr->right->left = ptr->left;
  12.403 +}
  12.404 +
  12.405 +
  12.406 +/* build the counts for all the tokens that start with "data", and have lenghts
  12.407 + * from 2 to "len" */
  12.408 +static void learn_token(unsigned char *data, int len)
  12.409 +{
  12.410 +	struct token *ptr,*last_ptr;
  12.411 +	int i, newprofit;
  12.412 +	unsigned int hash = HASH_BASE_OFFSET;
  12.413 +	unsigned int hashes[MAX_TOK_SIZE + 1];
  12.414 +
  12.415 +	if (len > MAX_TOK_SIZE)
  12.416 +		len = MAX_TOK_SIZE;
  12.417 +
  12.418 +	/* calculate and store the hash values for all the sub-tokens */
  12.419 +	hash = rehash_token(hash, data[0]);
  12.420 +	for (i = 2; i <= len; i++) {
  12.421 +		hash = rehash_token(hash, data[i-1]);
  12.422 +		hashes[i] = HASH_FOLD(hash);
  12.423 +	}
  12.424 +
  12.425 +	last_ptr = NULL;
  12.426 +	ptr = NULL;
  12.427 +
  12.428 +	for (i = len; i >= 2; i--) {
  12.429 +		hash = hashes[i];
  12.430 +
  12.431 +		if (!ptr) ptr = find_token_hash(data, i, hash);
  12.432 +
  12.433 +		if (!ptr) {
  12.434 +			/* create a new token entry */
  12.435 +			ptr = (struct token *) malloc(sizeof(*ptr));
  12.436 +
  12.437 +			memcpy(ptr->data, data, i);
  12.438 +			ptr->len = i;
  12.439 +
  12.440 +			/* when we create an entry, it's profit is 0 because
  12.441 +			 * we also take into account the size of the token on
  12.442 +			 * the compressed table. We then subtract GOOD_BAD_THRESHOLD
  12.443 +			 * so that the test to see if this token belongs to
  12.444 +			 * the good or bad list, is a comparison to zero */
  12.445 +			ptr->profit = -GOOD_BAD_THRESHOLD;
  12.446 +
  12.447 +			ptr->next = hash_table[hash];
  12.448 +			hash_table[hash] = ptr;
  12.449 +
  12.450 +			insert_token_in_group(&bad_head, ptr);
  12.451 +
  12.452 +			ptr->smaller = NULL;
  12.453 +		} else {
  12.454 +			newprofit = ptr->profit + (ptr->len - 1);
  12.455 +			/* check to see if this token needs to be moved to a
  12.456 +			 * different list */
  12.457 +			if((ptr->profit < 0) && (newprofit >= 0)) {
  12.458 +				remove_token_from_group(ptr);
  12.459 +				insert_token_in_group(&good_head,ptr);
  12.460 +			}
  12.461 +			ptr->profit = newprofit;
  12.462 +		}
  12.463 +
  12.464 +		if (last_ptr) last_ptr->smaller = ptr;
  12.465 +		last_ptr = ptr;
  12.466 +
  12.467 +		ptr = ptr->smaller;
  12.468 +	}
  12.469 +}
  12.470 +
  12.471 +/* decrease the counts for all the tokens that start with "data", and have lenghts
  12.472 + * from 2 to "len". This function is much simpler than learn_token because we have
  12.473 + * more guarantees (tho tokens exist, the ->smaller pointer is set, etc.)
  12.474 + * The two separate functions exist only because of compression performance */
  12.475 +static void forget_token(unsigned char *data, int len)
  12.476 +{
  12.477 +	struct token *ptr;
  12.478 +	int i, newprofit;
  12.479 +	unsigned int hash=0;
  12.480 +
  12.481 +	if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE;
  12.482 +
  12.483 +	hash = hash_token(data, len);
  12.484 +	ptr = find_token_hash(data, len, hash);
  12.485 +
  12.486 +	for (i = len; i >= 2; i--) {
  12.487 +
  12.488 +		newprofit = ptr->profit - (ptr->len - 1);
  12.489 +		if ((ptr->profit >= 0) && (newprofit < 0)) {
  12.490 +			remove_token_from_group(ptr);
  12.491 +			insert_token_in_group(&bad_head, ptr);
  12.492 +		}
  12.493 +		ptr->profit=newprofit;
  12.494 +
  12.495 +		ptr=ptr->smaller;
  12.496 +	}
  12.497 +}
  12.498 +
  12.499 +/* count all the possible tokens in a symbol */
  12.500 +static void learn_symbol(unsigned char *symbol, int len)
  12.501 +{
  12.502 +	int i;
  12.503 +
  12.504 +	for (i = 0; i < len - 1; i++)
  12.505 +		learn_token(symbol + i, len - i);
  12.506 +}
  12.507 +
  12.508 +/* decrease the count for all the possible tokens in a symbol */
  12.509 +static void forget_symbol(unsigned char *symbol, int len)
  12.510 +{
  12.511 +	int i;
  12.512 +
  12.513 +	for (i = 0; i < len - 1; i++)
  12.514 +		forget_token(symbol + i, len - i);
  12.515 +}
  12.516 +
  12.517 +/* set all the symbol flags and do the initial token count */
  12.518 +static void build_initial_tok_table(void)
  12.519 +{
  12.520 +	int i, use_it, valid;
  12.521 +
  12.522 +	valid = 0;
  12.523 +	for (i = 0; i < cnt; i++) {
  12.524 +		table[i].flags = 0;
  12.525 +		if ( symbol_valid(&table[i]) ) {
  12.526 +			table[i].flags |= SYM_FLAG_VALID;
  12.527 +			valid++;
  12.528 +		}
  12.529 +	}
  12.530 +
  12.531 +	use_it = 0;
  12.532 +	for (i = 0; i < cnt; i++) {
  12.533 +
  12.534 +		/* subsample the available symbols. This method is almost like
  12.535 +		 * a Bresenham's algorithm to get uniformly distributed samples
  12.536 +		 * across the symbol table */
  12.537 +		if (table[i].flags & SYM_FLAG_VALID) {
  12.538 +
  12.539 +			use_it += WORKING_SET;
  12.540 +
  12.541 +			if (use_it >= valid) {
  12.542 +				table[i].flags |= SYM_FLAG_SAMPLED;
  12.543 +				use_it -= valid;
  12.544 +			}
  12.545 +		}
  12.546 +		if (table[i].flags & SYM_FLAG_SAMPLED)
  12.547 +			learn_symbol(table[i].sym, table[i].len);
  12.548 +	}
  12.549 +}
  12.550 +
  12.551 +/* replace a given token in all the valid symbols. Use the sampled symbols
  12.552 + * to update the counts */
  12.553 +static void compress_symbols(unsigned char *str, int tlen, int idx)
  12.554 +{
  12.555 +	int i, len, learn, size;
  12.556 +	unsigned char *p;
  12.557 +
  12.558 +	for (i = 0; i < cnt; i++) {
  12.559 +
  12.560 +		if (!(table[i].flags & SYM_FLAG_VALID)) continue;
  12.561 +
  12.562 +		len = table[i].len;
  12.563 +		learn = 0;
  12.564 +		p = table[i].sym;
  12.565 +
  12.566 +		do {
  12.567 +			/* find the token on the symbol */
  12.568 +			p = (unsigned char *) strstr((char *) p, (char *) str);
  12.569 +			if (!p) break;
  12.570 +
  12.571 +			if (!learn) {
  12.572 +				/* if this symbol was used to count, decrease it */
  12.573 +				if (table[i].flags & SYM_FLAG_SAMPLED)
  12.574 +					forget_symbol(table[i].sym, len);
  12.575 +				learn = 1;
  12.576 +			}
  12.577 +
  12.578 +			*p = idx;
  12.579 +			size = (len - (p - table[i].sym)) - tlen + 1;
  12.580 +			memmove(p + 1, p + tlen, size);
  12.581 +			p++;
  12.582 +			len -= tlen - 1;
  12.583 +
  12.584 +		} while (size >= tlen);
  12.585 +
  12.586 +		if(learn) {
  12.587 +			table[i].len = len;
  12.588 +			/* if this symbol was used to count, learn it again */
  12.589 +			if(table[i].flags & SYM_FLAG_SAMPLED)
  12.590 +				learn_symbol(table[i].sym, len);
  12.591 +		}
  12.592 +	}
  12.593 +}
  12.594 +
  12.595 +/* search the token with the maximum profit */
  12.596 +static struct token *find_best_token(void)
  12.597 +{
  12.598 +	struct token *ptr,*best,*head;
  12.599 +	int bestprofit;
  12.600 +
  12.601 +	bestprofit=-10000;
  12.602 +
  12.603 +	/* failsafe: if the "good" list is empty search from the "bad" list */
  12.604 +	if(good_head.right == &good_head) head = &bad_head;
  12.605 +	else head = &good_head;
  12.606 +
  12.607 +	ptr = head->right;
  12.608 +	best = NULL;
  12.609 +	while (ptr != head) {
  12.610 +		if (ptr->profit > bestprofit) {
  12.611 +			bestprofit = ptr->profit;
  12.612 +			best = ptr;
  12.613 +		}
  12.614 +		ptr = ptr->right;
  12.615 +	}
  12.616 +
  12.617 +	return best;
  12.618 +}
  12.619 +
  12.620 +/* this is the core of the algorithm: calculate the "best" table */
  12.621 +static void optimize_result(void)
  12.622 +{
  12.623 +	struct token *best;
  12.624 +	int i;
  12.625 +
  12.626 +	/* using the '\0' symbol last allows compress_symbols to use standard
  12.627 +	 * fast string functions */
  12.628 +	for (i = 255; i >= 0; i--) {
  12.629 +
  12.630 +		/* if this table slot is empty (it is not used by an actual
  12.631 +		 * original char code */
  12.632 +		if (!best_table_len[i]) {
  12.633 +
  12.634 +			/* find the token with the breates profit value */
  12.635 +			best = find_best_token();
  12.636 +
  12.637 +			/* place it in the "best" table */
  12.638 +			best_table_len[i] = best->len;
  12.639 +			memcpy(best_table[i], best->data, best_table_len[i]);
  12.640 +			/* zero terminate the token so that we can use strstr
  12.641 +			   in compress_symbols */
  12.642 +			best_table[i][best_table_len[i]]='\0';
  12.643 +
  12.644 +			/* replace this token in all the valid symbols */
  12.645 +			compress_symbols(best_table[i], best_table_len[i], i);
  12.646 +		}
  12.647 +	}
  12.648 +}
  12.649 +
  12.650 +/* start by placing the symbols that are actually used on the table */
  12.651 +static void insert_real_symbols_in_table(void)
  12.652 +{
  12.653 +	int i, j, c;
  12.654 +
  12.655 +	memset(best_table, 0, sizeof(best_table));
  12.656 +	memset(best_table_len, 0, sizeof(best_table_len));
  12.657 +
  12.658 +	for (i = 0; i < cnt; i++) {
  12.659 +		if (table[i].flags & SYM_FLAG_VALID) {
  12.660 +			for (j = 0; j < table[i].len; j++) {
  12.661 +				c = table[i].sym[j];
  12.662 +				best_table[c][0]=c;
  12.663 +				best_table_len[c]=1;
  12.664 +			}
  12.665 +		}
  12.666 +	}
  12.667 +}
  12.668 +
  12.669 +static void optimize_token_table(void)
  12.670 +{
  12.671 +	memset(hash_table, 0, sizeof(hash_table));
  12.672 +
  12.673 +	good_head.left = &good_head;
  12.674 +	good_head.right = &good_head;
  12.675 +
  12.676 +	bad_head.left = &bad_head;
  12.677 +	bad_head.right = &bad_head;
  12.678 +
  12.679 +	build_initial_tok_table();
  12.680 +
  12.681 +	insert_real_symbols_in_table();
  12.682 +
  12.683 +	/* When valid symbol is not registered, exit to error */
  12.684 +	if (good_head.left == good_head.right &&
  12.685 +	    bad_head.left == bad_head.right) {
  12.686 +		fprintf(stderr, "No valid symbol.\n");
  12.687 +		exit(1);
  12.688 +	}
  12.689 +
  12.690 +	optimize_result();
  12.691 +}
  12.692 +
  12.693 +
  12.694 +int
  12.695 +main(int argc, char **argv)
  12.696 +{
  12.697 +	if (argc >= 2) {
  12.698 +		int i;
  12.699 +		for (i = 1; i < argc; i++) {
  12.700 +			if(strcmp(argv[i], "--all-symbols") == 0)
  12.701 +				all_symbols = 1;
  12.702 +			else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
  12.703 +				char *p = &argv[i][16];
  12.704 +				/* skip quote */
  12.705 +				if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
  12.706 +					p++;
  12.707 +				symbol_prefix_char = *p;
  12.708 +			} else
  12.709 +				usage();
  12.710 +		}
  12.711 +	} else if (argc != 1)
  12.712 +		usage();
  12.713 +
  12.714 +	read_map(stdin);
  12.715 +	optimize_token_table();
  12.716 +	write_src();
  12.717 +
  12.718 +	return 0;
  12.719 +}