ia64/xen-unstable

changeset 5842:dc7c14e533c2

Local merge
Stephan.Diestelhorst@{cl.cam.ac.uk, inf.tu-dresden.de}
author sd386@font.cl.cam.ac.uk
date Fri Jul 22 17:58:52 2005 +0000 (2005-07-22)
parents 460405b4723b 48aed1403fe3
children a49bf96419a4
files .hgignore xen/Makefile xen/arch/ia64/Makefile xen/arch/x86/Makefile xen/arch/x86/traps.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/traps.c xen/common/sched_sedf.c xen/common/symbols.c xen/include/asm-x86/types.h xen/include/xen/symbols.h xen/tools/Makefile xen/tools/symbols.c
line diff
     1.1 --- a/.hgignore	Fri Jul 22 15:32:31 2005 +0000
     1.2 +++ b/.hgignore	Fri Jul 22 17:58:52 2005 +0000
     1.3 @@ -162,10 +162,6 @@
     1.4  ^xen/arch/x86/asm-offsets\.s$
     1.5  ^xen/arch/x86/boot/mkelf32$
     1.6  ^xen/ddb/.*$
     1.7 -^xen/drivers/pci/classlist\.h$
     1.8 -^xen/drivers/pci/devlist\.h$
     1.9 -^xen/drivers/pci/gen-devlist$
    1.10 -^xen/figlet/figlet$
    1.11  ^xen/include/asm$
    1.12  ^xen/include/asm-.*/asm-offsets\.h$
    1.13  ^xen/include/hypervisor-ifs/arch$
    1.14 @@ -175,8 +171,8 @@
    1.15  ^xen/include/xen/banner\.h$
    1.16  ^xen/include/xen/compile\.h$
    1.17  ^xen/tags$
    1.18 -^xen/tools/elf-reloc$
    1.19  ^xen/tools/figlet/figlet$
    1.20 +^xen/tools/symbols$
    1.21  ^xen/xen$
    1.22  ^xen/xen-syms$
    1.23  ^xen/xen\..*$
     2.1 --- a/xen/Makefile	Fri Jul 22 15:32:31 2005 +0000
     2.2 +++ b/xen/Makefile	Fri Jul 22 17:58:52 2005 +0000
     2.3 @@ -50,10 +50,10 @@ clean: delete-unfresh-files
     2.4  	$(MAKE) -C arch/$(TARGET_ARCH) clean
     2.5  	rm -f include/asm *.o $(TARGET)* *~ core
     2.6  	rm -f include/asm-*/asm-offsets.h
     2.7 -	rm -f tools/figlet/*.o tools/figlet/figlet
     2.8  	rm -f include/xen/acm_policy.h
     2.9  
    2.10  $(TARGET): delete-unfresh-files
    2.11 +	$(MAKE) -C tools
    2.12  	$(MAKE) include/xen/compile.h
    2.13  	$(MAKE) include/xen/acm_policy.h
    2.14  	[ -e include/asm ] || ln -sf asm-$(TARGET_ARCH) include/asm
    2.15 @@ -71,7 +71,6 @@ endif
    2.16  delete-unfresh-files:
    2.17  	@if [ ! -r include/xen/compile.h -o -O include/xen/compile.h ]; then \
    2.18  		rm -f include/xen/{banner,compile}.h; \
    2.19 -		$(MAKE) -C arch/$(TARGET_ARCH) delete-unfresh-files; \
    2.20  	fi
    2.21  
    2.22  # acm_policy.h contains security policy for Xen
    2.23 @@ -105,12 +104,7 @@ include/xen/compile.h: include/xen/compi
    2.24  	@cat include/xen/banner.h >> $@.new
    2.25  	@mv -f $@.new $@
    2.26  
    2.27 -tools/figlet/figlet: tools/figlet/figlet.o
    2.28 -	$(HOSTCC) -o $@ $<
    2.29 -tools/figlet/figlet.o: tools/figlet/figlet.c
    2.30 -	$(HOSTCC) -o $@ -c $<
    2.31 -
    2.32 -include/xen/banner.h: tools/figlet/figlet tools/figlet/xen.flf
    2.33 +include/xen/banner.h:
    2.34  	tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) > $@.new
    2.35  	@mv -f $@.new $@
    2.36  
     3.1 --- a/xen/arch/ia64/Makefile	Fri Jul 22 15:32:31 2005 +0000
     3.2 +++ b/xen/arch/ia64/Makefile	Fri Jul 22 17:58:52 2005 +0000
     3.3 @@ -62,10 +62,4 @@ clean:
     3.4  	rm -f *.o *~ core  xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s
     3.5  	rm -f lib/*.o
     3.6  
     3.7 -# setup.o contains bits of compile.h so it must be blown away
     3.8 -delete-unfresh-files:
     3.9 -	echo any unfresh-files to delete for ia64\?
    3.10 -#	rm -f setup.o
    3.11 -
    3.12 -.PHONY: default clean delete-unfresh-files
    3.13 -
    3.14 +.PHONY: default clean
     4.1 --- a/xen/arch/x86/Makefile	Fri Jul 22 15:32:31 2005 +0000
     4.2 +++ b/xen/arch/x86/Makefile	Fri Jul 22 17:58:52 2005 +0000
     4.3 @@ -37,6 +37,15 @@ default: $(TARGET)
     4.4  $(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(TARGET_SUBARCH)/xen.lds
     4.5  	$(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
     4.6  	    boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@
     4.7 +	nm -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
     4.8 +	$(MAKE) $(BASEDIR)/xen-syms.o
     4.9 +	$(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
    4.10 +	    boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
    4.11 +	nm -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
    4.12 +	$(MAKE) $(BASEDIR)/xen-syms.o
    4.13 +	$(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
    4.14 +	    boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
    4.15 +	rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
    4.16  
    4.17  asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
    4.18  	$(CC) $(CFLAGS) -S -o $@ $<
    4.19 @@ -53,7 +62,4 @@ clean:
    4.20  	rm -f genapic/*.o genapic/*~ genapic/core
    4.21  	rm -f cpu/*.o cpu/*~ cpu/core
    4.22  
    4.23 -delete-unfresh-files:
    4.24 -	# nothing
    4.25 -
    4.26 -.PHONY: default clean delete-unfresh-files
    4.27 +.PHONY: default clean
     5.1 --- a/xen/arch/x86/traps.c	Fri Jul 22 15:32:31 2005 +0000
     5.2 +++ b/xen/arch/x86/traps.c	Fri Jul 22 17:58:52 2005 +0000
     5.3 @@ -40,6 +40,7 @@
     5.4  #include <xen/perfc.h>
     5.5  #include <xen/softirq.h>
     5.6  #include <xen/domain_page.h>
     5.7 +#include <xen/symbols.h>
     5.8  #include <asm/shadow.h>
     5.9  #include <asm/system.h>
    5.10  #include <asm/io.h>
    5.11 @@ -100,7 +101,7 @@ unsigned long do_get_debugreg(int reg);
    5.12  static int debug_stack_lines = 20;
    5.13  integer_param("debug_stack_lines", debug_stack_lines);
    5.14  
    5.15 -static inline int kernel_text_address(unsigned long addr)
    5.16 +int is_kernel_text(unsigned long addr)
    5.17  {
    5.18      extern char _stext, _etext;
    5.19      if (addr >= (unsigned long) &_stext &&
    5.20 @@ -110,6 +111,12 @@ static inline int kernel_text_address(un
    5.21  
    5.22  }
    5.23  
    5.24 +unsigned long kernel_text_end(void)
    5.25 +{
    5.26 +    extern char _etext;
    5.27 +    return (unsigned long) &_etext;
    5.28 +}
    5.29 +
    5.30  void show_guest_stack(void)
    5.31  {
    5.32      int i;
    5.33 @@ -150,11 +157,12 @@ void show_trace(unsigned long *esp)
    5.34      while ( ((long) stack & (STACK_SIZE-1)) != 0 )
    5.35      {
    5.36          addr = *stack++;
    5.37 -        if ( kernel_text_address(addr) )
    5.38 +        if ( is_kernel_text(addr) )
    5.39          {
    5.40              if ( (i != 0) && ((i % 6) == 0) )
    5.41                  printk("\n   ");
    5.42 -            printk("[<%p>] ", _p(addr));
    5.43 +            printk("[<%p>]", _p(addr));
    5.44 +            print_symbol(" %s\n", addr);
    5.45              i++;
    5.46          }
    5.47      }
    5.48 @@ -177,10 +185,7 @@ void show_stack(unsigned long *esp)
    5.49          if ( (i != 0) && ((i % 8) == 0) )
    5.50              printk("\n   ");
    5.51          addr = *stack++;
    5.52 -        if ( kernel_text_address(addr) )
    5.53 -            printk("[%p] ", _p(addr));
    5.54 -        else
    5.55 -            printk("%p ", _p(addr));
    5.56 +        printk("%p ", _p(addr));
    5.57      }
    5.58      if ( i == 0 )
    5.59          printk("Stack empty.");
     6.1 --- a/xen/arch/x86/x86_32/traps.c	Fri Jul 22 15:32:31 2005 +0000
     6.2 +++ b/xen/arch/x86/x86_32/traps.c	Fri Jul 22 17:58:52 2005 +0000
     6.3 @@ -6,6 +6,7 @@
     6.4  #include <xen/console.h>
     6.5  #include <xen/mm.h>
     6.6  #include <xen/irq.h>
     6.7 +#include <xen/symbols.h>
     6.8  #include <asm/current.h>
     6.9  #include <asm/flushtlb.h>
    6.10  #include <asm/vmx.h>
    6.11 @@ -63,10 +64,10 @@ void show_registers(struct cpu_user_regs
    6.12          }
    6.13      }
    6.14  
    6.15 -    printk("CPU:    %d\nEIP:    %04lx:[<%08lx>]      \nEFLAGS: %08lx   "
    6.16 -           "CONTEXT: %s\n",
    6.17 -           smp_processor_id(), (unsigned long)0xffff & regs->cs,
    6.18 -           eip, eflags, context);
    6.19 +    printk("CPU:    %d\nEIP:    %04lx:[<%08lx>]",
    6.20 +           smp_processor_id(), (unsigned long)0xffff & regs->cs, eip);
    6.21 +    print_symbol(" %s\n", eip);
    6.22 +    printk("EFLAGS: %08lx   CONTEXT: %s\n", eflags, context);
    6.23      printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
    6.24             regs->eax, regs->ebx, regs->ecx, regs->edx);
    6.25      printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08lx\n",
    6.26 @@ -119,8 +120,10 @@ asmlinkage void do_double_fault(void)
    6.27  
    6.28      /* Find information saved during fault and dump it to the console. */
    6.29      tss = &init_tss[cpu];
    6.30 -    printk("CPU:    %d\nEIP:    %04x:[<%08x>]      \nEFLAGS: %08x\n",
    6.31 -           cpu, tss->cs, tss->eip, tss->eflags);
    6.32 +    printk("CPU:    %d\nEIP:    %04x:[<%08x>]",
    6.33 +           cpu, tss->cs, tss->eip);
    6.34 +    print_symbol(" %s\n", tss->eip);
    6.35 +    printk("EFLAGS: %08x\n", tss->eflags);
    6.36      printk("CR3:    %08x\n", tss->__cr3);
    6.37      printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
    6.38             tss->eax, tss->ebx, tss->ecx, tss->edx);
     7.1 --- a/xen/arch/x86/x86_64/traps.c	Fri Jul 22 15:32:31 2005 +0000
     7.2 +++ b/xen/arch/x86/x86_64/traps.c	Fri Jul 22 17:58:52 2005 +0000
     7.3 @@ -6,6 +6,7 @@
     7.4  #include <xen/errno.h>
     7.5  #include <xen/mm.h>
     7.6  #include <xen/irq.h>
     7.7 +#include <xen/symbols.h>
     7.8  #include <xen/console.h>
     7.9  #include <xen/sched.h>
    7.10  #include <asm/current.h>
    7.11 @@ -14,8 +15,10 @@
    7.12  
    7.13  void show_registers(struct cpu_user_regs *regs)
    7.14  {
    7.15 -    printk("CPU:    %d\nEIP:    %04x:[<%016lx>]      \nEFLAGS: %016lx\n",
    7.16 -           smp_processor_id(), 0xffff & regs->cs, regs->rip, regs->eflags);
    7.17 +    printk("CPU:    %d\nEIP:    %04x:[<%016lx>]",
    7.18 +           smp_processor_id(), 0xffff & regs->cs, regs->rip);
    7.19 +    print_symbol(" %s\n", regs->rip);
    7.20 +    printk("EFLAGS: %016lx\n", regs->eflags);
    7.21      printk("rax: %016lx   rbx: %016lx   rcx: %016lx   rdx: %016lx\n",
    7.22             regs->rax, regs->rbx, regs->rcx, regs->rdx);
    7.23      printk("rsi: %016lx   rdi: %016lx   rbp: %016lx   rsp: %016lx\n",
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/xen/common/symbols.c	Fri Jul 22 17:58:52 2005 +0000
     9.3 @@ -0,0 +1,158 @@
     9.4 +/*
     9.5 + * symbols.c: in-kernel printing of symbolic oopses and stack traces.
     9.6 + *
     9.7 + * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
     9.8 + *
     9.9 + * ChangeLog:
    9.10 + *
    9.11 + * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
    9.12 + *      Changed the compression method from stem compression to "table lookup"
    9.13 + *      compression (see tools/symbols.c for a more complete description)
    9.14 + */
    9.15 +
    9.16 +#include <xen/config.h>
    9.17 +#include <xen/symbols.h>
    9.18 +#include <xen/init.h>
    9.19 +#include <xen/lib.h>
    9.20 +#include <xen/string.h>
    9.21 +
    9.22 +/* These will be re-linked against their real values during the second link stage */
    9.23 +extern unsigned long symbols_addresses[] __attribute__((weak));
    9.24 +extern unsigned long symbols_num_syms __attribute__((weak,section("data")));
    9.25 +extern u8 symbols_names[] __attribute__((weak));
    9.26 +
    9.27 +extern u8 symbols_token_table[] __attribute__((weak));
    9.28 +extern u16 symbols_token_index[] __attribute__((weak));
    9.29 +
    9.30 +extern unsigned long symbols_markers[] __attribute__((weak));
    9.31 +
    9.32 +/* expand a compressed symbol data into the resulting uncompressed string,
    9.33 +   given the offset to where the symbol is in the compressed stream */
    9.34 +static unsigned int symbols_expand_symbol(unsigned int off, char *result)
    9.35 +{
    9.36 +    int len, skipped_first = 0;
    9.37 +    u8 *tptr, *data;
    9.38 +
    9.39 +    /* get the compressed symbol length from the first symbol byte */
    9.40 +    data = &symbols_names[off];
    9.41 +    len = *data;
    9.42 +    data++;
    9.43 +
    9.44 +    /* update the offset to return the offset for the next symbol on
    9.45 +     * the compressed stream */
    9.46 +    off += len + 1;
    9.47 +
    9.48 +    /* for every byte on the compressed symbol data, copy the table
    9.49 +       entry for that byte */
    9.50 +    while(len) {
    9.51 +        tptr = &symbols_token_table[ symbols_token_index[*data] ];
    9.52 +        data++;
    9.53 +        len--;
    9.54 +
    9.55 +        while (*tptr) {
    9.56 +            if(skipped_first) {
    9.57 +                *result = *tptr;
    9.58 +                result++;
    9.59 +            } else
    9.60 +                skipped_first = 1;
    9.61 +            tptr++;
    9.62 +        }
    9.63 +    }
    9.64 +
    9.65 +    *result = '\0';
    9.66 +
    9.67 +    /* return to offset to the next symbol */
    9.68 +    return off;
    9.69 +}
    9.70 +
    9.71 +/* find the offset on the compressed stream given and index in the
    9.72 + * symbols array */
    9.73 +static unsigned int get_symbol_offset(unsigned long pos)
    9.74 +{
    9.75 +    u8 *name;
    9.76 +    int i;
    9.77 +
    9.78 +    /* use the closest marker we have. We have markers every 256 positions,
    9.79 +     * so that should be close enough */
    9.80 +    name = &symbols_names[ symbols_markers[pos>>8] ];
    9.81 +
    9.82 +    /* sequentially scan all the symbols up to the point we're searching for.
    9.83 +     * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
    9.84 +     * just need to add the len to the current pointer for every symbol we
    9.85 +     * wish to skip */
    9.86 +    for(i = 0; i < (pos&0xFF); i++)
    9.87 +        name = name + (*name) + 1;
    9.88 +
    9.89 +    return name - symbols_names;
    9.90 +}
    9.91 +
    9.92 +const char *symbols_lookup(unsigned long addr,
    9.93 +                           unsigned long *symbolsize,
    9.94 +                           unsigned long *offset,
    9.95 +                           char *namebuf)
    9.96 +{
    9.97 +    unsigned long i, low, high, mid;
    9.98 +    unsigned long symbol_end = 0;
    9.99 +
   9.100 +    /* This kernel should never had been booted. */
   9.101 +    BUG_ON(!symbols_addresses);
   9.102 +
   9.103 +    namebuf[KSYM_NAME_LEN] = 0;
   9.104 +    namebuf[0] = 0;
   9.105 +
   9.106 +    if (!is_kernel_text(addr))
   9.107 +        return NULL;
   9.108 +
   9.109 +        /* do a binary search on the sorted symbols_addresses array */
   9.110 +    low = 0;
   9.111 +    high = symbols_num_syms;
   9.112 +
   9.113 +    while (high-low > 1) {
   9.114 +        mid = (low + high) / 2;
   9.115 +        if (symbols_addresses[mid] <= addr) low = mid;
   9.116 +        else high = mid;
   9.117 +    }
   9.118 +
   9.119 +    /* search for the first aliased symbol. Aliased symbols are
   9.120 +           symbols with the same address */
   9.121 +    while (low && symbols_addresses[low - 1] == symbols_addresses[low])
   9.122 +        --low;
   9.123 +
   9.124 +        /* Grab name */
   9.125 +    symbols_expand_symbol(get_symbol_offset(low), namebuf);
   9.126 +
   9.127 +    /* Search for next non-aliased symbol */
   9.128 +    for (i = low + 1; i < symbols_num_syms; i++) {
   9.129 +        if (symbols_addresses[i] > symbols_addresses[low]) {
   9.130 +            symbol_end = symbols_addresses[i];
   9.131 +            break;
   9.132 +        }
   9.133 +    }
   9.134 +
   9.135 +    /* if we found no next symbol, we use the end of the section */
   9.136 +    if (!symbol_end)
   9.137 +        symbol_end = kernel_text_end();
   9.138 +
   9.139 +    *symbolsize = symbol_end - symbols_addresses[low];
   9.140 +    *offset = addr - symbols_addresses[low];
   9.141 +    return namebuf;
   9.142 +}
   9.143 +
   9.144 +/* Replace "%s" in format with address, or returns -errno. */
   9.145 +void __print_symbol(const char *fmt, unsigned long address)
   9.146 +{
   9.147 +    const char *name;
   9.148 +    unsigned long offset, size;
   9.149 +    char namebuf[KSYM_NAME_LEN+1];
   9.150 +    char buffer[sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN +
   9.151 +               2*(BITS_PER_LONG*3/10) + 1];
   9.152 +
   9.153 +    name = symbols_lookup(address, &size, &offset, namebuf);
   9.154 +
   9.155 +    if (!name)
   9.156 +        sprintf(buffer, "???");
   9.157 +    else
   9.158 +        sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
   9.159 +
   9.160 +    printk(fmt, buffer);
   9.161 +}
    10.1 --- a/xen/include/asm-x86/types.h	Fri Jul 22 15:32:31 2005 +0000
    10.2 +++ b/xen/include/asm-x86/types.h	Fri Jul 22 17:58:52 2005 +0000
    10.3 @@ -1,10 +1,9 @@
    10.4  #ifndef __X86_TYPES_H__
    10.5  #define __X86_TYPES_H__
    10.6  
    10.7 -/*
    10.8 - * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
    10.9 - * header files exported to user space
   10.10 - */
   10.11 +#ifndef __ASSEMBLY__
   10.12 +
   10.13 +#include <xen/config.h>
   10.14  
   10.15  typedef __signed__ char __s8;
   10.16  typedef unsigned char __u8;
   10.17 @@ -25,8 +24,6 @@ typedef unsigned long __u64;
   10.18  #endif
   10.19  #endif
   10.20  
   10.21 -#include <xen/config.h>
   10.22 -
   10.23  typedef signed char s8;
   10.24  typedef unsigned char u8;
   10.25  
   10.26 @@ -39,9 +36,6 @@ typedef unsigned int u32;
   10.27  #if defined(__i386__)
   10.28  typedef signed long long s64;
   10.29  typedef unsigned long long u64;
   10.30 -#define BITS_PER_LONG 32
   10.31 -#define BYTES_PER_LONG 4
   10.32 -#define LONG_BYTEORDER 2
   10.33  #if defined(CONFIG_X86_PAE)
   10.34  typedef u64 physaddr_t;
   10.35  #else
   10.36 @@ -50,12 +44,21 @@ typedef u32 physaddr_t;
   10.37  #elif defined(__x86_64__)
   10.38  typedef signed long s64;
   10.39  typedef unsigned long u64;
   10.40 -#define BITS_PER_LONG 64
   10.41 -#define BYTES_PER_LONG 8
   10.42 -#define LONG_BYTEORDER 3
   10.43  typedef u64 physaddr_t;
   10.44  #endif
   10.45  
   10.46  typedef unsigned long size_t;
   10.47  
   10.48 +#endif /* __ASSEMBLY__ */
   10.49 +
   10.50 +#if defined(__i386__)
   10.51 +#define BITS_PER_LONG 32
   10.52 +#define BYTES_PER_LONG 4
   10.53 +#define LONG_BYTEORDER 2
   10.54 +#elif defined(__x86_64__)
   10.55 +#define BITS_PER_LONG 64
   10.56 +#define BYTES_PER_LONG 8
   10.57 +#define LONG_BYTEORDER 3
   10.58 +#endif
   10.59 +
   10.60  #endif /* __X86_TYPES_H__ */
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/xen/include/xen/symbols.h	Fri Jul 22 17:58:52 2005 +0000
    11.3 @@ -0,0 +1,45 @@
    11.4 +#ifndef _XEN_SYMBOLS_H
    11.5 +#define _XEN_SYMBOLS_H
    11.6 +
    11.7 +#include <xen/config.h>
    11.8 +#include <xen/types.h>
    11.9 +
   11.10 +#define KSYM_NAME_LEN 127
   11.11 +
   11.12 +extern int is_kernel_text(unsigned long addr);
   11.13 +extern unsigned long kernel_text_end(void);
   11.14 +
   11.15 +/* Lookup an address. */
   11.16 +const char *symbols_lookup(unsigned long addr,
   11.17 +                           unsigned long *symbolsize,
   11.18 +                           unsigned long *offset,
   11.19 +                           char *namebuf);
   11.20 +
   11.21 +/* Replace "%s" in format with address, if found */
   11.22 +extern void __print_symbol(const char *fmt, unsigned long address);
   11.23 +
   11.24 +/* This macro allows us to keep printk typechecking */
   11.25 +static void __check_printsym_format(const char *fmt, ...)
   11.26 +    __attribute__((format(printf,1,2)));
   11.27 +    static inline void __check_printsym_format(const char *fmt, ...)
   11.28 +{
   11.29 +}
   11.30 +
   11.31 +/* ia64 and ppc64 use function descriptors, which contain the real address */
   11.32 +#if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
   11.33 +#define print_fn_descriptor_symbol(fmt, addr)		\
   11.34 +do {						\
   11.35 +	unsigned long *__faddr = (unsigned long*) addr;		\
   11.36 +	print_symbol(fmt, __faddr[0]);		\
   11.37 +} while (0)
   11.38 +#else
   11.39 +#define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr)
   11.40 +#endif
   11.41 +
   11.42 +#define print_symbol(fmt, addr)			\
   11.43 +do {						\
   11.44 +	__check_printsym_format(fmt, "");	\
   11.45 +	__print_symbol(fmt, addr);		\
   11.46 +} while(0)
   11.47 +
   11.48 +#endif /*_XEN_SYMBOLS_H*/
    12.1 --- a/xen/tools/Makefile	Fri Jul 22 15:32:31 2005 +0000
    12.2 +++ b/xen/tools/Makefile	Fri Jul 22 17:58:52 2005 +0000
    12.3 @@ -1,6 +1,13 @@
    12.4 +
    12.5 +include $(BASEDIR)/../Config.mk
    12.6  
    12.7  default:
    12.8  	$(MAKE) -C figlet
    12.9 +	$(MAKE) symbols
   12.10  
   12.11  clean:
   12.12 -	$(MAKE) -C figlet clean
   12.13 \ No newline at end of file
   12.14 +	$(MAKE) -C figlet clean
   12.15 +	rm -f *.o symbols
   12.16 +
   12.17 +symbols: symbols.c
   12.18 +	$(HOSTCC) -o $@ $<
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xen/tools/symbols.c	Fri Jul 22 17:58:52 2005 +0000
    13.3 @@ -0,0 +1,716 @@
    13.4 +/* Generate assembler source containing symbol information
    13.5 + *
    13.6 + * Copyright 2002       by Kai Germaschewski
    13.7 + *
    13.8 + * This software may be used and distributed according to the terms
    13.9 + * of the GNU General Public License, incorporated herein by reference.
   13.10 + *
   13.11 + * Usage: nm -n <object-file> | scripts/symbols [--all-symbols] > symbols.S
   13.12 + *
   13.13 + * ChangeLog:
   13.14 + *
   13.15 + * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
   13.16 + *      Changed the compression method from stem compression to "table lookup"
   13.17 + *      compression
   13.18 + *
   13.19 + *      Table compression uses all the unused char codes on the symbols and
   13.20 + *  maps these to the most used substrings (tokens). For instance, it might
   13.21 + *  map char code 0xF7 to represent "write_" and then in every symbol where
   13.22 + *  "write_" appears it can be replaced by 0xF7, saving 5 bytes.
   13.23 + *      The used codes themselves are also placed in the table so that the
   13.24 + *  decompresion can work without "special cases".
   13.25 + *      Applied to kernel symbols, this usually produces a compression ratio
   13.26 + *  of about 50%.
   13.27 + *
   13.28 + */
   13.29 +
   13.30 +#include <stdio.h>
   13.31 +#include <stdlib.h>
   13.32 +#include <string.h>
   13.33 +#include <ctype.h>
   13.34 +
   13.35 +/* maximum token length used. It doesn't pay to increase it a lot, because
   13.36 + * very long substrings probably don't repeat themselves too often. */
   13.37 +#define MAX_TOK_SIZE		11
   13.38 +#define KSYM_NAME_LEN		127
   13.39 +
   13.40 +/* we use only a subset of the complete symbol table to gather the token count,
   13.41 + * to speed up compression, at the expense of a little compression ratio */
   13.42 +#define WORKING_SET		1024
   13.43 +
   13.44 +/* first find the best token only on the list of tokens that would profit more
   13.45 + * than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list.
   13.46 + * Increasing this value will put less tokens on the "good" list, so the search
   13.47 + * is faster. However, if the good list runs out of tokens, we must painfully
   13.48 + * search the bad list. */
   13.49 +#define GOOD_BAD_THRESHOLD	10
   13.50 +
   13.51 +/* token hash parameters */
   13.52 +#define HASH_BITS		18
   13.53 +#define HASH_TABLE_SIZE		(1 << HASH_BITS)
   13.54 +#define HASH_MASK		(HASH_TABLE_SIZE - 1)
   13.55 +#define HASH_BASE_OFFSET	2166136261U
   13.56 +#define HASH_FOLD(a)		((a)&(HASH_MASK))
   13.57 +
   13.58 +/* flags to mark symbols */
   13.59 +#define SYM_FLAG_VALID		1
   13.60 +#define SYM_FLAG_SAMPLED	2
   13.61 +
   13.62 +struct sym_entry {
   13.63 +	unsigned long long addr;
   13.64 +	char type;
   13.65 +	unsigned char flags;
   13.66 +	unsigned char len;
   13.67 +	unsigned char *sym;
   13.68 +};
   13.69 +
   13.70 +
   13.71 +static struct sym_entry *table;
   13.72 +static int size, cnt;
   13.73 +static unsigned long long _stext, _etext;
   13.74 +static int all_symbols = 0;
   13.75 +static char symbol_prefix_char = '\0';
   13.76 +
   13.77 +struct token {
   13.78 +	unsigned char data[MAX_TOK_SIZE];
   13.79 +	unsigned char len;
   13.80 +	/* profit: the number of bytes that could be saved by inserting this
   13.81 +	 * token into the table */
   13.82 +	int profit;
   13.83 +	struct token *next;	/* next token on the hash list */
   13.84 +	struct token *right;	/* next token on the good/bad list */
   13.85 +	struct token *left;    /* previous token on the good/bad list */
   13.86 +	struct token *smaller; /* token that is less one letter than this one */
   13.87 +	};
   13.88 +
   13.89 +struct token bad_head, good_head;
   13.90 +struct token *hash_table[HASH_TABLE_SIZE];
   13.91 +
   13.92 +/* the table that holds the result of the compression */
   13.93 +unsigned char best_table[256][MAX_TOK_SIZE+1];
   13.94 +unsigned char best_table_len[256];
   13.95 +
   13.96 +
   13.97 +static void
   13.98 +usage(void)
   13.99 +{
  13.100 +	fprintf(stderr, "Usage: symbols [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
  13.101 +	exit(1);
  13.102 +}
  13.103 +
  13.104 +/*
  13.105 + * This ignores the intensely annoying "mapping symbols" found
  13.106 + * in ARM ELF files: $a, $t and $d.
  13.107 + */
  13.108 +static inline int
  13.109 +is_arm_mapping_symbol(const char *str)
  13.110 +{
  13.111 +	return str[0] == '$' && strchr("atd", str[1])
  13.112 +	       && (str[2] == '\0' || str[2] == '.');
  13.113 +}
  13.114 +
  13.115 +static int
  13.116 +read_symbol(FILE *in, struct sym_entry *s)
  13.117 +{
  13.118 +	char str[500];
  13.119 +	char *sym;
  13.120 +	int rc;
  13.121 +
  13.122 +	rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str);
  13.123 +	if (rc != 3) {
  13.124 +		if (rc != EOF) {
  13.125 +			/* skip line */
  13.126 +			fgets(str, 500, in);
  13.127 +		}
  13.128 +		return -1;
  13.129 +	}
  13.130 +
  13.131 +	sym = str;
  13.132 +	/* skip prefix char */
  13.133 +	if (symbol_prefix_char && str[0] == symbol_prefix_char)
  13.134 +		sym++;
  13.135 +
  13.136 +	/* Ignore most absolute/undefined (?) symbols. */
  13.137 +	if (strcmp(sym, "_stext") == 0)
  13.138 +		_stext = s->addr;
  13.139 +	else if (strcmp(sym, "_etext") == 0)
  13.140 +		_etext = s->addr;
  13.141 +	else if (toupper(s->type) == 'A')
  13.142 +	{
  13.143 +		/* Keep these useful absolute symbols */
  13.144 +		if (strcmp(sym, "__kernel_syscall_via_break") &&
  13.145 +		    strcmp(sym, "__kernel_syscall_via_epc") &&
  13.146 +		    strcmp(sym, "__kernel_sigtramp") &&
  13.147 +		    strcmp(sym, "__gp"))
  13.148 +			return -1;
  13.149 +
  13.150 +	}
  13.151 +	else if (toupper(s->type) == 'U' ||
  13.152 +		 is_arm_mapping_symbol(sym))
  13.153 +		return -1;
  13.154 +
  13.155 +	/* include the type field in the symbol name, so that it gets
  13.156 +	 * compressed together */
  13.157 +	s->len = strlen(str) + 1;
  13.158 +	s->sym = (char *) malloc(s->len + 1);
  13.159 +	strcpy(s->sym + 1, str);
  13.160 +	s->sym[0] = s->type;
  13.161 +
  13.162 +	return 0;
  13.163 +}
  13.164 +
  13.165 +static int
  13.166 +symbol_valid(struct sym_entry *s)
  13.167 +{
  13.168 +	/* Symbols which vary between passes.  Passes 1 and 2 must have
  13.169 +	 * identical symbol lists.  The symbols_* symbols below are only added
  13.170 +	 * after pass 1, they would be included in pass 2 when --all-symbols is
  13.171 +	 * specified so exclude them to get a stable symbol list.
  13.172 +	 */
  13.173 +	static char *special_symbols[] = {
  13.174 +		"symbols_addresses",
  13.175 +		"symbols_num_syms",
  13.176 +		"symbols_names",
  13.177 +		"symbols_markers",
  13.178 +		"symbols_token_table",
  13.179 +		"symbols_token_index",
  13.180 +
  13.181 +	/* Exclude linker generated symbols which vary between passes */
  13.182 +		"_SDA_BASE_",		/* ppc */
  13.183 +		"_SDA2_BASE_",		/* ppc */
  13.184 +		NULL };
  13.185 +	int i;
  13.186 +	int offset = 1;
  13.187 +
  13.188 +	/* skip prefix char */
  13.189 +	if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
  13.190 +		offset++;
  13.191 +
  13.192 +	/* if --all-symbols is not specified, then symbols outside the text
  13.193 +	 * and inittext sections are discarded */
  13.194 +	if (!all_symbols) {
  13.195 +		if (s->addr < _stext || s->addr > _etext)
  13.196 +			return 0;
  13.197 +		/* Corner case.  Discard any symbols with the same value as
  13.198 +		 * _etext _einittext or _eextratext; they can move between pass
  13.199 +		 * 1 and 2 when the symbols data are added.  If these symbols
  13.200 +		 * move then they may get dropped in pass 2, which breaks the
  13.201 +		 * symbols rules.
  13.202 +		 */
  13.203 +		if (s->addr == _etext && strcmp(s->sym + offset, "_etext"))
  13.204 +			return 0;
  13.205 +	}
  13.206 +
  13.207 +	/* Exclude symbols which vary between passes. */
  13.208 +	if (strstr(s->sym + offset, "_compiled."))
  13.209 +		return 0;
  13.210 +
  13.211 +	for (i = 0; special_symbols[i]; i++)
  13.212 +		if( strcmp(s->sym + offset, special_symbols[i]) == 0 )
  13.213 +			return 0;
  13.214 +
  13.215 +	return 1;
  13.216 +}
  13.217 +
  13.218 +static void
  13.219 +read_map(FILE *in)
  13.220 +{
  13.221 +	while (!feof(in)) {
  13.222 +		if (cnt >= size) {
  13.223 +			size += 10000;
  13.224 +			table = realloc(table, sizeof(*table) * size);
  13.225 +			if (!table) {
  13.226 +				fprintf(stderr, "out of memory\n");
  13.227 +				exit (1);
  13.228 +			}
  13.229 +		}
  13.230 +		if (read_symbol(in, &table[cnt]) == 0)
  13.231 +			cnt++;
  13.232 +	}
  13.233 +}
  13.234 +
  13.235 +static void output_label(char *label)
  13.236 +{
  13.237 +	if (symbol_prefix_char)
  13.238 +		printf(".globl %c%s\n", symbol_prefix_char, label);
  13.239 +	else
  13.240 +		printf(".globl %s\n", label);
  13.241 +	printf("\tALGN\n");
  13.242 +	if (symbol_prefix_char)
  13.243 +		printf("%c%s:\n", symbol_prefix_char, label);
  13.244 +	else
  13.245 +		printf("%s:\n", label);
  13.246 +}
  13.247 +
  13.248 +/* uncompress a compressed symbol. When this function is called, the best table
  13.249 + * might still be compressed itself, so the function needs to be recursive */
  13.250 +static int expand_symbol(unsigned char *data, int len, char *result)
  13.251 +{
  13.252 +	int c, rlen, total=0;
  13.253 +
  13.254 +	while (len) {
  13.255 +		c = *data;
  13.256 +		/* if the table holds a single char that is the same as the one
  13.257 +		 * we are looking for, then end the search */
  13.258 +		if (best_table[c][0]==c && best_table_len[c]==1) {
  13.259 +			*result++ = c;
  13.260 +			total++;
  13.261 +		} else {
  13.262 +			/* if not, recurse and expand */
  13.263 +			rlen = expand_symbol(best_table[c], best_table_len[c], result);
  13.264 +			total += rlen;
  13.265 +			result += rlen;
  13.266 +		}
  13.267 +		data++;
  13.268 +		len--;
  13.269 +	}
  13.270 +	*result=0;
  13.271 +
  13.272 +	return total;
  13.273 +}
  13.274 +
  13.275 +static void
  13.276 +write_src(void)
  13.277 +{
  13.278 +	int i, k, off, valid;
  13.279 +	unsigned int best_idx[256];
  13.280 +	unsigned int *markers;
  13.281 +	char buf[KSYM_NAME_LEN+1];
  13.282 +
  13.283 +	printf("#include <asm/types.h>\n");
  13.284 +	printf("#if BITS_PER_LONG == 64\n");
  13.285 +	printf("#define PTR .quad\n");
  13.286 +	printf("#define ALGN .align 8\n");
  13.287 +	printf("#else\n");
  13.288 +	printf("#define PTR .long\n");
  13.289 +	printf("#define ALGN .align 4\n");
  13.290 +	printf("#endif\n");
  13.291 +
  13.292 +	printf(".data\n");
  13.293 +
  13.294 +	output_label("symbols_addresses");
  13.295 +	valid = 0;
  13.296 +	for (i = 0; i < cnt; i++) {
  13.297 +		if (table[i].flags & SYM_FLAG_VALID) {
  13.298 +			printf("\tPTR\t%#llx\n", table[i].addr);
  13.299 +			valid++;
  13.300 +		}
  13.301 +	}
  13.302 +	printf("\n");
  13.303 +
  13.304 +	output_label("symbols_num_syms");
  13.305 +	printf("\tPTR\t%d\n", valid);
  13.306 +	printf("\n");
  13.307 +
  13.308 +	/* table of offset markers, that give the offset in the compressed stream
  13.309 +	 * every 256 symbols */
  13.310 +	markers = (unsigned int *) malloc(sizeof(unsigned int)*((valid + 255) / 256));
  13.311 +
  13.312 +	output_label("symbols_names");
  13.313 +	valid = 0;
  13.314 +	off = 0;
  13.315 +	for (i = 0; i < cnt; i++) {
  13.316 +
  13.317 +		if (!table[i].flags & SYM_FLAG_VALID)
  13.318 +			continue;
  13.319 +
  13.320 +		if ((valid & 0xFF) == 0)
  13.321 +			markers[valid >> 8] = off;
  13.322 +
  13.323 +		printf("\t.byte 0x%02x", table[i].len);
  13.324 +		for (k = 0; k < table[i].len; k++)
  13.325 +			printf(", 0x%02x", table[i].sym[k]);
  13.326 +		printf("\n");
  13.327 +
  13.328 +		off += table[i].len + 1;
  13.329 +		valid++;
  13.330 +	}
  13.331 +	printf("\n");
  13.332 +
  13.333 +	output_label("symbols_markers");
  13.334 +	for (i = 0; i < ((valid + 255) >> 8); i++)
  13.335 +		printf("\tPTR\t%d\n", markers[i]);
  13.336 +	printf("\n");
  13.337 +
  13.338 +	free(markers);
  13.339 +
  13.340 +	output_label("symbols_token_table");
  13.341 +	off = 0;
  13.342 +	for (i = 0; i < 256; i++) {
  13.343 +		best_idx[i] = off;
  13.344 +		expand_symbol(best_table[i],best_table_len[i],buf);
  13.345 +		printf("\t.asciz\t\"%s\"\n", buf);
  13.346 +		off += strlen(buf) + 1;
  13.347 +	}
  13.348 +	printf("\n");
  13.349 +
  13.350 +	output_label("symbols_token_index");
  13.351 +	for (i = 0; i < 256; i++)
  13.352 +		printf("\t.short\t%d\n", best_idx[i]);
  13.353 +	printf("\n");
  13.354 +}
  13.355 +
  13.356 +
  13.357 +/* table lookup compression functions */
  13.358 +
  13.359 +static inline unsigned int rehash_token(unsigned int hash, unsigned char data)
  13.360 +{
  13.361 +	return ((hash * 16777619) ^ data);
  13.362 +}
  13.363 +
  13.364 +static unsigned int hash_token(unsigned char *data, int len)
  13.365 +{
  13.366 +	unsigned int hash=HASH_BASE_OFFSET;
  13.367 +	int i;
  13.368 +
  13.369 +	for (i = 0; i < len; i++)
  13.370 +		hash = rehash_token(hash, data[i]);
  13.371 +
  13.372 +	return HASH_FOLD(hash);
  13.373 +}
  13.374 +
  13.375 +/* find a token given its data and hash value */
  13.376 +static struct token *find_token_hash(unsigned char *data, int len, unsigned int hash)
  13.377 +{
  13.378 +	struct token *ptr;
  13.379 +
  13.380 +	ptr = hash_table[hash];
  13.381 +
  13.382 +	while (ptr) {
  13.383 +		if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0))
  13.384 +			return ptr;
  13.385 +		ptr=ptr->next;
  13.386 +	}
  13.387 +
  13.388 +	return NULL;
  13.389 +}
  13.390 +
  13.391 +static inline void insert_token_in_group(struct token *head, struct token *ptr)
  13.392 +{
  13.393 +	ptr->right = head->right;
  13.394 +	ptr->right->left = ptr;
  13.395 +	head->right = ptr;
  13.396 +	ptr->left = head;
  13.397 +}
  13.398 +
  13.399 +static inline void remove_token_from_group(struct token *ptr)
  13.400 +{
  13.401 +	ptr->left->right = ptr->right;
  13.402 +	ptr->right->left = ptr->left;
  13.403 +}
  13.404 +
  13.405 +
  13.406 +/* build the counts for all the tokens that start with "data", and have lenghts
  13.407 + * from 2 to "len" */
  13.408 +static void learn_token(unsigned char *data, int len)
  13.409 +{
  13.410 +	struct token *ptr,*last_ptr;
  13.411 +	int i, newprofit;
  13.412 +	unsigned int hash = HASH_BASE_OFFSET;
  13.413 +	unsigned int hashes[MAX_TOK_SIZE + 1];
  13.414 +
  13.415 +	if (len > MAX_TOK_SIZE)
  13.416 +		len = MAX_TOK_SIZE;
  13.417 +
  13.418 +	/* calculate and store the hash values for all the sub-tokens */
  13.419 +	hash = rehash_token(hash, data[0]);
  13.420 +	for (i = 2; i <= len; i++) {
  13.421 +		hash = rehash_token(hash, data[i-1]);
  13.422 +		hashes[i] = HASH_FOLD(hash);
  13.423 +	}
  13.424 +
  13.425 +	last_ptr = NULL;
  13.426 +	ptr = NULL;
  13.427 +
  13.428 +	for (i = len; i >= 2; i--) {
  13.429 +		hash = hashes[i];
  13.430 +
  13.431 +		if (!ptr) ptr = find_token_hash(data, i, hash);
  13.432 +
  13.433 +		if (!ptr) {
  13.434 +			/* create a new token entry */
  13.435 +			ptr = (struct token *) malloc(sizeof(*ptr));
  13.436 +
  13.437 +			memcpy(ptr->data, data, i);
  13.438 +			ptr->len = i;
  13.439 +
  13.440 +			/* when we create an entry, it's profit is 0 because
  13.441 +			 * we also take into account the size of the token on
  13.442 +			 * the compressed table. We then subtract GOOD_BAD_THRESHOLD
  13.443 +			 * so that the test to see if this token belongs to
  13.444 +			 * the good or bad list, is a comparison to zero */
  13.445 +			ptr->profit = -GOOD_BAD_THRESHOLD;
  13.446 +
  13.447 +			ptr->next = hash_table[hash];
  13.448 +			hash_table[hash] = ptr;
  13.449 +
  13.450 +			insert_token_in_group(&bad_head, ptr);
  13.451 +
  13.452 +			ptr->smaller = NULL;
  13.453 +		} else {
  13.454 +			newprofit = ptr->profit + (ptr->len - 1);
  13.455 +			/* check to see if this token needs to be moved to a
  13.456 +			 * different list */
  13.457 +			if((ptr->profit < 0) && (newprofit >= 0)) {
  13.458 +				remove_token_from_group(ptr);
  13.459 +				insert_token_in_group(&good_head,ptr);
  13.460 +			}
  13.461 +			ptr->profit = newprofit;
  13.462 +		}
  13.463 +
  13.464 +		if (last_ptr) last_ptr->smaller = ptr;
  13.465 +		last_ptr = ptr;
  13.466 +
  13.467 +		ptr = ptr->smaller;
  13.468 +	}
  13.469 +}
  13.470 +
  13.471 +/* decrease the counts for all the tokens that start with "data", and have lenghts
  13.472 + * from 2 to "len". This function is much simpler than learn_token because we have
  13.473 + * more guarantees (tho tokens exist, the ->smaller pointer is set, etc.)
  13.474 + * The two separate functions exist only because of compression performance */
  13.475 +static void forget_token(unsigned char *data, int len)
  13.476 +{
  13.477 +	struct token *ptr;
  13.478 +	int i, newprofit;
  13.479 +	unsigned int hash=0;
  13.480 +
  13.481 +	if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE;
  13.482 +
  13.483 +	hash = hash_token(data, len);
  13.484 +	ptr = find_token_hash(data, len, hash);
  13.485 +
  13.486 +	for (i = len; i >= 2; i--) {
  13.487 +
  13.488 +		newprofit = ptr->profit - (ptr->len - 1);
  13.489 +		if ((ptr->profit >= 0) && (newprofit < 0)) {
  13.490 +			remove_token_from_group(ptr);
  13.491 +			insert_token_in_group(&bad_head, ptr);
  13.492 +		}
  13.493 +		ptr->profit=newprofit;
  13.494 +
  13.495 +		ptr=ptr->smaller;
  13.496 +	}
  13.497 +}
  13.498 +
  13.499 +/* count all the possible tokens in a symbol */
  13.500 +static void learn_symbol(unsigned char *symbol, int len)
  13.501 +{
  13.502 +	int i;
  13.503 +
  13.504 +	for (i = 0; i < len - 1; i++)
  13.505 +		learn_token(symbol + i, len - i);
  13.506 +}
  13.507 +
  13.508 +/* decrease the count for all the possible tokens in a symbol */
  13.509 +static void forget_symbol(unsigned char *symbol, int len)
  13.510 +{
  13.511 +	int i;
  13.512 +
  13.513 +	for (i = 0; i < len - 1; i++)
  13.514 +		forget_token(symbol + i, len - i);
  13.515 +}
  13.516 +
  13.517 +/* set all the symbol flags and do the initial token count */
  13.518 +static void build_initial_tok_table(void)
  13.519 +{
  13.520 +	int i, use_it, valid;
  13.521 +
  13.522 +	valid = 0;
  13.523 +	for (i = 0; i < cnt; i++) {
  13.524 +		table[i].flags = 0;
  13.525 +		if ( symbol_valid(&table[i]) ) {
  13.526 +			table[i].flags |= SYM_FLAG_VALID;
  13.527 +			valid++;
  13.528 +		}
  13.529 +	}
  13.530 +
  13.531 +	use_it = 0;
  13.532 +	for (i = 0; i < cnt; i++) {
  13.533 +
  13.534 +		/* subsample the available symbols. This method is almost like
  13.535 +		 * a Bresenham's algorithm to get uniformly distributed samples
  13.536 +		 * across the symbol table */
  13.537 +		if (table[i].flags & SYM_FLAG_VALID) {
  13.538 +
  13.539 +			use_it += WORKING_SET;
  13.540 +
  13.541 +			if (use_it >= valid) {
  13.542 +				table[i].flags |= SYM_FLAG_SAMPLED;
  13.543 +				use_it -= valid;
  13.544 +			}
  13.545 +		}
  13.546 +		if (table[i].flags & SYM_FLAG_SAMPLED)
  13.547 +			learn_symbol(table[i].sym, table[i].len);
  13.548 +	}
  13.549 +}
  13.550 +
  13.551 +/* replace a given token in all the valid symbols. Use the sampled symbols
  13.552 + * to update the counts */
  13.553 +static void compress_symbols(unsigned char *str, int tlen, int idx)
  13.554 +{
  13.555 +	int i, len, learn, size;
  13.556 +	unsigned char *p;
  13.557 +
  13.558 +	for (i = 0; i < cnt; i++) {
  13.559 +
  13.560 +		if (!(table[i].flags & SYM_FLAG_VALID)) continue;
  13.561 +
  13.562 +		len = table[i].len;
  13.563 +		learn = 0;
  13.564 +		p = table[i].sym;
  13.565 +
  13.566 +		do {
  13.567 +			/* find the token on the symbol */
  13.568 +			p = (unsigned char *) strstr((char *) p, (char *) str);
  13.569 +			if (!p) break;
  13.570 +
  13.571 +			if (!learn) {
  13.572 +				/* if this symbol was used to count, decrease it */
  13.573 +				if (table[i].flags & SYM_FLAG_SAMPLED)
  13.574 +					forget_symbol(table[i].sym, len);
  13.575 +				learn = 1;
  13.576 +			}
  13.577 +
  13.578 +			*p = idx;
  13.579 +			size = (len - (p - table[i].sym)) - tlen + 1;
  13.580 +			memmove(p + 1, p + tlen, size);
  13.581 +			p++;
  13.582 +			len -= tlen - 1;
  13.583 +
  13.584 +		} while (size >= tlen);
  13.585 +
  13.586 +		if(learn) {
  13.587 +			table[i].len = len;
  13.588 +			/* if this symbol was used to count, learn it again */
  13.589 +			if(table[i].flags & SYM_FLAG_SAMPLED)
  13.590 +				learn_symbol(table[i].sym, len);
  13.591 +		}
  13.592 +	}
  13.593 +}
  13.594 +
  13.595 +/* search the token with the maximum profit */
  13.596 +static struct token *find_best_token(void)
  13.597 +{
  13.598 +	struct token *ptr,*best,*head;
  13.599 +	int bestprofit;
  13.600 +
  13.601 +	bestprofit=-10000;
  13.602 +
  13.603 +	/* failsafe: if the "good" list is empty search from the "bad" list */
  13.604 +	if(good_head.right == &good_head) head = &bad_head;
  13.605 +	else head = &good_head;
  13.606 +
  13.607 +	ptr = head->right;
  13.608 +	best = NULL;
  13.609 +	while (ptr != head) {
  13.610 +		if (ptr->profit > bestprofit) {
  13.611 +			bestprofit = ptr->profit;
  13.612 +			best = ptr;
  13.613 +		}
  13.614 +		ptr = ptr->right;
  13.615 +	}
  13.616 +
  13.617 +	return best;
  13.618 +}
  13.619 +
  13.620 +/* this is the core of the algorithm: calculate the "best" table */
  13.621 +static void optimize_result(void)
  13.622 +{
  13.623 +	struct token *best;
  13.624 +	int i;
  13.625 +
  13.626 +	/* using the '\0' symbol last allows compress_symbols to use standard
  13.627 +	 * fast string functions */
  13.628 +	for (i = 255; i >= 0; i--) {
  13.629 +
  13.630 +		/* if this table slot is empty (it is not used by an actual
  13.631 +		 * original char code */
  13.632 +		if (!best_table_len[i]) {
  13.633 +
  13.634 +			/* find the token with the breates profit value */
  13.635 +			best = find_best_token();
  13.636 +
  13.637 +			/* place it in the "best" table */
  13.638 +			best_table_len[i] = best->len;
  13.639 +			memcpy(best_table[i], best->data, best_table_len[i]);
  13.640 +			/* zero terminate the token so that we can use strstr
  13.641 +			   in compress_symbols */
  13.642 +			best_table[i][best_table_len[i]]='\0';
  13.643 +
  13.644 +			/* replace this token in all the valid symbols */
  13.645 +			compress_symbols(best_table[i], best_table_len[i], i);
  13.646 +		}
  13.647 +	}
  13.648 +}
  13.649 +
  13.650 +/* start by placing the symbols that are actually used on the table */
  13.651 +static void insert_real_symbols_in_table(void)
  13.652 +{
  13.653 +	int i, j, c;
  13.654 +
  13.655 +	memset(best_table, 0, sizeof(best_table));
  13.656 +	memset(best_table_len, 0, sizeof(best_table_len));
  13.657 +
  13.658 +	for (i = 0; i < cnt; i++) {
  13.659 +		if (table[i].flags & SYM_FLAG_VALID) {
  13.660 +			for (j = 0; j < table[i].len; j++) {
  13.661 +				c = table[i].sym[j];
  13.662 +				best_table[c][0]=c;
  13.663 +				best_table_len[c]=1;
  13.664 +			}
  13.665 +		}
  13.666 +	}
  13.667 +}
  13.668 +
  13.669 +static void optimize_token_table(void)
  13.670 +{
  13.671 +	memset(hash_table, 0, sizeof(hash_table));
  13.672 +
  13.673 +	good_head.left = &good_head;
  13.674 +	good_head.right = &good_head;
  13.675 +
  13.676 +	bad_head.left = &bad_head;
  13.677 +	bad_head.right = &bad_head;
  13.678 +
  13.679 +	build_initial_tok_table();
  13.680 +
  13.681 +	insert_real_symbols_in_table();
  13.682 +
  13.683 +	/* When valid symbol is not registered, exit to error */
  13.684 +	if (good_head.left == good_head.right &&
  13.685 +	    bad_head.left == bad_head.right) {
  13.686 +		fprintf(stderr, "No valid symbol.\n");
  13.687 +		exit(1);
  13.688 +	}
  13.689 +
  13.690 +	optimize_result();
  13.691 +}
  13.692 +
  13.693 +
  13.694 +int
  13.695 +main(int argc, char **argv)
  13.696 +{
  13.697 +	if (argc >= 2) {
  13.698 +		int i;
  13.699 +		for (i = 1; i < argc; i++) {
  13.700 +			if(strcmp(argv[i], "--all-symbols") == 0)
  13.701 +				all_symbols = 1;
  13.702 +			else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
  13.703 +				char *p = &argv[i][16];
  13.704 +				/* skip quote */
  13.705 +				if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
  13.706 +					p++;
  13.707 +				symbol_prefix_char = *p;
  13.708 +			} else
  13.709 +				usage();
  13.710 +		}
  13.711 +	} else if (argc != 1)
  13.712 +		usage();
  13.713 +
  13.714 +	read_map(stdin);
  13.715 +	optimize_token_table();
  13.716 +	write_src();
  13.717 +
  13.718 +	return 0;
  13.719 +}