ia64/xen-unstable

changeset 1377:acc04d188782

bitkeeper revision 1.903 (40a35338ZpUgNaqcF2wHoseyC85T4Q)

manual merge
author iap10@labyrinth.cl.cam.ac.uk
date Thu May 13 10:51:36 2004 +0000 (2004-05-13)
parents a3dd7bf2fcd6 623355fbbab9
children b996311d6c69
files .rootkeys tools/examples/xc_dom_control.py xen/arch/i386/entry.S xen/arch/i386/io_apic.c xen/arch/i386/irq.c xen/arch/i386/process.c xen/common/domain.c xen/common/kernel.c xen/common/memory.c xen/common/physdev.c xen/common/schedule.c xen/include/hypervisor-ifs/physdev.h xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c xenolinux-2.4.26-sparse/include/linux/skbuff.h
line diff
     1.1 --- a/.rootkeys	Thu May 13 10:17:36 2004 +0000
     1.2 +++ b/.rootkeys	Thu May 13 10:51:36 2004 +0000
     1.3 @@ -753,6 +753,7 @@ 40659defgWA92arexpMGn8X3QMDj3w xenolinux
     1.4  3f056927gMHl7mWB89rb73JahbhQIA xenolinux-2.4.26-sparse/include/linux/blk.h
     1.5  3e5a4e68WLX3B8owTvktP3HHOtznPQ xenolinux-2.4.26-sparse/include/linux/major.h
     1.6  401c0590D_kwJDU59X8NyvqSv_Cl2A xenolinux-2.4.26-sparse/include/linux/sched.h
     1.7 +40a248afgI0_JKthdYAe8beVfXSTpQ xenolinux-2.4.26-sparse/include/linux/skbuff.h
     1.8  3e5a4e686V0nioX2ZpFf056sgvdiQw xenolinux-2.4.26-sparse/include/linux/sunrpc/debug.h
     1.9  401c0592pLrp_aCbQRo9GXiYQQaVVA xenolinux-2.4.26-sparse/include/linux/timer.h
    1.10  3e5a4e68W_hpMlM3u_-QOKMp3gzcwQ xenolinux-2.4.26-sparse/init/do_mounts.c
     2.1 --- a/tools/examples/xc_dom_control.py	Thu May 13 10:17:36 2004 +0000
     2.2 +++ b/tools/examples/xc_dom_control.py	Thu May 13 10:51:36 2004 +0000
     2.3 @@ -92,14 +92,14 @@ elif cmd == 'pincpu':
     2.4      rc = xc.domain_pincpu( dom, cpu )
     2.5  
     2.6  elif cmd == 'list':
     2.7 -    print 'Dom  Name             Mem(kb)  CPU  State  Time(s)'
     2.8 +    print 'Dom  Name             Mem(kb)  CPU  State  Time(ms)'
     2.9      for domain in xc.domain_getinfo():
    2.10  
    2.11  	run = (domain['running'] and 'r') or '-'		# domain['running'] ? run='r' : run='-'
    2.12  	stop = (domain['stopped'] and 's') or '-'		# domain['stopped'] ? stop='s': stop='-'
    2.13  
    2.14          domain['state'] = run + stop
    2.15 -        domain['cpu_time'] = domain['cpu_time']/1e8
    2.16 +        domain['cpu_time'] = domain['cpu_time']/1e6
    2.17  
    2.18          print "%(dom)-4d %(name)-16s %(mem_kb)7d %(cpu)3d %(state)5s %(cpu_time)8d" % domain
    2.19  
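If the hypervisor reports cpu_time in nanoseconds, as the new 1e6 divisor and the Time(ms) column header suggest, a domain that has consumed 2,500,000,000 ns of CPU time is now listed as 2500 ms; under the old 1e8 divisor the same value appeared as 25 in a column labelled seconds.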
     3.1 --- a/xen/arch/i386/entry.S	Thu May 13 10:17:36 2004 +0000
     3.2 +++ b/xen/arch/i386/entry.S	Thu May 13 10:51:36 2004 +0000
     3.3 @@ -245,6 +245,10 @@ restore_all_guest:
     3.4          movsl
     3.5          movsl
     3.6          movsl
     3.7 +        # Third, reenable interrupts. They will definitely be reenabled by IRET
     3.8 +        # in any case. They could be disabled here if we are returning from an
     3.9 +        # interrupt. We need interrupts enabled if we take a fault.
    3.10 +        sti
    3.11          # Finally, restore guest registers -- faults will cause failsafe
    3.12          popl %ebx
    3.13  	popl %ecx
     4.1 --- a/xen/arch/i386/io_apic.c	Thu May 13 10:17:36 2004 +0000
     4.2 +++ b/xen/arch/i386/io_apic.c	Thu May 13 10:51:36 2004 +0000
     4.3 @@ -208,7 +208,11 @@ static void set_ioapic_affinity (unsigne
     4.4  	spin_unlock_irqrestore(&ioapic_lock, flags);
     4.5  }
     4.6  
     4.7 -#if CONFIG_SMP
     4.8 +/*
      4.9 + * In the new I/O model, the interrupt is pinned to the CPU of the first
    4.10 + * device-driver domain that attaches. Dynamic balancing is pointless.
    4.11 + */
    4.12 +#if defined(CONFIG_SMP) && !defined(NO_DEVICES_IN_XEN)
    4.13  
    4.14  typedef struct {
    4.15  	unsigned int cpu;
    4.16 @@ -220,8 +224,6 @@ static irq_balance_t irq_balance[NR_IRQS
    4.17  
    4.18  extern unsigned long irq_affinity [NR_IRQS];
    4.19  
    4.20 -#endif
    4.21 -
    4.22  #define IDLE_ENOUGH(cpu,now) \
    4.23  		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
    4.24  
    4.25 @@ -256,7 +258,6 @@ inside:
    4.26  
    4.27  static inline void balance_irq(int irq)
    4.28  {
    4.29 -#if CONFIG_SMP
    4.30  	irq_balance_t *entry = irq_balance + irq;
    4.31  	unsigned long now = jiffies;
    4.32  
    4.33 @@ -272,8 +273,13 @@ static inline void balance_irq(int irq)
    4.34  		entry->cpu = move(entry->cpu, allowed_mask, now, random_number);
    4.35  		set_ioapic_affinity(irq, apicid_to_phys_cpu_present(entry->cpu));
    4.36  	}
    4.37 +}
    4.38 +
    4.39 +#else
    4.40 +
    4.41 +#define balance_irq(_irq) ((void)0)
    4.42 +
    4.43  #endif
    4.44 -}
    4.45  
    4.46  /*
    4.47   * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
    4.48 @@ -883,6 +889,7 @@ void __init UNEXPECTED_IO_APIC(void)
    4.49  
    4.50  void __init print_IO_APIC(void)
    4.51  {
    4.52 +#ifndef NDEBUG
    4.53  	int apic, i;
    4.54  	struct IO_APIC_reg_00 reg_00;
    4.55  	struct IO_APIC_reg_01 reg_01;
    4.56 @@ -1019,9 +1026,11 @@ void __init print_IO_APIC(void)
    4.57  	}
    4.58  
    4.59  	printk(KERN_INFO ".................................... done.\n");
    4.60 +#endif
    4.61 +}
    4.62  
    4.63 -	return;
    4.64 -}
    4.65 +
    4.66 +#if 0 /* Maybe useful for debugging, but not currently used anywhere. */
    4.67  
    4.68  static void print_APIC_bitfield (int base)
    4.69  {
    4.70 @@ -1041,6 +1050,7 @@ static void print_APIC_bitfield (int bas
    4.71  	}
    4.72  }
    4.73  
    4.74 +
    4.75  void /*__init*/ print_local_APIC(void * dummy)
    4.76  {
    4.77  	unsigned int v, ver, maxlvt;
    4.78 @@ -1156,6 +1166,9 @@ void /*__init*/ print_PIC(void)
    4.79  	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
    4.80  }
    4.81  
    4.82 +#endif /* 0 */
    4.83 +
    4.84 +
    4.85  static void __init enable_IO_APIC(void)
    4.86  {
    4.87  	struct IO_APIC_reg_01 reg_01;
    4.88 @@ -1874,7 +1887,7 @@ int io_apic_set_pci_routing (int ioapic,
    4.89  		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low);
    4.90  
    4.91  	if (edge_level) {
    4.92 -	irq_desc[irq].handler = &ioapic_level_irq_type;
    4.93 +		irq_desc[irq].handler = &ioapic_level_irq_type;
    4.94  	} else {
    4.95  		irq_desc[irq].handler = &ioapic_edge_irq_type;
    4.96  	}
    4.97 @@ -1893,3 +1906,110 @@ int io_apic_set_pci_routing (int ioapic,
    4.98  }
    4.99  
   4.100  #endif /*CONFIG_ACPI_BOOT*/
   4.101 +
   4.102 +extern char opt_leveltrigger[], opt_edgetrigger[];
   4.103 +
   4.104 +static int __init ioapic_trigger_setup(void)
   4.105 +{
   4.106 +    char       *p;
   4.107 +    irq_desc_t *desc;
   4.108 +    long        irq;
   4.109 +
   4.110 +    p = opt_leveltrigger;
   4.111 +    while ( *p != '\0' )
   4.112 +    {
   4.113 +        irq = simple_strtol(p, &p, 10);
   4.114 +        if ( (irq <= 0) || (irq >= NR_IRQS) )
   4.115 +        {
   4.116 +            printk("IRQ '%ld' out of range in level-trigger list '%s'\n",
   4.117 +                   irq, opt_leveltrigger);
   4.118 +            break;
   4.119 +        }
   4.120 +
   4.121 +        printk("Forcing IRQ %ld to level-trigger: ", irq);
   4.122 +
   4.123 +        desc = &irq_desc[irq];
   4.124 +        spin_lock_irq(&desc->lock);
   4.125 +
   4.126 +        if ( desc->handler == &ioapic_level_irq_type )
   4.127 +        {
   4.128 +            printk("already level-triggered (no force applied).\n");
   4.129 +        }
   4.130 +        else if ( desc->handler != &ioapic_edge_irq_type )
   4.131 +        {
   4.132 +            printk("cannot force (can only force IO-APIC-edge IRQs).\n");
   4.133 +        }
   4.134 +        else
   4.135 +        {
   4.136 +            desc->handler = &ioapic_level_irq_type;
   4.137 +            __mask_IO_APIC_irq(irq);
   4.138 +            __level_IO_APIC_irq(irq);        
   4.139 +            printk("done.\n");
   4.140 +        }
   4.141 +
   4.142 +        spin_unlock_irq(&desc->lock);
   4.143 +
   4.144 +        if ( *p == '\0' )
   4.145 +            break;
   4.146 +
   4.147 +        if ( *p != ',' )
   4.148 +        {
   4.149 +            printk("Unexpected character '%c' in level-trigger list '%s'\n",
   4.150 +                   *p, opt_leveltrigger);
   4.151 +            break;
   4.152 +        }
   4.153 +
   4.154 +        p++;
   4.155 +    }
   4.156 +
   4.157 +    p = opt_edgetrigger;
   4.158 +    while ( *p != '\0' )
   4.159 +    {
   4.160 +        irq = simple_strtol(p, &p, 10);
   4.161 +        if ( (irq <= 0) || (irq >= NR_IRQS) )
   4.162 +        {
   4.163 +            printk("IRQ '%ld' out of range in edge-trigger list '%s'\n",
   4.164 +                   irq, opt_edgetrigger);
   4.165 +            break;
   4.166 +        }
   4.167 +
   4.168 +        printk("Forcing IRQ %ld to edge-trigger: ", irq);
   4.169 +
   4.170 +        desc = &irq_desc[irq];
   4.171 +        spin_lock_irq(&desc->lock);
   4.172 +
   4.173 +        if ( desc->handler == &ioapic_edge_irq_type )
   4.174 +        {
   4.175 +            printk("already edge-triggered (no force applied).\n");
   4.176 +        }
   4.177 +        else if ( desc->handler != &ioapic_level_irq_type )
   4.178 +        {
   4.179 +            printk("cannot force (can only force IO-APIC-level IRQs).\n");
   4.180 +        }
   4.181 +        else
   4.182 +        {
   4.183 +            desc->handler = &ioapic_edge_irq_type;
   4.184 +            __edge_IO_APIC_irq(irq);        
   4.185 +            desc->status |= IRQ_PENDING; /* may have lost a masked edge */
   4.186 +            printk("done.\n");
   4.187 +        }
   4.188 +
   4.189 +        spin_unlock_irq(&desc->lock);
   4.190 +
   4.191 +        if ( *p == '\0' )
   4.192 +            break;
   4.193 +
   4.194 +        if ( *p != ',' )
   4.195 +        {
   4.196 +            printk("Unexpected character '%c' in edge-trigger list '%s'\n",
   4.197 +                   *p, opt_edgetrigger);
   4.198 +            break;
   4.199 +        }
   4.200 +
   4.201 +        p++;
   4.202 +    }
   4.203 +
   4.204 +    return 0;
   4.205 +}
   4.206 +
   4.207 +__initcall(ioapic_trigger_setup);
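The ioapic_trigger_setup() routine above walks each comma-separated IRQ list with simple_strtol(). As a rough standalone illustration of that parsing pattern -- using the standard C library's strtol() and a stub in place of the real per-IRQ retrigger work done under desc->lock -- a minimal sketch might be:

    #include <stdio.h>
    #include <stdlib.h>

    /* Stub standing in for the per-IRQ trigger-mode change done under desc->lock. */
    static void force_trigger(long irq)
    {
        printf("Forcing IRQ %ld\n", irq);
    }

    /* Parse a comma-separated list of IRQ numbers, as in ioapic_trigger_setup(). */
    static void parse_irq_list(char *opt)
    {
        char *p = opt;
        long  irq;

        while (*p != '\0') {
            irq = strtol(p, &p, 10);
            if ((irq <= 0) || (irq >= 256)) {       /* 256 stands in for NR_IRQS */
                printf("IRQ '%ld' out of range in list '%s'\n", irq, opt);
                break;
            }
            force_trigger(irq);
            if (*p == '\0')
                break;
            if (*p != ',') {
                printf("Unexpected character '%c' in list '%s'\n", *p, opt);
                break;
            }
            p++;
        }
    }

    int main(void)
    {
        char buf[] = "4,5,6,20";    /* e.g. the boot option leveltrigger=4,5,6,20 */
        parse_irq_list(buf);
        return 0;
    }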
     5.1 --- a/xen/arch/i386/irq.c	Thu May 13 10:17:36 2004 +0000
     5.2 +++ b/xen/arch/i386/irq.c	Thu May 13 10:51:36 2004 +0000
     5.3 @@ -39,6 +39,7 @@
     5.4  #include <xen/delay.h>
     5.5  #include <xen/timex.h>
     5.6  #include <xen/perfc.h>
     5.7 +#include <asm/smpboot.h>
     5.8  
     5.9  /*
    5.10   * Linux has a controller-independent x86 interrupt architecture.
    5.11 @@ -1034,6 +1035,11 @@ int pirq_guest_bind(struct task_struct *
    5.12          desc->status |= IRQ_GUEST;
    5.13          desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
    5.14          desc->handler->startup(irq);
    5.15 +
    5.16 +        /* Attempt to bind the interrupt target to the correct CPU. */
    5.17 +        if ( desc->handler->set_affinity != NULL )
    5.18 +            desc->handler->set_affinity(
    5.19 +                irq, apicid_to_phys_cpu_present(p->processor));
    5.20      }
    5.21      else if ( !will_share || !action->shareable )
    5.22      {
     6.1 --- a/xen/arch/i386/process.c	Thu May 13 10:17:36 2004 +0000
     6.2 +++ b/xen/arch/i386/process.c	Thu May 13 10:51:36 2004 +0000
     6.3 @@ -270,7 +270,7 @@ void switch_to(struct task_struct *prev_
     6.4          tss->ss1  = next->guestos_ss;
     6.5  
     6.6          /* Maybe switch the debug registers. */
     6.7 -        if ( next->debugreg[7] )
     6.8 +        if ( unlikely(next->debugreg[7]) )
     6.9          {
    6.10              loaddebug(next, 0);
    6.11              loaddebug(next, 1);
    6.12 @@ -280,10 +280,17 @@ void switch_to(struct task_struct *prev_
    6.13              loaddebug(next, 6);
    6.14              loaddebug(next, 7);
    6.15          }
    6.16 +
    6.17 +        /* Switch page tables. */
    6.18 +        write_ptbase(&next_p->mm);
    6.19 +        tlb_clocktick();
    6.20      }
    6.21  
    6.22 -    if ( ( prev_p->io_bitmap != NULL ) || ( next_p->io_bitmap != NULL ) ) {
    6.23 -        if ( next_p->io_bitmap != NULL ) {
    6.24 +    if ( unlikely(prev_p->io_bitmap != NULL) || 
    6.25 +         unlikely(next_p->io_bitmap != NULL) )
    6.26 +    {
    6.27 +        if ( next_p->io_bitmap != NULL )
    6.28 +        {
    6.29              /* Copy in the appropriate parts of the IO bitmap.  We use the
    6.30               * selector to copy only the interesting parts of the bitmap. */
    6.31  
    6.32 @@ -314,7 +321,9 @@ void switch_to(struct task_struct *prev_
    6.33  
    6.34              tss->bitmap = IO_BITMAP_OFFSET;
    6.35  
    6.36 -	} else {
    6.37 +	}
    6.38 +        else
    6.39 +        {
    6.40              /* In this case, we're switching FROM a task with IO port access,
    6.41               * to a task that doesn't use the IO bitmap.  We set any TSS bits
    6.42               * that might have been cleared, ready for future use. */
    6.43 @@ -332,11 +341,6 @@ void switch_to(struct task_struct *prev_
    6.44              tss->bitmap = INVALID_IO_BITMAP_OFFSET;
    6.45  	}
    6.46      }
    6.47 -    
    6.48 -    
    6.49 -    /* Switch page tables. */
    6.50 -    write_ptbase(&next_p->mm);
    6.51 -    tlb_clocktick();
    6.52  
    6.53      set_current(next_p);
    6.54  
     7.1 --- a/xen/common/domain.c	Thu May 13 10:17:36 2004 +0000
     7.2 +++ b/xen/common/domain.c	Thu May 13 10:51:36 2004 +0000
     7.3 @@ -222,10 +222,6 @@ void __kill_domain(struct task_struct *p
     7.4      *pp = p->next_hash;
     7.5      write_unlock_irqrestore(&tasklist_lock, flags);
     7.6  
     7.7 -    if ( atomic_read(&p->refcnt) >2 )
     7.8 -	DPRINTK("Domain refcnt>1 so kil deferred. Missing put_task? p=%p cur=%p cnt=%d\n",p,current,atomic_read(&p->refcnt));
     7.9 -
    7.10 -
    7.11      if ( p == current )
    7.12      {
    7.13          __enter_scheduler();
    7.14 @@ -420,7 +416,16 @@ void free_all_dom_mem(struct task_struct
    7.15  
    7.16      INIT_LIST_HEAD(&zombies);
    7.17  
    7.18 -    if ( p->mm.shadow_mode ) shadow_mode_disable(p);
    7.19 +    /*
    7.20 +     * If we're executing the idle task then we may still be running over the 
    7.21 +     * dead domain's page tables. We'd better fix that before freeing them!
    7.22 +     */
    7.23 +    if ( is_idle_task(current) )
    7.24 +        write_ptbase(&current->mm);
    7.25 +
    7.26 +    /* Exit shadow mode before deconstructing final guest page table. */
    7.27 +    if ( p->mm.shadow_mode )
    7.28 +        shadow_mode_disable(p);
    7.29  
    7.30      /* STEP 1. Drop the in-use reference to the page-table base. */
    7.31      put_page_and_type(&frame_table[pagetable_val(p->mm.pagetable) >>
    7.32 @@ -1078,7 +1083,7 @@ int construct_dom0(struct task_struct *p
    7.33  
    7.34      set_bit(PF_CONSTRUCTED, &p->flags);
    7.35  
    7.36 -#if 0 // XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) 
    7.37 +#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
    7.38      shadow_mode_enable(&p->mm, SHM_test); 
    7.39  #endif
    7.40  
     8.1 --- a/xen/common/kernel.c	Thu May 13 10:17:36 2004 +0000
     8.2 +++ b/xen/common/kernel.c	Thu May 13 10:51:36 2004 +0000
     8.3 @@ -75,31 +75,37 @@ unsigned char opt_pdb[10] = "none";
     8.4  unsigned int opt_tbuf_size = 1;
     8.5  /* opt_sched: scheduler - default to Borrowed Virtual Time */
     8.6  char opt_sched[10] = "bvt";
     8.7 -/* opt_physdev_dom0_hide: list of PCI slots to hide from dom0
     8.8 - * Should have the format '(%02x:%02x.%1x)(%02x:%02x.%1x)...etc' */
     8.9 -char opt_physdev_dom0_hide[20] = "";
    8.10 +/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */
    8.11 +/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */
    8.12 +char opt_physdev_dom0_hide[200] = "";
    8.13 +/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */
    8.14 +/*                                    level- or edge-triggered.         */
    8.15 +/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */
    8.16 +char opt_leveltrigger[30] = "", opt_edgetrigger[30] = "";
    8.17  
    8.18  static struct {
    8.19      unsigned char *name;
    8.20      enum { OPT_IP, OPT_STR, OPT_UINT, OPT_BOOL } type;
    8.21      void *var;
    8.22  } opts[] = {
    8.23 -    { "console",          OPT_STR,  &opt_console },
    8.24 -    { "ser_baud",         OPT_UINT, &opt_ser_baud },
    8.25 -    { "com1",             OPT_STR,  &opt_com1 },
    8.26 -    { "com2",             OPT_STR,  &opt_com2 },
    8.27 -    { "dom0_mem",         OPT_UINT, &opt_dom0_mem }, 
    8.28 -    { "ifname",           OPT_STR,  &opt_ifname },
    8.29 -    { "noht",             OPT_BOOL, &opt_noht },
    8.30 -    { "noacpi",           OPT_BOOL, &opt_noacpi },
    8.31 -    { "nosmp",            OPT_BOOL, &opt_nosmp },
    8.32 -    { "noreboot",         OPT_BOOL, &opt_noreboot },
    8.33 -    { "ignorebiostables", OPT_BOOL, &opt_ignorebiostables },
    8.34 -    { "watchdog",         OPT_BOOL, &opt_watchdog },
    8.35 -    { "pdb",              OPT_STR,  &opt_pdb },
    8.36 -    { "tbuf_size",        OPT_UINT, &opt_tbuf_size },
    8.37 -    { "sched",            OPT_STR,  &opt_sched },
    8.38 -    { "physdev_dom0_hide",OPT_STR,  &opt_physdev_dom0_hide },
    8.39 +    { "console",           OPT_STR,  &opt_console },
    8.40 +    { "ser_baud",          OPT_UINT, &opt_ser_baud },
    8.41 +    { "com1",              OPT_STR,  &opt_com1 },
    8.42 +    { "com2",              OPT_STR,  &opt_com2 },
    8.43 +    { "dom0_mem",          OPT_UINT, &opt_dom0_mem }, 
    8.44 +    { "ifname",            OPT_STR,  &opt_ifname },
    8.45 +    { "noht",              OPT_BOOL, &opt_noht },
    8.46 +    { "noacpi",            OPT_BOOL, &opt_noacpi },
    8.47 +    { "nosmp",             OPT_BOOL, &opt_nosmp },
    8.48 +    { "noreboot",          OPT_BOOL, &opt_noreboot },
    8.49 +    { "ignorebiostables",  OPT_BOOL, &opt_ignorebiostables },
    8.50 +    { "watchdog",          OPT_BOOL, &opt_watchdog },
    8.51 +    { "pdb",               OPT_STR,  &opt_pdb },
    8.52 +    { "tbuf_size",         OPT_UINT, &opt_tbuf_size },
    8.53 +    { "sched",             OPT_STR,  &opt_sched },
    8.54 +    { "physdev_dom0_hide", OPT_STR,  &opt_physdev_dom0_hide },
    8.55 +    { "leveltrigger",      OPT_STR,  &opt_leveltrigger },
    8.56 +    { "edgetrigger",       OPT_STR,  &opt_edgetrigger },
    8.57      { NULL,               0,        NULL     }
    8.58  };
    8.59  
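The new leveltrigger= and edgetrigger= entries are registered in the opts[] table above alongside the existing boot options; the table itself is consumed by Xen's command-line parser, which this changeset does not touch. Purely as an illustration of the table-driven pattern, a minimal standalone matcher for "name" / "name=value" tokens -- with hypothetical destination variables and a hypothetical handle_option() helper, not Xen's actual parser -- might look like:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    enum opt_type { OPT_STR, OPT_UINT, OPT_BOOL };

    struct opt_entry {
        const char   *name;
        enum opt_type type;
        void         *var;
    };

    /* Example destination variables, mirroring the style of kernel.c. */
    static char         opt_sched[10]  = "bvt";
    static unsigned int opt_tbuf_size  = 1;
    static int          opt_noht       = 0;

    static struct opt_entry opts[] = {
        { "sched",     OPT_STR,  opt_sched },
        { "tbuf_size", OPT_UINT, &opt_tbuf_size },
        { "noht",      OPT_BOOL, &opt_noht },
        { NULL,        OPT_STR,  NULL }
    };

    /* Hypothetical helper: match one option name against the table and store it. */
    static void handle_option(const char *name, const char *value)
    {
        struct opt_entry *e;
        for (e = opts; e->name != NULL; e++) {
            if (strcmp(e->name, name) != 0)
                continue;
            switch (e->type) {
            case OPT_STR:                   /* destination assumed to be char[10] */
                strncpy(e->var, value ? value : "", 9);
                ((char *)e->var)[9] = '\0';
                break;
            case OPT_UINT:
                *(unsigned int *)e->var = value ? (unsigned int)strtoul(value, NULL, 0) : 0;
                break;
            case OPT_BOOL:                  /* presence of the option sets the flag */
                *(int *)e->var = 1;
                break;
            }
            return;
        }
    }

    int main(void)
    {
        handle_option("sched", "rrobin");
        handle_option("tbuf_size", "4");
        handle_option("noht", NULL);
        printf("sched=%s tbuf_size=%u noht=%d\n", opt_sched, opt_tbuf_size, opt_noht);
        return 0;
    }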
     9.1 --- a/xen/common/memory.c	Thu May 13 10:17:36 2004 +0000
     9.2 +++ b/xen/common/memory.c	Thu May 13 10:51:36 2004 +0000
     9.3 @@ -194,7 +194,6 @@ static struct {
     9.4   */
     9.5  void __init init_frametable(unsigned long nr_pages)
     9.6  {
     9.7 -    int i;
     9.8      unsigned long mfn;
     9.9  
    9.10      memset(percpu_info, 0, sizeof(percpu_info));
    9.11 @@ -209,20 +208,19 @@ void __init init_frametable(unsigned lon
    9.12      INIT_LIST_HEAD(&free_list);    
    9.13      free_pfns = 0;
    9.14  
    9.15 -    /* so that we can map them latter, set the ownership of pages
    9.16 -       belonging to the machine_to_phys_mapping to CPU0 idle task */
    9.17 -    
    9.18 -    mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
    9.19 -
    9.20      /* initialise to a magic of 0x55555555 so easier to spot bugs later */
    9.21      memset( machine_to_phys_mapping, 0x55, 4*1024*1024 );
    9.22  
    9.23      /* The array is sized for a 4GB machine regardless of actuall mem size. 
    9.24         This costs 4MB -- may want to fix some day */
    9.25 -    for(i=0;i<1024*1024;i+=1024,mfn++)
    9.26 +
     9.27 +    /* Pin the ownership of the machine-to-phys table so that DOM0 can map it later. */
    9.28 +    for ( mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
    9.29 +          mfn < virt_to_phys((void *)RDWR_MPT_VIRT_END)>>PAGE_SHIFT;
    9.30 +          mfn++ )
    9.31      {
    9.32  	frame_table[mfn].count_and_flags = 1 | PGC_allocated;
    9.33 -	frame_table[mfn].type_and_flags = 1 | PGT_gdt_page; // anything non RW
    9.34 +	frame_table[mfn].type_and_flags = 1 | PGT_gdt_page; /* non-RW type */
    9.35  	frame_table[mfn].u.domain = &idle0_task;
    9.36      }
    9.37  }
    10.1 --- a/xen/common/physdev.c	Thu May 13 10:17:36 2004 +0000
    10.2 +++ b/xen/common/physdev.c	Thu May 13 10:51:36 2004 +0000
    10.3 @@ -634,9 +634,10 @@ static long pci_probe_root_buses(u32 *bu
    10.4   */
    10.5  long do_physdev_op(physdev_op_t *uop)
    10.6  {
    10.7 -    phys_dev_t *pdev;
    10.8 +    phys_dev_t  *pdev;
    10.9      physdev_op_t op;
   10.10 -    long ret;
   10.11 +    long         ret;
   10.12 +    int          irq;
   10.13  
   10.14      if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
   10.15          return -EFAULT;
   10.16 @@ -674,10 +675,22 @@ long do_physdev_op(physdev_op_t *uop)
   10.17          ret = pci_probe_root_buses(op.u.pci_probe_root_buses.busmask);
   10.18          break;
   10.19  
   10.20 -    case PHYSDEVOP_UNMASK_IRQ:
   10.21 +    case PHYSDEVOP_IRQ_UNMASK_NOTIFY:
   10.22          ret = pirq_guest_unmask(current);
   10.23          break;
   10.24  
   10.25 +    case PHYSDEVOP_IRQ_STATUS_QUERY:
   10.26 +        irq = op.u.irq_status_query.irq;
   10.27 +        ret = -EINVAL;
   10.28 +        if ( (irq < 0) || (irq >= NR_IRQS) )
   10.29 +            break;
   10.30 +        op.u.irq_status_query.flags = 0;
   10.31 +        /* Edge-triggered interrupts don't need an explicit unmask downcall. */
   10.32 +        if ( strstr(irq_desc[irq].handler->typename, "edge") == NULL )
   10.33 +            op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
   10.34 +        ret = 0;
   10.35 +        break;
   10.36 +
   10.37      default:
   10.38          ret = -EINVAL;
   10.39          break;
    11.1 --- a/xen/common/schedule.c	Thu May 13 10:17:36 2004 +0000
    11.2 +++ b/xen/common/schedule.c	Thu May 13 10:51:36 2004 +0000
    11.3 @@ -520,7 +520,7 @@ asmlinkage void __enter_scheduler(void)
    11.4  
    11.5      /* Mark a timer event for the newly-scheduled domain. */
    11.6      if ( !is_idle_task(next) )
    11.7 -        evtchn_set_pending(next, VIRQ_TIMER);
    11.8 +        send_guest_virq(next, VIRQ_TIMER);
    11.9      
   11.10      schedule_tail(next);
   11.11  
    12.1 --- a/xen/include/hypervisor-ifs/physdev.h	Thu May 13 10:17:36 2004 +0000
    12.2 +++ b/xen/include/hypervisor-ifs/physdev.h	Thu May 13 10:51:36 2004 +0000
    12.3 @@ -14,44 +14,55 @@
    12.4  #define PHYSDEVOP_PCI_CFGREG_WRITE      1
    12.5  #define PHYSDEVOP_PCI_INITIALISE_DEVICE 2
    12.6  #define PHYSDEVOP_PCI_PROBE_ROOT_BUSES  3
    12.7 -#define PHYSDEVOP_UNMASK_IRQ            4
    12.8 +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY     4
    12.9 +#define PHYSDEVOP_IRQ_STATUS_QUERY      5
   12.10  
   12.11  /* Read from PCI configuration space. */
   12.12 -typedef struct physdevop_pci_cfgreg_read_st
   12.13 -{
   12.14 -    int bus;        /* IN */
   12.15 -    int dev;        /* IN */
   12.16 -    int func;       /* IN */
   12.17 -    int reg;        /* IN */
   12.18 -    int len;        /* IN */
   12.19 -    u32 value;      /* OUT */
   12.20 +typedef struct {
   12.21 +    /* IN */
   12.22 +    int bus;
   12.23 +    int dev;
   12.24 +    int func;
   12.25 +    int reg;
   12.26 +    int len;
   12.27 +    /* OUT */
   12.28 +    u32 value;
   12.29  } physdevop_pci_cfgreg_read_t;
   12.30  
   12.31  /* Write to PCI configuration space. */
   12.32 -typedef struct physdevop_pci_cfgreg_write_st
   12.33 -{
   12.34 -    int bus;        /* IN */
   12.35 -    int dev;        /* IN */
   12.36 -    int func;       /* IN */
   12.37 -    int reg;        /* IN */
   12.38 -    int len;        /* IN */
   12.39 -    u32 value;      /* IN */
   12.40 +typedef struct {
   12.41 +    /* IN */
   12.42 +    int bus;
   12.43 +    int dev;
   12.44 +    int func;
   12.45 +    int reg;
   12.46 +    int len;
   12.47 +    u32 value;
   12.48  } physdevop_pci_cfgreg_write_t;
   12.49  
   12.50  /* Do final initialisation of a PCI device (e.g., last-moment IRQ routing). */
   12.51 -typedef struct physdevop_pci_initialise_device_st
   12.52 -{
   12.53 -    int bus;      /* IN */
   12.54 -    int dev;      /* IN */
   12.55 -    int func;     /* IN */
   12.56 +typedef struct {
   12.57 +    /* IN */
   12.58 +    int bus;
   12.59 +    int dev;
   12.60 +    int func;
   12.61  } physdevop_pci_initialise_device_t;
   12.62  
   12.63  /* Find the root buses for subsequent scanning. */
   12.64 -typedef struct physdevop_pci_probe_root_buses_st
   12.65 -{
   12.66 -    u32 busmask[256/32]; /* OUT */
   12.67 +typedef struct {
   12.68 +    /* OUT */
   12.69 +    u32 busmask[256/32];
   12.70  } physdevop_pci_probe_root_buses_t;
   12.71  
   12.72 +typedef struct {
   12.73 +    /* IN */
   12.74 +    int irq;
   12.75 +    /* OUT */
   12.76 +/* Need to call PHYSDEVOP_IRQ_UNMASK_NOTIFY when the IRQ has been serviced? */
   12.77 +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY (1<<0)
   12.78 +    unsigned long flags;
   12.79 +} physdevop_irq_status_query_t;
   12.80 +
   12.81  typedef struct _physdev_op_st 
   12.82  {
   12.83      unsigned long cmd;
   12.84 @@ -61,6 +72,7 @@ typedef struct _physdev_op_st
   12.85          physdevop_pci_cfgreg_write_t      pci_cfgreg_write;
   12.86          physdevop_pci_initialise_device_t pci_initialise_device;
   12.87          physdevop_pci_probe_root_buses_t  pci_probe_root_buses;
   12.88 +        physdevop_irq_status_query_t      irq_status_query;
   12.89      } u;
   12.90  } physdev_op_t;
   12.91  
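The new PHYSDEVOP_IRQ_STATUS_QUERY operation lets a guest ask, per IRQ, whether it must issue PHYSDEVOP_IRQ_UNMASK_NOTIFY after servicing that interrupt; the evtchn.c hunks later in this changeset use it exactly that way (pirq_query_unmask() at bind time, pirq_unmask_notify() on unmask). A condensed guest-side sketch of the calling pattern, with a stub standing in for the real HYPERVISOR_physdev_op() hypercall so it can run standalone, is:

    #include <stdio.h>
    #include <string.h>

    /* Constants and types condensed from physdev.h above. */
    #define PHYSDEVOP_IRQ_UNMASK_NOTIFY       4
    #define PHYSDEVOP_IRQ_STATUS_QUERY        5
    #define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY (1<<0)

    typedef struct {
        int           irq;    /* IN */
        unsigned long flags;  /* OUT */
    } physdevop_irq_status_query_t;

    typedef struct {
        unsigned long cmd;
        union {
            physdevop_irq_status_query_t irq_status_query;
        } u;
    } physdev_op_t;

    /* Stub in place of the real hypercall: pretend IRQ 9 is level-triggered
     * (needs the unmask notify) and everything else is edge-triggered. */
    static int HYPERVISOR_physdev_op(physdev_op_t *op)
    {
        if (op->cmd == PHYSDEVOP_IRQ_STATUS_QUERY)
            op->u.irq_status_query.flags =
                (op->u.irq_status_query.irq == 9) ?
                PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY : 0;
        return 0;
    }

    int main(void)
    {
        physdev_op_t op, unmask;
        int irq;

        for (irq = 8; irq <= 10; irq++) {
            memset(&op, 0, sizeof(op));
            op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY;
            op.u.irq_status_query.irq = irq;
            (void)HYPERVISOR_physdev_op(&op);

            if (op.u.irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY) {
                printf("IRQ %d: level-triggered; notify Xen after servicing\n", irq);
                unmask.cmd = PHYSDEVOP_IRQ_UNMASK_NOTIFY;
                (void)HYPERVISOR_physdev_op(&unmask);
            } else {
                printf("IRQ %d: edge-triggered; no unmask notify needed\n", irq);
            }
        }
        return 0;
    }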
    13.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c	Thu May 13 10:17:36 2004 +0000
    13.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c	Thu May 13 10:51:36 2004 +0000
    13.3 @@ -116,6 +116,9 @@ void netif_create(netif_be_create_t *cre
    13.4      dev->get_stats       = netif_be_get_stats;
    13.5      memcpy(dev->dev_addr, create->mac, ETH_ALEN);
    13.6  
    13.7 +    /* Disable queuing. */
    13.8 +    dev->tx_queue_len = 0;
    13.9 +
   13.10      /* XXX In bridge mode we should force a different MAC from remote end. */
   13.11      dev->dev_addr[2] ^= 1;
   13.12  
    14.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c	Thu May 13 10:17:36 2004 +0000
    14.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c	Thu May 13 10:51:36 2004 +0000
    14.3 @@ -116,12 +116,14 @@ int netif_be_start_xmit(struct sk_buff *
    14.4   
    14.5      /*
    14.6       * We do not copy the packet unless:
    14.7 -     *  1. It is fragmented; or
    14.8 +     *  1. The data is shared; or
    14.9       *  2. It spans a page boundary; or
   14.10       *  3. We cannot be sure the whole data page is allocated.
   14.11       * The copying method is taken from skb_copy().
    14.12 +     * NB. We also cannot cope with fragmented packets, but we won't get
    14.13 +     *     any because we do not advertise the NETIF_F_SG feature.
   14.14       */
   14.15 -    if ( (skb_shinfo(skb)->nr_frags != 0) ||
   14.16 +    if ( skb_shared(skb) || skb_cloned(skb) || 
   14.17           (((unsigned long)skb->end ^ (unsigned long)skb->head) & PAGE_MASK) ||
   14.18           ((skb->end - skb->head) < (PAGE_SIZE/2)) )
   14.19      {
    15.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c	Thu May 13 10:17:36 2004 +0000
    15.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c	Thu May 13 10:51:36 2004 +0000
    15.3 @@ -36,6 +36,9 @@ static int virq_to_irq[NR_VIRQS];
    15.4  /* Reference counts for bindings to IRQs. */
    15.5  static int irq_bindcount[NR_IRQS];
    15.6  
    15.7 +/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
    15.8 +static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)];
    15.9 +
   15.10  /* Upcall to generic IRQ layer. */
   15.11  extern asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs);
   15.12  
   15.13 @@ -234,8 +237,22 @@ static struct hw_interrupt_type dynirq_t
   15.14  static inline void pirq_unmask_notify(int pirq)
   15.15  {
   15.16      physdev_op_t op;
   15.17 -    op.cmd = PHYSDEVOP_UNMASK_IRQ;
   15.18 +    if ( unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0])) )
   15.19 +    {
   15.20 +        op.cmd = PHYSDEVOP_IRQ_UNMASK_NOTIFY;
   15.21 +        (void)HYPERVISOR_physdev_op(&op);
   15.22 +    }
   15.23 +}
   15.24 +
   15.25 +static inline void pirq_query_unmask(int pirq)
   15.26 +{
   15.27 +    physdev_op_t op;
   15.28 +    op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY;
   15.29 +    op.u.irq_status_query.irq = pirq;
   15.30      (void)HYPERVISOR_physdev_op(&op);
   15.31 +    clear_bit(pirq, &pirq_needs_unmask_notify[0]);
   15.32 +    if ( op.u.irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY )
   15.33 +        set_bit(pirq, &pirq_needs_unmask_notify[0]);
   15.34  }
   15.35  
   15.36  /*
   15.37 @@ -261,6 +278,8 @@ static unsigned int startup_pirq(unsigne
   15.38      }
   15.39      evtchn = op.u.bind_pirq.port;
   15.40  
   15.41 +    pirq_query_unmask(irq_to_pirq(irq));
   15.42 +
   15.43      evtchn_to_irq[evtchn] = irq;
   15.44      irq_to_evtchn[irq]    = evtchn;
   15.45  
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xenolinux-2.4.26-sparse/include/linux/skbuff.h	Thu May 13 10:51:36 2004 +0000
    16.3 @@ -0,0 +1,1174 @@
    16.4 +/*
    16.5 + *	Definitions for the 'struct sk_buff' memory handlers.
    16.6 + *
    16.7 + *	Authors:
    16.8 + *		Alan Cox, <gw4pts@gw4pts.ampr.org>
    16.9 + *		Florian La Roche, <rzsfl@rz.uni-sb.de>
   16.10 + *
   16.11 + *	This program is free software; you can redistribute it and/or
   16.12 + *	modify it under the terms of the GNU General Public License
   16.13 + *	as published by the Free Software Foundation; either version
   16.14 + *	2 of the License, or (at your option) any later version.
   16.15 + */
   16.16 + 
   16.17 +#ifndef _LINUX_SKBUFF_H
   16.18 +#define _LINUX_SKBUFF_H
   16.19 +
   16.20 +#include <linux/config.h>
   16.21 +#include <linux/kernel.h>
   16.22 +#include <linux/sched.h>
   16.23 +#include <linux/time.h>
   16.24 +#include <linux/cache.h>
   16.25 +
   16.26 +#include <asm/atomic.h>
   16.27 +#include <asm/types.h>
   16.28 +#include <linux/spinlock.h>
   16.29 +#include <linux/mm.h>
   16.30 +#include <linux/highmem.h>
   16.31 +
   16.32 +#define HAVE_ALLOC_SKB		/* For the drivers to know */
   16.33 +#define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
   16.34 +#define SLAB_SKB 		/* Slabified skbuffs 	   */
   16.35 +
   16.36 +#define CHECKSUM_NONE 0
   16.37 +#define CHECKSUM_HW 1
   16.38 +#define CHECKSUM_UNNECESSARY 2
   16.39 +
   16.40 +#define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
   16.41 +#define SKB_MAX_ORDER(X,ORDER)	(((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))
   16.42 +#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X),0))
   16.43 +#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0,2))
   16.44 +
   16.45 +/* A. Checksumming of received packets by device.
   16.46 + *
   16.47 + *	NONE: device failed to checksum this packet.
   16.48 + *		skb->csum is undefined.
   16.49 + *
   16.50 + *	UNNECESSARY: device parsed packet and wouldbe verified checksum.
   16.51 + *		skb->csum is undefined.
   16.52 + *	      It is bad option, but, unfortunately, many of vendors do this.
   16.53 + *	      Apparently with secret goal to sell you new device, when you
   16.54 + *	      will add new protocol to your host. F.e. IPv6. 8)
   16.55 + *
   16.56 + *	HW: the most generic way. Device supplied checksum of _all_
   16.57 + *	    the packet as seen by netif_rx in skb->csum.
   16.58 + *	    NOTE: Even if device supports only some protocols, but
   16.59 + *	    is able to produce some skb->csum, it MUST use HW,
   16.60 + *	    not UNNECESSARY.
   16.61 + *
   16.62 + * B. Checksumming on output.
   16.63 + *
   16.64 + *	NONE: skb is checksummed by protocol or csum is not required.
   16.65 + *
   16.66 + *	HW: device is required to csum packet as seen by hard_start_xmit
   16.67 + *	from skb->h.raw to the end and to record the checksum
   16.68 + *	at skb->h.raw+skb->csum.
   16.69 + *
   16.70 + *	Device must show its capabilities in dev->features, set
   16.71 + *	at device setup time.
   16.72 + *	NETIF_F_HW_CSUM	- it is clever device, it is able to checksum
   16.73 + *			  everything.
   16.74 + *	NETIF_F_NO_CSUM - loopback or reliable single hop media.
   16.75 + *	NETIF_F_IP_CSUM - device is dumb. It is able to csum only
   16.76 + *			  TCP/UDP over IPv4. Sigh. Vendors like this
   16.77 + *			  way by an unknown reason. Though, see comment above
   16.78 + *			  about CHECKSUM_UNNECESSARY. 8)
   16.79 + *
   16.80 + *	Any questions? No questions, good. 		--ANK
   16.81 + */
   16.82 +
   16.83 +#ifdef __i386__
   16.84 +#define NET_CALLER(arg) (*(((void**)&arg)-1))
   16.85 +#else
   16.86 +#define NET_CALLER(arg) __builtin_return_address(0)
   16.87 +#endif
   16.88 +
   16.89 +#ifdef CONFIG_NETFILTER
   16.90 +struct nf_conntrack {
   16.91 +	atomic_t use;
   16.92 +	void (*destroy)(struct nf_conntrack *);
   16.93 +};
   16.94 +
   16.95 +struct nf_ct_info {
   16.96 +	struct nf_conntrack *master;
   16.97 +};
   16.98 +#endif
   16.99 +
  16.100 +struct sk_buff_head {
  16.101 +	/* These two members must be first. */
  16.102 +	struct sk_buff	* next;
  16.103 +	struct sk_buff	* prev;
  16.104 +
  16.105 +	__u32		qlen;
  16.106 +	spinlock_t	lock;
  16.107 +};
  16.108 +
  16.109 +struct sk_buff;
  16.110 +
  16.111 +#define MAX_SKB_FRAGS 6
  16.112 +
  16.113 +typedef struct skb_frag_struct skb_frag_t;
  16.114 +
  16.115 +struct skb_frag_struct
  16.116 +{
  16.117 +	struct page *page;
  16.118 +	__u16 page_offset;
  16.119 +	__u16 size;
  16.120 +};
  16.121 +
  16.122 +/* This data is invariant across clones and lives at
  16.123 + * the end of the header data, ie. at skb->end.
  16.124 + */
  16.125 +struct skb_shared_info {
  16.126 +	atomic_t	dataref;
  16.127 +	unsigned int	nr_frags;
  16.128 +	struct sk_buff	*frag_list;
  16.129 +	skb_frag_t	frags[MAX_SKB_FRAGS];
  16.130 +};
  16.131 +
  16.132 +struct sk_buff {
  16.133 +	/* These two members must be first. */
  16.134 +	struct sk_buff	* next;			/* Next buffer in list 				*/
  16.135 +	struct sk_buff	* prev;			/* Previous buffer in list 			*/
  16.136 +
  16.137 +	struct sk_buff_head * list;		/* List we are on				*/
  16.138 +	struct sock	*sk;			/* Socket we are owned by 			*/
  16.139 +	struct timeval	stamp;			/* Time we arrived				*/
  16.140 +	struct net_device	*dev;		/* Device we arrived on/are leaving by		*/
  16.141 +	struct net_device	*real_dev;	/* For support of point to point protocols 
  16.142 +						   (e.g. 802.3ad) over bonding, we must save the
  16.143 +						   physical device that got the packet before
  16.144 +						   replacing skb->dev with the virtual device.  */
  16.145 +
  16.146 +	/* Transport layer header */
  16.147 +	union
  16.148 +	{
  16.149 +		struct tcphdr	*th;
  16.150 +		struct udphdr	*uh;
  16.151 +		struct icmphdr	*icmph;
  16.152 +		struct igmphdr	*igmph;
  16.153 +		struct iphdr	*ipiph;
  16.154 +		struct spxhdr	*spxh;
  16.155 +		unsigned char	*raw;
  16.156 +	} h;
  16.157 +
  16.158 +	/* Network layer header */
  16.159 +	union
  16.160 +	{
  16.161 +		struct iphdr	*iph;
  16.162 +		struct ipv6hdr	*ipv6h;
  16.163 +		struct arphdr	*arph;
  16.164 +		struct ipxhdr	*ipxh;
  16.165 +		unsigned char	*raw;
  16.166 +	} nh;
  16.167 +  
  16.168 +	/* Link layer header */
  16.169 +	union 
  16.170 +	{	
  16.171 +	  	struct ethhdr	*ethernet;
  16.172 +	  	unsigned char 	*raw;
  16.173 +	} mac;
  16.174 +
  16.175 +	struct  dst_entry *dst;
  16.176 +
  16.177 +	/* 
  16.178 +	 * This is the control buffer. It is free to use for every
  16.179 +	 * layer. Please put your private variables there. If you
  16.180 +	 * want to keep them across layers you have to do a skb_clone()
  16.181 +	 * first. This is owned by whoever has the skb queued ATM.
  16.182 +	 */ 
  16.183 +	char		cb[48];	 
  16.184 +
  16.185 +	unsigned int 	len;			/* Length of actual data			*/
  16.186 + 	unsigned int 	data_len;
  16.187 +	unsigned int	csum;			/* Checksum 					*/
  16.188 +	unsigned char 	__unused,		/* Dead field, may be reused			*/
  16.189 +			cloned, 		/* head may be cloned (check refcnt to be sure). */
  16.190 +  			pkt_type,		/* Packet class					*/
  16.191 +  			ip_summed;		/* Driver fed us an IP checksum			*/
  16.192 +	__u32		priority;		/* Packet queueing priority			*/
  16.193 +	atomic_t	users;			/* User count - see datagram.c,tcp.c 		*/
  16.194 +	unsigned short	protocol;		/* Packet protocol from driver. 		*/
  16.195 +	unsigned short	security;		/* Security level of packet			*/
  16.196 +	unsigned int	truesize;		/* Buffer size 					*/
  16.197 +
  16.198 +	unsigned char	*head;			/* Head of buffer 				*/
  16.199 +	unsigned char	*data;			/* Data head pointer				*/
  16.200 +	unsigned char	*tail;			/* Tail pointer					*/
  16.201 +	unsigned char 	*end;			/* End pointer					*/
  16.202 +
  16.203 +	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
  16.204 +#ifdef CONFIG_NETFILTER
  16.205 +	/* Can be used for communication between hooks. */
  16.206 +        unsigned long	nfmark;
  16.207 +	/* Cache info */
  16.208 +	__u32		nfcache;
  16.209 +	/* Associated connection, if any */
  16.210 +	struct nf_ct_info *nfct;
  16.211 +#ifdef CONFIG_NETFILTER_DEBUG
  16.212 +        unsigned int nf_debug;
  16.213 +#endif
  16.214 +#endif /*CONFIG_NETFILTER*/
  16.215 +
  16.216 +#if defined(CONFIG_HIPPI)
  16.217 +	union{
  16.218 +		__u32	ifield;
  16.219 +	} private;
  16.220 +#endif
  16.221 +
  16.222 +#ifdef CONFIG_NET_SCHED
  16.223 +       __u32           tc_index;               /* traffic control index */
  16.224 +#endif
  16.225 +};
  16.226 +
  16.227 +#ifdef __KERNEL__
  16.228 +/*
  16.229 + *	Handling routines are only of interest to the kernel
  16.230 + */
  16.231 +#include <linux/slab.h>
  16.232 +
  16.233 +#include <asm/system.h>
  16.234 +
  16.235 +extern void			__kfree_skb(struct sk_buff *skb);
  16.236 +extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
  16.237 +extern void			kfree_skbmem(struct sk_buff *skb);
  16.238 +extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
  16.239 +extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
  16.240 +extern struct sk_buff *		pskb_copy(struct sk_buff *skb, int gfp_mask);
  16.241 +extern int			pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
  16.242 +extern struct sk_buff *		skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
  16.243 +extern struct sk_buff *		skb_copy_expand(const struct sk_buff *skb, 
  16.244 +						int newheadroom,
  16.245 +						int newtailroom,
  16.246 +						int priority);
  16.247 +extern struct sk_buff *		skb_pad(struct sk_buff *skb, int pad);
  16.248 +#define dev_kfree_skb(a)	kfree_skb(a)
  16.249 +extern void	skb_over_panic(struct sk_buff *skb, int len, void *here);
  16.250 +extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
  16.251 +
  16.252 +/* Internal */
  16.253 +#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
  16.254 +
  16.255 +/**
  16.256 + *	skb_queue_empty - check if a queue is empty
  16.257 + *	@list: queue head
  16.258 + *
  16.259 + *	Returns true if the queue is empty, false otherwise.
  16.260 + */
  16.261 + 
  16.262 +static inline int skb_queue_empty(struct sk_buff_head *list)
  16.263 +{
  16.264 +	return (list->next == (struct sk_buff *) list);
  16.265 +}
  16.266 +
  16.267 +/**
  16.268 + *	skb_get - reference buffer
  16.269 + *	@skb: buffer to reference
  16.270 + *
  16.271 + *	Makes another reference to a socket buffer and returns a pointer
  16.272 + *	to the buffer.
  16.273 + */
  16.274 + 
  16.275 +static inline struct sk_buff *skb_get(struct sk_buff *skb)
  16.276 +{
  16.277 +	atomic_inc(&skb->users);
  16.278 +	return skb;
  16.279 +}
  16.280 +
  16.281 +/*
  16.282 + * If users==1, we are the only owner and are can avoid redundant
  16.283 + * atomic change.
  16.284 + */
  16.285 + 
  16.286 +/**
  16.287 + *	kfree_skb - free an sk_buff
  16.288 + *	@skb: buffer to free
  16.289 + *
  16.290 + *	Drop a reference to the buffer and free it if the usage count has
  16.291 + *	hit zero.
  16.292 + */
  16.293 + 
  16.294 +static inline void kfree_skb(struct sk_buff *skb)
  16.295 +{
  16.296 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  16.297 +		__kfree_skb(skb);
  16.298 +}
  16.299 +
  16.300 +/* Use this if you didn't touch the skb state [for fast switching] */
  16.301 +static inline void kfree_skb_fast(struct sk_buff *skb)
  16.302 +{
  16.303 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  16.304 +		kfree_skbmem(skb);	
  16.305 +}
  16.306 +
  16.307 +/**
  16.308 + *	skb_cloned - is the buffer a clone
  16.309 + *	@skb: buffer to check
  16.310 + *
  16.311 + *	Returns true if the buffer was generated with skb_clone() and is
  16.312 + *	one of multiple shared copies of the buffer. Cloned buffers are
  16.313 + *	shared data so must not be written to under normal circumstances.
  16.314 + */
  16.315 +
  16.316 +static inline int skb_cloned(struct sk_buff *skb)
  16.317 +{
  16.318 +	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
  16.319 +}
  16.320 +
  16.321 +/**
  16.322 + *	skb_shared - is the buffer shared
  16.323 + *	@skb: buffer to check
  16.324 + *
  16.325 + *	Returns true if more than one person has a reference to this
  16.326 + *	buffer.
  16.327 + */
  16.328 + 
  16.329 +static inline int skb_shared(struct sk_buff *skb)
  16.330 +{
  16.331 +	return (atomic_read(&skb->users) != 1);
  16.332 +}
  16.333 +
  16.334 +/** 
  16.335 + *	skb_share_check - check if buffer is shared and if so clone it
  16.336 + *	@skb: buffer to check
  16.337 + *	@pri: priority for memory allocation
  16.338 + *	
  16.339 + *	If the buffer is shared the buffer is cloned and the old copy
  16.340 + *	drops a reference. A new clone with a single reference is returned.
  16.341 + *	If the buffer is not shared the original buffer is returned. When
  16.342 + *	being called from interrupt status or with spinlocks held pri must
  16.343 + *	be GFP_ATOMIC.
  16.344 + *
  16.345 + *	NULL is returned on a memory allocation failure.
  16.346 + */
  16.347 + 
  16.348 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
  16.349 +{
  16.350 +	if (skb_shared(skb)) {
  16.351 +		struct sk_buff *nskb;
  16.352 +		nskb = skb_clone(skb, pri);
  16.353 +		kfree_skb(skb);
  16.354 +		return nskb;
  16.355 +	}
  16.356 +	return skb;
  16.357 +}
  16.358 +
  16.359 +
  16.360 +/*
  16.361 + *	Copy shared buffers into a new sk_buff. We effectively do COW on
  16.362 + *	packets to handle cases where we have a local reader and forward
  16.363 + *	and a couple of other messy ones. The normal one is tcpdumping
  16.364 + *	a packet thats being forwarded.
  16.365 + */
  16.366 + 
  16.367 +/**
  16.368 + *	skb_unshare - make a copy of a shared buffer
  16.369 + *	@skb: buffer to check
  16.370 + *	@pri: priority for memory allocation
  16.371 + *
  16.372 + *	If the socket buffer is a clone then this function creates a new
  16.373 + *	copy of the data, drops a reference count on the old copy and returns
  16.374 + *	the new copy with the reference count at 1. If the buffer is not a clone
  16.375 + *	the original buffer is returned. When called with a spinlock held or
  16.376 + *	from interrupt state @pri must be %GFP_ATOMIC
  16.377 + *
  16.378 + *	%NULL is returned on a memory allocation failure.
  16.379 + */
  16.380 + 
  16.381 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
  16.382 +{
  16.383 +	struct sk_buff *nskb;
  16.384 +	if(!skb_cloned(skb))
  16.385 +		return skb;
  16.386 +	nskb=skb_copy(skb, pri);
  16.387 +	kfree_skb(skb);		/* Free our shared copy */
  16.388 +	return nskb;
  16.389 +}
  16.390 +
  16.391 +/**
  16.392 + *	skb_peek
  16.393 + *	@list_: list to peek at
  16.394 + *
  16.395 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  16.396 + *	be careful with this one. A peek leaves the buffer on the
  16.397 + *	list and someone else may run off with it. You must hold
  16.398 + *	the appropriate locks or have a private queue to do this.
  16.399 + *
  16.400 + *	Returns %NULL for an empty list or a pointer to the head element.
  16.401 + *	The reference count is not incremented and the reference is therefore
  16.402 + *	volatile. Use with caution.
  16.403 + */
  16.404 + 
  16.405 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
  16.406 +{
  16.407 +	struct sk_buff *list = ((struct sk_buff *)list_)->next;
  16.408 +	if (list == (struct sk_buff *)list_)
  16.409 +		list = NULL;
  16.410 +	return list;
  16.411 +}
  16.412 +
  16.413 +/**
  16.414 + *	skb_peek_tail
  16.415 + *	@list_: list to peek at
  16.416 + *
  16.417 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  16.418 + *	be careful with this one. A peek leaves the buffer on the
  16.419 + *	list and someone else may run off with it. You must hold
  16.420 + *	the appropriate locks or have a private queue to do this.
  16.421 + *
  16.422 + *	Returns %NULL for an empty list or a pointer to the tail element.
  16.423 + *	The reference count is not incremented and the reference is therefore
  16.424 + *	volatile. Use with caution.
  16.425 + */
  16.426 +
  16.427 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
  16.428 +{
  16.429 +	struct sk_buff *list = ((struct sk_buff *)list_)->prev;
  16.430 +	if (list == (struct sk_buff *)list_)
  16.431 +		list = NULL;
  16.432 +	return list;
  16.433 +}
  16.434 +
  16.435 +/**
  16.436 + *	skb_queue_len	- get queue length
  16.437 + *	@list_: list to measure
  16.438 + *
  16.439 + *	Return the length of an &sk_buff queue. 
  16.440 + */
  16.441 + 
  16.442 +static inline __u32 skb_queue_len(struct sk_buff_head *list_)
  16.443 +{
  16.444 +	return(list_->qlen);
  16.445 +}
  16.446 +
  16.447 +static inline void skb_queue_head_init(struct sk_buff_head *list)
  16.448 +{
  16.449 +	spin_lock_init(&list->lock);
  16.450 +	list->prev = (struct sk_buff *)list;
  16.451 +	list->next = (struct sk_buff *)list;
  16.452 +	list->qlen = 0;
  16.453 +}
  16.454 +
  16.455 +/*
  16.456 + *	Insert an sk_buff at the start of a list.
  16.457 + *
  16.458 + *	The "__skb_xxxx()" functions are the non-atomic ones that
  16.459 + *	can only be called with interrupts disabled.
  16.460 + */
  16.461 +
  16.462 +/**
  16.463 + *	__skb_queue_head - queue a buffer at the list head
  16.464 + *	@list: list to use
  16.465 + *	@newsk: buffer to queue
  16.466 + *
  16.467 + *	Queue a buffer at the start of a list. This function takes no locks
  16.468 + *	and you must therefore hold required locks before calling it.
  16.469 + *
  16.470 + *	A buffer cannot be placed on two lists at the same time.
  16.471 + */	
  16.472 + 
  16.473 +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
  16.474 +{
  16.475 +	struct sk_buff *prev, *next;
  16.476 +
  16.477 +	newsk->list = list;
  16.478 +	list->qlen++;
  16.479 +	prev = (struct sk_buff *)list;
  16.480 +	next = prev->next;
  16.481 +	newsk->next = next;
  16.482 +	newsk->prev = prev;
  16.483 +	next->prev = newsk;
  16.484 +	prev->next = newsk;
  16.485 +}
  16.486 +
  16.487 +
  16.488 +/**
  16.489 + *	skb_queue_head - queue a buffer at the list head
  16.490 + *	@list: list to use
  16.491 + *	@newsk: buffer to queue
  16.492 + *
  16.493 + *	Queue a buffer at the start of the list. This function takes the
  16.494 + *	list lock and can be used safely with other locking &sk_buff functions
  16.495 + *	safely.
  16.496 + *
  16.497 + *	A buffer cannot be placed on two lists at the same time.
  16.498 + */	
  16.499 +
  16.500 +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
  16.501 +{
  16.502 +	unsigned long flags;
  16.503 +
  16.504 +	spin_lock_irqsave(&list->lock, flags);
  16.505 +	__skb_queue_head(list, newsk);
  16.506 +	spin_unlock_irqrestore(&list->lock, flags);
  16.507 +}
  16.508 +
  16.509 +/**
  16.510 + *	__skb_queue_tail - queue a buffer at the list tail
  16.511 + *	@list: list to use
  16.512 + *	@newsk: buffer to queue
  16.513 + *
  16.514 + *	Queue a buffer at the end of a list. This function takes no locks
  16.515 + *	and you must therefore hold required locks before calling it.
  16.516 + *
  16.517 + *	A buffer cannot be placed on two lists at the same time.
  16.518 + */	
  16.519 + 
  16.520 +
  16.521 +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
  16.522 +{
  16.523 +	struct sk_buff *prev, *next;
  16.524 +
  16.525 +	newsk->list = list;
  16.526 +	list->qlen++;
  16.527 +	next = (struct sk_buff *)list;
  16.528 +	prev = next->prev;
  16.529 +	newsk->next = next;
  16.530 +	newsk->prev = prev;
  16.531 +	next->prev = newsk;
  16.532 +	prev->next = newsk;
  16.533 +}
  16.534 +
  16.535 +/**
  16.536 + *	skb_queue_tail - queue a buffer at the list tail
  16.537 + *	@list: list to use
  16.538 + *	@newsk: buffer to queue
  16.539 + *
  16.540 + *	Queue a buffer at the tail of the list. This function takes the
  16.541 + *	list lock and can be used safely with other locking &sk_buff functions
  16.542 + *	safely.
  16.543 + *
  16.544 + *	A buffer cannot be placed on two lists at the same time.
  16.545 + */	
  16.546 +
  16.547 +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
  16.548 +{
  16.549 +	unsigned long flags;
  16.550 +
  16.551 +	spin_lock_irqsave(&list->lock, flags);
  16.552 +	__skb_queue_tail(list, newsk);
  16.553 +	spin_unlock_irqrestore(&list->lock, flags);
  16.554 +}
  16.555 +
  16.556 +/**
  16.557 + *	__skb_dequeue - remove from the head of the queue
  16.558 + *	@list: list to dequeue from
  16.559 + *
  16.560 + *	Remove the head of the list. This function does not take any locks
  16.561 + *	so must be used with appropriate locks held only. The head item is
  16.562 + *	returned or %NULL if the list is empty.
  16.563 + */
  16.564 +
  16.565 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  16.566 +{
  16.567 +	struct sk_buff *next, *prev, *result;
  16.568 +
  16.569 +	prev = (struct sk_buff *) list;
  16.570 +	next = prev->next;
  16.571 +	result = NULL;
  16.572 +	if (next != prev) {
  16.573 +		result = next;
  16.574 +		next = next->next;
  16.575 +		list->qlen--;
  16.576 +		next->prev = prev;
  16.577 +		prev->next = next;
  16.578 +		result->next = NULL;
  16.579 +		result->prev = NULL;
  16.580 +		result->list = NULL;
  16.581 +	}
  16.582 +	return result;
  16.583 +}
  16.584 +
  16.585 +/**
  16.586 + *	skb_dequeue - remove from the head of the queue
  16.587 + *	@list: list to dequeue from
  16.588 + *
  16.589 + *	Remove the head of the list. The list lock is taken so the function
  16.590 + *	may be used safely with other locking list functions. The head item is
  16.591 + *	returned or %NULL if the list is empty.
  16.592 + */
  16.593 +
  16.594 +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
  16.595 +{
  16.596 +	unsigned long flags;
  16.597 +	struct sk_buff *result;
  16.598 +
  16.599 +	spin_lock_irqsave(&list->lock, flags);
  16.600 +	result = __skb_dequeue(list);
  16.601 +	spin_unlock_irqrestore(&list->lock, flags);
  16.602 +	return result;
  16.603 +}
  16.604 +
  16.605 +/*
  16.606 + *	Insert a packet on a list.
  16.607 + */
  16.608 +
  16.609 +static inline void __skb_insert(struct sk_buff *newsk,
  16.610 +	struct sk_buff * prev, struct sk_buff *next,
  16.611 +	struct sk_buff_head * list)
  16.612 +{
  16.613 +	newsk->next = next;
  16.614 +	newsk->prev = prev;
  16.615 +	next->prev = newsk;
  16.616 +	prev->next = newsk;
  16.617 +	newsk->list = list;
  16.618 +	list->qlen++;
  16.619 +}
  16.620 +
  16.621 +/**
  16.622 + *	skb_insert	-	insert a buffer
  16.623 + *	@old: buffer to insert before
  16.624 + *	@newsk: buffer to insert
  16.625 + *
  16.626 + *	Place a packet before a given packet in a list. The list locks are taken
  16.627 + *	and this function is atomic with respect to other list locked calls
  16.628 + *	A buffer cannot be placed on two lists at the same time.
  16.629 + */
  16.630 +
  16.631 +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
  16.632 +{
  16.633 +	unsigned long flags;
  16.634 +
  16.635 +	spin_lock_irqsave(&old->list->lock, flags);
  16.636 +	__skb_insert(newsk, old->prev, old, old->list);
  16.637 +	spin_unlock_irqrestore(&old->list->lock, flags);
  16.638 +}
  16.639 +
  16.640 +/*
  16.641 + *	Place a packet after a given packet in a list.
  16.642 + */
  16.643 +
  16.644 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
  16.645 +{
  16.646 +	__skb_insert(newsk, old, old->next, old->list);
  16.647 +}
  16.648 +
  16.649 +/**
  16.650 + *	skb_append	-	append a buffer
  16.651 + *	@old: buffer to insert after
  16.652 + *	@newsk: buffer to insert
  16.653 + *
  16.654 + *	Place a packet after a given packet in a list. The list locks are taken
  16.655 + *	and this function is atomic with respect to other list locked calls.
  16.656 + *	A buffer cannot be placed on two lists at the same time.
  16.657 + */
  16.658 +
  16.659 +
  16.660 +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  16.661 +{
  16.662 +	unsigned long flags;
  16.663 +
  16.664 +	spin_lock_irqsave(&old->list->lock, flags);
  16.665 +	__skb_append(old, newsk);
  16.666 +	spin_unlock_irqrestore(&old->list->lock, flags);
  16.667 +}
  16.668 +
  16.669 +/*
  16.670 + * remove sk_buff from list. _Must_ be called atomically, and with
  16.671 + * the list known..
  16.672 + */
  16.673 + 
  16.674 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  16.675 +{
  16.676 +	struct sk_buff * next, * prev;
  16.677 +
  16.678 +	list->qlen--;
  16.679 +	next = skb->next;
  16.680 +	prev = skb->prev;
  16.681 +	skb->next = NULL;
  16.682 +	skb->prev = NULL;
  16.683 +	skb->list = NULL;
  16.684 +	next->prev = prev;
  16.685 +	prev->next = next;
  16.686 +}
  16.687 +
  16.688 +/**
  16.689 + *	skb_unlink	-	remove a buffer from a list
  16.690 + *	@skb: buffer to remove
  16.691 + *
  16.692 + *	Place a packet after a given packet in a list. The list locks are taken
  16.693 + *	and this function is atomic with respect to other list locked calls
  16.694 + *	
  16.695 + *	Works even without knowing the list it is sitting on, which can be 
  16.696 + *	handy at times. It also means that THE LIST MUST EXIST when you 
  16.697 + *	unlink. Thus a list must have its contents unlinked before it is
  16.698 + *	destroyed.
  16.699 + */
  16.700 +
  16.701 +static inline void skb_unlink(struct sk_buff *skb)
  16.702 +{
  16.703 +	struct sk_buff_head *list = skb->list;
  16.704 +
  16.705 +	if(list) {
  16.706 +		unsigned long flags;
  16.707 +
  16.708 +		spin_lock_irqsave(&list->lock, flags);
  16.709 +		if(skb->list == list)
  16.710 +			__skb_unlink(skb, skb->list);
  16.711 +		spin_unlock_irqrestore(&list->lock, flags);
  16.712 +	}
  16.713 +}
  16.714 +
  16.715 +/* XXX: more streamlined implementation */
  16.716 +
  16.717 +/**
  16.718 + *	__skb_dequeue_tail - remove from the tail of the queue
  16.719 + *	@list: list to dequeue from
  16.720 + *
  16.721 + *	Remove the tail of the list. This function does not take any locks
  16.722 + *	so must be used with appropriate locks held only. The tail item is
  16.723 + *	returned or %NULL if the list is empty.
  16.724 + */
  16.725 +
  16.726 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
  16.727 +{
  16.728 +	struct sk_buff *skb = skb_peek_tail(list); 
  16.729 +	if (skb)
  16.730 +		__skb_unlink(skb, list);
  16.731 +	return skb;
  16.732 +}
  16.733 +
  16.734 +/**
  16.735 + *	skb_dequeue - remove from the head of the queue
  16.736 + *	@list: list to dequeue from
  16.737 + *
  16.738 + *	Remove the tail of the list. The list lock is taken so the function
  16.739 + *	may be used safely with other locking list functions. The tail item is
  16.740 + *	returned or %NULL if the list is empty.
  16.741 + */
  16.742 +
  16.743 +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
  16.744 +{
  16.745 +	unsigned long flags;
  16.746 +	struct sk_buff *result;
  16.747 +
  16.748 +	spin_lock_irqsave(&list->lock, flags);
  16.749 +	result = __skb_dequeue_tail(list);
  16.750 +	spin_unlock_irqrestore(&list->lock, flags);
  16.751 +	return result;
  16.752 +}
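For illustration only, a common use of the locked tail dequeue is bounding a queue by dropping its newest entries; skb_queue_len() and kfree_skb() are the standard counterparts declared elsewhere in this header, and the helper name is invented.

#include <linux/skbuff.h>

/* Sketch: keep at most 'max' buffers on 'q', tail-dropping the excess. */
static void trim_queue_to(struct sk_buff_head *q, unsigned int max)
{
	struct sk_buff *skb;

	while (skb_queue_len(q) > max) {
		skb = skb_dequeue_tail(q);	/* NULL once the queue empties */
		if (!skb)
			break;
		kfree_skb(skb);
	}
}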
  16.753 +
  16.754 +static inline int skb_is_nonlinear(const struct sk_buff *skb)
  16.755 +{
  16.756 +	return skb->data_len;
  16.757 +}
  16.758 +
  16.759 +static inline unsigned int skb_headlen(const struct sk_buff *skb)
  16.760 +{
  16.761 +	return skb->len - skb->data_len;
  16.762 +}
  16.763 +
  16.764 +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0)
  16.765 +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0)
  16.766 +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0)
  16.767 +
  16.768 +/*
  16.769 + *	Add data to an sk_buff
  16.770 + */
  16.771 + 
  16.772 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
  16.773 +{
  16.774 +	unsigned char *tmp=skb->tail;
  16.775 +	SKB_LINEAR_ASSERT(skb);
  16.776 +	skb->tail+=len;
  16.777 +	skb->len+=len;
  16.778 +	return tmp;
  16.779 +}
  16.780 +
  16.781 +/**
  16.782 + *	skb_put - add data to a buffer
  16.783 + *	@skb: buffer to use 
  16.784 + *	@len: amount of data to add
  16.785 + *
  16.786 + *	This function extends the used data area of the buffer. If this would
  16.787 + *	exceed the total buffer size the kernel will panic. A pointer to the
  16.788 + *	first byte of the extra data is returned.
  16.789 + */
  16.790 + 
  16.791 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
  16.792 +{
  16.793 +	unsigned char *tmp=skb->tail;
  16.794 +	SKB_LINEAR_ASSERT(skb);
  16.795 +	skb->tail+=len;
  16.796 +	skb->len+=len;
  16.797 +	if(skb->tail>skb->end) {
  16.798 +		skb_over_panic(skb, len, current_text_addr());
  16.799 +	}
  16.800 +	return tmp;
  16.801 +}
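A small usage sketch (not from the patch): skb_put() claims space at the tail and returns a pointer to it, so data is normally copied through its return value. The helper name is invented; the tailroom check simply avoids the panic path.

#include <linux/skbuff.h>
#include <linux/string.h>

/* Sketch: append 'len' bytes of payload to a linear skb. */
static int fill_payload(struct sk_buff *skb, const void *data, unsigned int len)
{
	if (skb_tailroom(skb) < (int)len)
		return -1;			/* would hit skb_over_panic() */
	memcpy(skb_put(skb, len), data, len);	/* skb->tail and skb->len grow */
	return 0;
}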
  16.802 +
  16.803 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
  16.804 +{
  16.805 +	skb->data-=len;
  16.806 +	skb->len+=len;
  16.807 +	return skb->data;
  16.808 +}
  16.809 +
  16.810 +/**
  16.811 + *	skb_push - add data to the start of a buffer
  16.812 + *	@skb: buffer to use 
  16.813 + *	@len: amount of data to add
  16.814 + *
  16.815 + *	This function extends the used data area of the buffer at the buffer
  16.816 + *	start. If this would exceed the total buffer headroom the kernel will
  16.817 + *	panic. A pointer to the first byte of the extra data is returned.
  16.818 + */
  16.819 +
  16.820 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
  16.821 +{
  16.822 +	skb->data-=len;
  16.823 +	skb->len+=len;
  16.824 +	if(skb->data<skb->head) {
  16.825 +		skb_under_panic(skb, len, current_text_addr());
  16.826 +	}
  16.827 +	return skb->data;
  16.828 +}
  16.829 +
  16.830 +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
  16.831 +{
  16.832 +	skb->len-=len;
  16.833 +	if (skb->len < skb->data_len)
  16.834 +		out_of_line_bug();
  16.835 +	return 	skb->data+=len;
  16.836 +}
  16.837 +
  16.838 +/**
  16.839 + *	skb_pull - remove data from the start of a buffer
  16.840 + *	@skb: buffer to use 
  16.841 + *	@len: amount of data to remove
  16.842 + *
  16.843 + *	This function removes data from the start of a buffer, returning
  16.844 + *	the memory to the headroom. A pointer to the next data in the buffer
  16.845 + *	is returned. Once the data has been pulled future pushes will overwrite
  16.846 + *	the old data.
  16.847 + */
  16.848 +
  16.849 +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
  16.850 +{	
  16.851 +	if (len > skb->len)
  16.852 +		return NULL;
  16.853 +	return __skb_pull(skb,len);
  16.854 +}
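To show the usual pairing of the two calls above (a hedged sketch, not part of this changeset): a protocol prepends its header with skb_push() on transmit and strips it with skb_pull() on receive. The tag_hdr structure and function names are invented; the transmit side assumes enough headroom was reserved earlier.

#include <linux/skbuff.h>
#include <linux/types.h>

struct tag_hdr {			/* hypothetical 4-byte header */
	u32 tag;
};

/* Sketch: prepend the header (needs >= 4 bytes of headroom). */
static void tag_xmit(struct sk_buff *skb, u32 tag)
{
	struct tag_hdr *h = (struct tag_hdr *)skb_push(skb, sizeof(*h));

	h->tag = tag;
}

/* Sketch: validate and strip the header; payload then starts at skb->data. */
static int tag_recv(struct sk_buff *skb, u32 *tag)
{
	struct tag_hdr *h;

	if (!pskb_may_pull(skb, sizeof(*h)))
		return -1;		/* runt frame, header not present */
	h = (struct tag_hdr *)skb->data;
	*tag = h->tag;
	skb_pull(skb, sizeof(*h));
	return 0;
}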
  16.855 +
  16.856 +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
  16.857 +
  16.858 +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
  16.859 +{
  16.860 +	if (len > skb_headlen(skb) &&
  16.861 +	    __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
  16.862 +		return NULL;
  16.863 +	skb->len -= len;
  16.864 +	return 	skb->data += len;
  16.865 +}
  16.866 +
  16.867 +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
  16.868 +{	
  16.869 +	if (len > skb->len)
  16.870 +		return NULL;
  16.871 +	return __pskb_pull(skb,len);
  16.872 +}
  16.873 +
  16.874 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
  16.875 +{
  16.876 +	if (len <= skb_headlen(skb))
  16.877 +		return 1;
  16.878 +	if (len > skb->len)
  16.879 +		return 0;
  16.880 +	return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
  16.881 +}
  16.882 +
  16.883 +/**
  16.884 + *	skb_headroom - bytes at buffer head
  16.885 + *	@skb: buffer to check
  16.886 + *
  16.887 + *	Return the number of bytes of free space at the head of an &sk_buff.
  16.888 + */
  16.889 + 
  16.890 +static inline int skb_headroom(const struct sk_buff *skb)
  16.891 +{
  16.892 +	return skb->data-skb->head;
  16.893 +}
  16.894 +
  16.895 +/**
  16.896 + *	skb_tailroom - bytes at buffer end
  16.897 + *	@skb: buffer to check
  16.898 + *
  16.899 + *	Return the number of bytes of free space at the tail of an sk_buff
  16.900 + */
  16.901 +
  16.902 +static inline int skb_tailroom(const struct sk_buff *skb)
  16.903 +{
  16.904 +	return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
  16.905 +}
  16.906 +
  16.907 +/**
  16.908 + *	skb_reserve - adjust headroom
  16.909 + *	@skb: buffer to alter
  16.910 + *	@len: bytes to move
  16.911 + *
  16.912 + *	Increase the headroom of an empty &sk_buff by reducing the tail
  16.913 + *	room. This is only allowed for an empty buffer.
  16.914 + */
  16.915 +
  16.916 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
  16.917 +{
  16.918 +	skb->data+=len;
  16.919 +	skb->tail+=len;
  16.920 +}
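The typical allocate/reserve/fill sequence, as a sketch under the assumption that a 14-byte link header (ETH_HLEN) will later be pushed; the function name is invented, and GFP_ATOMIC is chosen only so the example also works in interrupt context.

#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/if_ether.h>

/* Sketch: build a buffer with headroom for an Ethernet header. */
static struct sk_buff *build_frame(const void *payload, unsigned int len)
{
	struct sk_buff *skb = alloc_skb(ETH_HLEN + len, GFP_ATOMIC);

	if (!skb)
		return NULL;
	skb_reserve(skb, ETH_HLEN);	/* must be done while skb is empty */
	memcpy(skb_put(skb, len), payload, len);
	return skb;
}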
  16.921 +
  16.922 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
  16.923 +
  16.924 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
  16.925 +{
  16.926 +	if (!skb->data_len) {
  16.927 +		skb->len = len;
  16.928 +		skb->tail = skb->data+len;
  16.929 +	} else {
  16.930 +		___pskb_trim(skb, len, 0);
  16.931 +	}
  16.932 +}
  16.933 +
  16.934 +/**
  16.935 + *	skb_trim - remove end from a buffer
  16.936 + *	@skb: buffer to alter
  16.937 + *	@len: new length
  16.938 + *
  16.939 + *	Cut the length of a buffer down by removing data from the tail. If
  16.940 + *	the buffer is already under the length specified it is not modified.
  16.941 + */
  16.942 +
  16.943 +static inline void skb_trim(struct sk_buff *skb, unsigned int len)
  16.944 +{
  16.945 +	if (skb->len > len) {
  16.946 +		__skb_trim(skb, len);
  16.947 +	}
  16.948 +}
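A one-line illustration (invented helper, not part of the patch): receive paths often use skb_trim() to drop trailing bytes such as a hardware-appended frame checksum.

#include <linux/skbuff.h>

/* Sketch: strip a 4-byte FCS that the NIC left at the end of the frame. */
static void strip_fcs(struct sk_buff *skb)
{
	if (skb->len >= 4)
		skb_trim(skb, skb->len - 4);
}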
  16.949 +
  16.950 +
  16.951 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
  16.952 +{
  16.953 +	if (!skb->data_len) {
  16.954 +		skb->len = len;
  16.955 +		skb->tail = skb->data+len;
  16.956 +		return 0;
  16.957 +	} else {
  16.958 +		return ___pskb_trim(skb, len, 1);
  16.959 +	}
  16.960 +}
  16.961 +
  16.962 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
  16.963 +{
  16.964 +	if (len < skb->len)
  16.965 +		return __pskb_trim(skb, len);
  16.966 +	return 0;
  16.967 +}
  16.968 +
  16.969 +/**
  16.970 + *	skb_orphan - orphan a buffer
  16.971 + *	@skb: buffer to orphan
  16.972 + *
  16.973 + *	If a buffer currently has an owner then we call the owner's
  16.974 + *	destructor function and make the @skb unowned. The buffer continues
  16.975 + *	to exist but is no longer charged to its former owner.
  16.976 + */
  16.977 +
  16.978 +
  16.979 +static inline void skb_orphan(struct sk_buff *skb)
  16.980 +{
  16.981 +	if (skb->destructor)
  16.982 +		skb->destructor(skb);
  16.983 +	skb->destructor = NULL;
  16.984 +	skb->sk = NULL;
  16.985 +}
  16.986 +
  16.987 +/**
  16.988 + *	skb_queue_purge - empty a list
  16.989 + *	@list: list to empty
  16.990 + *
  16.991 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
  16.992 + *	the list and one reference dropped. This function takes the list
  16.993 + *	lock and is atomic with respect to other list locking functions.
  16.994 + */
  16.995 +
  16.996 +
  16.997 +static inline void skb_queue_purge(struct sk_buff_head *list)
  16.998 +{
  16.999 +	struct sk_buff *skb;
 16.1000 +	while ((skb=skb_dequeue(list))!=NULL)
 16.1001 +		kfree_skb(skb);
 16.1002 +}
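As a sketch of when the locked purge is preferred (names invented): during device teardown the queue may still be fed from interrupt context, so the locking variant is used rather than __skb_queue_purge().

#include <linux/skbuff.h>

/* Sketch: free everything still queued when the device stops. */
static void my_dev_stop(struct sk_buff_head *rx_queue)
{
	skb_queue_purge(rx_queue);	/* dequeues and kfree_skb()s each buffer */
}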
 16.1003 +
 16.1004 +/**
 16.1005 + *	__skb_queue_purge - empty a list
 16.1006 + *	@list: list to empty
 16.1007 + *
 16.1008 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 16.1009 + *	the list and one reference dropped. This function does not take the
 16.1010 + *	list lock and the caller must hold the relevant locks to use it.
 16.1011 + */
 16.1012 +
 16.1013 +
 16.1014 +static inline void __skb_queue_purge(struct sk_buff_head *list)
 16.1015 +{
 16.1016 +	struct sk_buff *skb;
 16.1017 +	while ((skb=__skb_dequeue(list))!=NULL)
 16.1018 +		kfree_skb(skb);
 16.1019 +}
 16.1020 +
 16.1021 +/**
 16.1022 + *	__dev_alloc_skb - allocate an skbuff for sending
 16.1023 + *	@length: length to allocate
 16.1024 + *	@gfp_mask: get_free_pages mask, passed to alloc_skb
 16.1025 + *
 16.1026 + *	Allocate a new &sk_buff and assign it a usage count of one. The
 16.1027 + *	buffer has unspecified headroom built in. Users should allocate
 16.1028 + *	the headroom they think they need without accounting for the
 16.1029 + *	built in space. The built in space is used for optimisations.
 16.1030 + *
 16.1031 + *	%NULL is returned if there is no free memory.
 16.1032 + */
 16.1033 + 
 16.1034 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 16.1035 +					      int gfp_mask)
 16.1036 +{
 16.1037 +	struct sk_buff *skb;
 16.1038 +#if defined(CONFIG_XEN)
 16.1039 +	length = (PAGE_SIZE/2)+1; /* force slab allocator to give us a page */
 16.1040 +#endif
 16.1041 +	skb = alloc_skb(length+16, gfp_mask);
 16.1042 +	if (skb)
 16.1043 +		skb_reserve(skb,16);
 16.1044 +	return skb;
 16.1045 +}
 16.1046 +
 16.1047 +/**
 16.1048 + *	dev_alloc_skb - allocate an skbuff for sending
 16.1049 + *	@length: length to allocate
 16.1050 + *
 16.1051 + *	Allocate a new &sk_buff and assign it a usage count of one. The
 16.1052 + *	buffer has unspecified headroom built in. Users should allocate
 16.1053 + *	the headroom they think they need without accounting for the
 16.1054 + *	built in space. The built in space is used for optimisations.
 16.1055 + *
 16.1056 + *	%NULL is returned if there is no free memory. Although this function
 16.1057 + *	allocates memory it can be called from an interrupt.
 16.1058 + */
 16.1059 + 
 16.1060 +static inline struct sk_buff *dev_alloc_skb(unsigned int length)
 16.1061 +{
 16.1062 +	return __dev_alloc_skb(length, GFP_ATOMIC);
 16.1063 +}
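A hedged sketch of a driver receive-buffer refill. Note the CONFIG_XEN change in __dev_alloc_skb() above: the requested length is overridden to just past half a page so the slab allocator hands back a full page, which appears to be what the Xen network backend relies on when exchanging packet pages between domains. The function and parameter names below are invented.

#include <linux/skbuff.h>
#include <linux/netdevice.h>

/* Sketch: allocate one receive buffer for a ring. */
static struct sk_buff *rx_refill_one(struct net_device *dev, unsigned int mtu)
{
	struct sk_buff *skb = dev_alloc_skb(mtu + 2);	/* GFP_ATOMIC inside */

	if (!skb)
		return NULL;
	skb->dev = dev;
	skb_reserve(skb, 2);	/* 2-byte pad so the IP header lands aligned */
	return skb;
}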
 16.1064 +
 16.1065 +/**
 16.1066 + *	skb_cow - copy header of skb when it is required
 16.1067 + *	@skb: buffer to cow
 16.1068 + *	@headroom: needed headroom
 16.1069 + *
 16.1070 + *	If the skb passed lacks sufficient headroom or its data part
 16.1071 + *	is shared, data is reallocated. If reallocation fails, an error
 16.1072 + *	is returned and the original skb is not changed.
 16.1073 + *
 16.1074 + *	The result is skb with writable area skb->head...skb->tail
 16.1075 + *	and at least @headroom of space at head.
 16.1076 + */
 16.1077 +
 16.1078 +static inline int
 16.1079 +skb_cow(struct sk_buff *skb, unsigned int headroom)
 16.1080 +{
 16.1081 +	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
 16.1082 +
 16.1083 +	if (delta < 0)
 16.1084 +		delta = 0;
 16.1085 +
 16.1086 +	if (delta || skb_cloned(skb))
 16.1087 +		return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
 16.1088 +	return 0;
 16.1089 +}
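To show where skb_cow() fits (a sketch assuming IPv4 and the 2.4-style skb->nh.iph pointer; the helper name is invented and the checksum fix-up is omitted): before modifying header bytes in place, the data must be made private and writable.

#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/errno.h>

/* Sketch: rewrite the TTL of a routed packet in place. */
static int rewrite_ttl(struct sk_buff *skb, u8 ttl)
{
	struct iphdr *iph;

	if (skb_cow(skb, skb_headroom(skb)) != 0)
		return -ENOMEM;		/* skb left untouched on failure */
	iph = skb->nh.iph;		/* head may have been reallocated */
	iph->ttl = ttl;			/* checksum update omitted for brevity */
	return 0;
}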
 16.1090 +
 16.1091 +/**
 16.1092 + *	skb_padto	- pad an skbuff up to a minimal size
 16.1093 + *	@skb: buffer to pad
 16.1094 + *	@len: minimal length
 16.1095 + *
 16.1096 + *	Pads up a buffer to ensure the trailing bytes exist and are
 16.1097 + *	blanked. If the buffer already contains sufficient data it
 16.1098 + *	is untouched. Returns the buffer, which may be a replacement
 16.1099 + *	for the original, or NULL for out of memory - in which case
 16.1100 + *	the original buffer is still freed.
 16.1101 + */
 16.1102 + 
 16.1103 +static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len)
 16.1104 +{
 16.1105 +	unsigned int size = skb->len;
 16.1106 +	if(likely(size >= len))
 16.1107 +		return skb;
 16.1108 +	return skb_pad(skb, len-size);
 16.1109 +}
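A sketch of the intended caller (names invented; the 2.4 convention of returning 0 from hard_start_xmit even when the packet is dropped is assumed): short Ethernet frames are padded to ETH_ZLEN before being handed to the hardware, and on allocation failure the buffer has already been freed by skb_padto().

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>

/* Sketch: pad runt frames before programming the transmit descriptor. */
static int my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	skb = skb_padto(skb, ETH_ZLEN);
	if (skb == NULL)
		return 0;	/* original buffer already freed */

	/* ... hand 'skb' to the hardware here ... */
	return 0;
}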
 16.1110 +
 16.1111 +/**
 16.1112 + *	skb_linearize - convert paged skb to linear one
 16.1113 + *	@skb: buffer to linearize
 16.1114 + *	@gfp: allocation mode
 16.1115 + *
 16.1116 + *	If there is no free memory -ENOMEM is returned, otherwise zero
 16.1117 + *	is returned and the old skb data released.  */
 16.1118 +int skb_linearize(struct sk_buff *skb, int gfp);
 16.1119 +
 16.1120 +static inline void *kmap_skb_frag(const skb_frag_t *frag)
 16.1121 +{
 16.1122 +#ifdef CONFIG_HIGHMEM
 16.1123 +	if (in_irq())
 16.1124 +		out_of_line_bug();
 16.1125 +
 16.1126 +	local_bh_disable();
 16.1127 +#endif
 16.1128 +	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
 16.1129 +}
 16.1130 +
 16.1131 +static inline void kunmap_skb_frag(void *vaddr)
 16.1132 +{
 16.1133 +	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
 16.1134 +#ifdef CONFIG_HIGHMEM
 16.1135 +	local_bh_enable();
 16.1136 +#endif
 16.1137 +}
 16.1138 +
 16.1139 +#define skb_queue_walk(queue, skb) \
 16.1140 +		for (skb = (queue)->next;			\
 16.1141 +		     (skb != (struct sk_buff *)(queue));	\
 16.1142 +		     skb=skb->next)
 16.1143 +
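Finally, a sketch of skb_queue_walk(): the macro does no locking and the loop body must not unlink buffers, so the queue lock is held for the duration of the walk. The helper name and threshold parameter are invented.

#include <linux/skbuff.h>
#include <linux/spinlock.h>

/* Sketch: count queued buffers whose length exceeds 'thresh'. */
static int count_large(struct sk_buff_head *q, unsigned int thresh)
{
	struct sk_buff *skb;
	unsigned long flags;
	int n = 0;

	spin_lock_irqsave(&q->lock, flags);
	skb_queue_walk(q, skb) {
		if (skb->len > thresh)
			n++;
	}
	spin_unlock_irqrestore(&q->lock, flags);
	return n;
}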
 16.1144 +
 16.1145 +extern struct sk_buff *		skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
 16.1146 +extern unsigned int		datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
 16.1147 +extern int			skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
 16.1148 +extern int			skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size);
 16.1149 +extern int			skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
 16.1150 +extern int			skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov);
 16.1151 +extern void			skb_free_datagram(struct sock * sk, struct sk_buff *skb);
 16.1152 +
 16.1153 +extern unsigned int		skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
 16.1154 +extern int			skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
 16.1155 +extern unsigned int		skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
 16.1156 +extern void			skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 16.1157 +
 16.1158 +extern void skb_init(void);
 16.1159 +extern void skb_add_mtu(int mtu);
 16.1160 +
 16.1161 +#ifdef CONFIG_NETFILTER
 16.1162 +static inline void
 16.1163 +nf_conntrack_put(struct nf_ct_info *nfct)
 16.1164 +{
 16.1165 +	if (nfct && atomic_dec_and_test(&nfct->master->use))
 16.1166 +		nfct->master->destroy(nfct->master);
 16.1167 +}
 16.1168 +static inline void
 16.1169 +nf_conntrack_get(struct nf_ct_info *nfct)
 16.1170 +{
 16.1171 +	if (nfct)
 16.1172 +		atomic_inc(&nfct->master->use);
 16.1173 +}
 16.1174 +#endif
 16.1175 +
 16.1176 +#endif	/* __KERNEL__ */
 16.1177 +#endif	/* _LINUX_SKBUFF_H */