ia64/xen-unstable
changeset 1377:acc04d188782
bitkeeper revision 1.903 (40a35338ZpUgNaqcF2wHoseyC85T4Q)
manual merge
--- a/.rootkeys Thu May 13 10:17:36 2004 +0000
+++ b/.rootkeys Thu May 13 10:51:36 2004 +0000
@@ -753,6 +753,7 @@ 40659defgWA92arexpMGn8X3QMDj3w xenolinux
 3f056927gMHl7mWB89rb73JahbhQIA xenolinux-2.4.26-sparse/include/linux/blk.h
 3e5a4e68WLX3B8owTvktP3HHOtznPQ xenolinux-2.4.26-sparse/include/linux/major.h
 401c0590D_kwJDU59X8NyvqSv_Cl2A xenolinux-2.4.26-sparse/include/linux/sched.h
+40a248afgI0_JKthdYAe8beVfXSTpQ xenolinux-2.4.26-sparse/include/linux/skbuff.h
 3e5a4e686V0nioX2ZpFf056sgvdiQw xenolinux-2.4.26-sparse/include/linux/sunrpc/debug.h
 401c0592pLrp_aCbQRo9GXiYQQaVVA xenolinux-2.4.26-sparse/include/linux/timer.h
 3e5a4e68W_hpMlM3u_-QOKMp3gzcwQ xenolinux-2.4.26-sparse/init/do_mounts.c

--- a/tools/examples/xc_dom_control.py Thu May 13 10:17:36 2004 +0000
+++ b/tools/examples/xc_dom_control.py Thu May 13 10:51:36 2004 +0000
@@ -92,14 +92,14 @@ elif cmd == 'pincpu':
     rc = xc.domain_pincpu( dom, cpu )
 
 elif cmd == 'list':
-    print 'Dom Name Mem(kb) CPU State Time(s)'
+    print 'Dom Name Mem(kb) CPU State Time(ms)'
     for domain in xc.domain_getinfo():
 
         run  = (domain['running'] and 'r') or '-'  # domain['running'] ? run='r' : run='-'
         stop = (domain['stopped'] and 's') or '-'  # domain['stopped'] ? stop='s': stop='-'
 
         domain['state'] = run + stop
-        domain['cpu_time'] = domain['cpu_time']/1e8
+        domain['cpu_time'] = domain['cpu_time']/1e6
 
         print "%(dom)-4d %(name)-16s %(mem_kb)7d %(cpu)3d %(state)5s %(cpu_time)8d" % domain
 
--- a/xen/arch/i386/entry.S Thu May 13 10:17:36 2004 +0000
+++ b/xen/arch/i386/entry.S Thu May 13 10:51:36 2004 +0000
@@ -245,6 +245,10 @@ restore_all_guest:
         movsl
         movsl
         movsl
+        # Third, reenable interrupts. They will definitely be reenabled by IRET
+        # in any case. They could be disabled here if we are returning from an
+        # interrupt. We need interrupts enabled if we take a fault.
+        sti
         # Finally, restore guest registers -- faults will cause failsafe
         popl %ebx
         popl %ecx

4.1 --- a/xen/arch/i386/io_apic.c Thu May 13 10:17:36 2004 +0000 4.2 +++ b/xen/arch/i386/io_apic.c Thu May 13 10:51:36 2004 +0000 4.3 @@ -208,7 +208,11 @@ static void set_ioapic_affinity (unsigne 4.4 spin_unlock_irqrestore(&ioapic_lock, flags); 4.5 } 4.6 4.7 -#if CONFIG_SMP 4.8 +/* 4.9 + * In new I/O model, the interrupt is pinned to the CPU of the first 4.10 + * device-driver domain that attaches. Dynamic balancing is pointless. 4.11 + */ 4.12 +#if defined(CONFIG_SMP) && !defined(NO_DEVICES_IN_XEN) 4.13 4.14 typedef struct { 4.15 unsigned int cpu; 4.16 @@ -220,8 +224,6 @@ static irq_balance_t irq_balance[NR_IRQS 4.17 4.18 extern unsigned long irq_affinity [NR_IRQS]; 4.19 4.20 -#endif 4.21 - 4.22 #define IDLE_ENOUGH(cpu,now) \ 4.23 (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) 4.24 4.25 @@ -256,7 +258,6 @@ inside: 4.26 4.27 static inline void balance_irq(int irq) 4.28 { 4.29 -#if CONFIG_SMP 4.30 irq_balance_t *entry = irq_balance + irq; 4.31 unsigned long now = jiffies; 4.32 4.33 @@ -272,8 +273,13 @@ static inline void balance_irq(int irq) 4.34 entry->cpu = move(entry->cpu, allowed_mask, now, random_number); 4.35 set_ioapic_affinity(irq, apicid_to_phys_cpu_present(entry->cpu)); 4.36 } 4.37 +} 4.38 + 4.39 +#else 4.40 + 4.41 +#define balance_irq(_irq) ((void)0) 4.42 + 4.43 #endif 4.44 -} 4.45 4.46 /* 4.47 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to 4.48 @@ -883,6 +889,7 @@ void __init UNEXPECTED_IO_APIC(void) 4.49 4.50 void __init print_IO_APIC(void) 4.51 { 4.52 +#ifndef NDEBUG 4.53 int apic, i; 4.54 struct IO_APIC_reg_00 reg_00; 4.55 struct IO_APIC_reg_01 reg_01; 4.56 @@ -1019,9 +1026,11 @@ void __init print_IO_APIC(void) 4.57 } 4.58 4.59 printk(KERN_INFO ".................................... done.\n"); 4.60 +#endif 4.61 +} 4.62 4.63 - return; 4.64 -} 4.65 + 4.66 +#if 0 /* Maybe useful for debugging, but not currently used anywhere. */ 4.67 4.68 static void print_APIC_bitfield (int base) 4.69 { 4.70 @@ -1041,6 +1050,7 @@ static void print_APIC_bitfield (int bas 4.71 } 4.72 } 4.73 4.74 + 4.75 void /*__init*/ print_local_APIC(void * dummy) 4.76 { 4.77 unsigned int v, ver, maxlvt; 4.78 @@ -1156,6 +1166,9 @@ void /*__init*/ print_PIC(void) 4.79 printk(KERN_DEBUG "... 
PIC ELCR: %04x\n", v); 4.80 } 4.81 4.82 +#endif /* 0 */ 4.83 + 4.84 + 4.85 static void __init enable_IO_APIC(void) 4.86 { 4.87 struct IO_APIC_reg_01 reg_01; 4.88 @@ -1874,7 +1887,7 @@ int io_apic_set_pci_routing (int ioapic, 4.89 mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); 4.90 4.91 if (edge_level) { 4.92 - irq_desc[irq].handler = &ioapic_level_irq_type; 4.93 + irq_desc[irq].handler = &ioapic_level_irq_type; 4.94 } else { 4.95 irq_desc[irq].handler = &ioapic_edge_irq_type; 4.96 } 4.97 @@ -1893,3 +1906,110 @@ int io_apic_set_pci_routing (int ioapic, 4.98 } 4.99 4.100 #endif /*CONFIG_ACPI_BOOT*/ 4.101 + 4.102 +extern char opt_leveltrigger[], opt_edgetrigger[]; 4.103 + 4.104 +static int __init ioapic_trigger_setup(void) 4.105 +{ 4.106 + char *p; 4.107 + irq_desc_t *desc; 4.108 + long irq; 4.109 + 4.110 + p = opt_leveltrigger; 4.111 + while ( *p != '\0' ) 4.112 + { 4.113 + irq = simple_strtol(p, &p, 10); 4.114 + if ( (irq <= 0) || (irq >= NR_IRQS) ) 4.115 + { 4.116 + printk("IRQ '%ld' out of range in level-trigger list '%s'\n", 4.117 + irq, opt_leveltrigger); 4.118 + break; 4.119 + } 4.120 + 4.121 + printk("Forcing IRQ %ld to level-trigger: ", irq); 4.122 + 4.123 + desc = &irq_desc[irq]; 4.124 + spin_lock_irq(&desc->lock); 4.125 + 4.126 + if ( desc->handler == &ioapic_level_irq_type ) 4.127 + { 4.128 + printk("already level-triggered (no force applied).\n"); 4.129 + } 4.130 + else if ( desc->handler != &ioapic_edge_irq_type ) 4.131 + { 4.132 + printk("cannot force (can only force IO-APIC-edge IRQs).\n"); 4.133 + } 4.134 + else 4.135 + { 4.136 + desc->handler = &ioapic_level_irq_type; 4.137 + __mask_IO_APIC_irq(irq); 4.138 + __level_IO_APIC_irq(irq); 4.139 + printk("done.\n"); 4.140 + } 4.141 + 4.142 + spin_unlock_irq(&desc->lock); 4.143 + 4.144 + if ( *p == '\0' ) 4.145 + break; 4.146 + 4.147 + if ( *p != ',' ) 4.148 + { 4.149 + printk("Unexpected character '%c' in level-trigger list '%s'\n", 4.150 + *p, opt_leveltrigger); 4.151 + break; 4.152 + } 4.153 + 4.154 + p++; 4.155 + } 4.156 + 4.157 + p = opt_edgetrigger; 4.158 + while ( *p != '\0' ) 4.159 + { 4.160 + irq = simple_strtol(p, &p, 10); 4.161 + if ( (irq <= 0) || (irq >= NR_IRQS) ) 4.162 + { 4.163 + printk("IRQ '%ld' out of range in edge-trigger list '%s'\n", 4.164 + irq, opt_edgetrigger); 4.165 + break; 4.166 + } 4.167 + 4.168 + printk("Forcing IRQ %ld to edge-trigger: ", irq); 4.169 + 4.170 + desc = &irq_desc[irq]; 4.171 + spin_lock_irq(&desc->lock); 4.172 + 4.173 + if ( desc->handler == &ioapic_edge_irq_type ) 4.174 + { 4.175 + printk("already edge-triggered (no force applied).\n"); 4.176 + } 4.177 + else if ( desc->handler != &ioapic_level_irq_type ) 4.178 + { 4.179 + printk("cannot force (can only force IO-APIC-level IRQs).\n"); 4.180 + } 4.181 + else 4.182 + { 4.183 + desc->handler = &ioapic_edge_irq_type; 4.184 + __edge_IO_APIC_irq(irq); 4.185 + desc->status |= IRQ_PENDING; /* may have lost a masked edge */ 4.186 + printk("done.\n"); 4.187 + } 4.188 + 4.189 + spin_unlock_irq(&desc->lock); 4.190 + 4.191 + if ( *p == '\0' ) 4.192 + break; 4.193 + 4.194 + if ( *p != ',' ) 4.195 + { 4.196 + printk("Unexpected character '%c' in edge-trigger list '%s'\n", 4.197 + *p, opt_edgetrigger); 4.198 + break; 4.199 + } 4.200 + 4.201 + p++; 4.202 + } 4.203 + 4.204 + return 0; 4.205 +} 4.206 + 4.207 +__initcall(ioapic_trigger_setup);
--- a/xen/arch/i386/irq.c Thu May 13 10:17:36 2004 +0000
+++ b/xen/arch/i386/irq.c Thu May 13 10:51:36 2004 +0000
@@ -39,6 +39,7 @@
 #include <xen/delay.h>
 #include <xen/timex.h>
 #include <xen/perfc.h>
+#include <asm/smpboot.h>
 
 /*
  * Linux has a controller-independent x86 interrupt architecture.
@@ -1034,6 +1035,11 @@ int pirq_guest_bind(struct task_struct *
         desc->status |= IRQ_GUEST;
         desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
         desc->handler->startup(irq);
+
+        /* Attempt to bind the interrupt target to the correct CPU. */
+        if ( desc->handler->set_affinity != NULL )
+            desc->handler->set_affinity(
+                irq, apicid_to_phys_cpu_present(p->processor));
     }
     else if ( !will_share || !action->shareable )
     {

--- a/xen/arch/i386/process.c Thu May 13 10:17:36 2004 +0000
+++ b/xen/arch/i386/process.c Thu May 13 10:51:36 2004 +0000
@@ -270,7 +270,7 @@ void switch_to(struct task_struct *prev_
         tss->ss1 = next->guestos_ss;
 
         /* Maybe switch the debug registers. */
-        if ( next->debugreg[7] )
+        if ( unlikely(next->debugreg[7]) )
         {
             loaddebug(next, 0);
             loaddebug(next, 1);
@@ -280,10 +280,17 @@ void switch_to(struct task_struct *prev_
             loaddebug(next, 6);
             loaddebug(next, 7);
         }
+
+        /* Switch page tables. */
+        write_ptbase(&next_p->mm);
+        tlb_clocktick();
     }
 
-    if ( ( prev_p->io_bitmap != NULL ) || ( next_p->io_bitmap != NULL ) ) {
-        if ( next_p->io_bitmap != NULL ) {
+    if ( unlikely(prev_p->io_bitmap != NULL) ||
+         unlikely(next_p->io_bitmap != NULL) )
+    {
+        if ( next_p->io_bitmap != NULL )
+        {
             /* Copy in the appropriate parts of the IO bitmap. We use the
              * selector to copy only the interesting parts of the bitmap. */
 
@@ -314,7 +321,9 @@ void switch_to(struct task_struct *prev_
 
             tss->bitmap = IO_BITMAP_OFFSET;
 
-        } else {
+        }
+        else
+        {
             /* In this case, we're switching FROM a task with IO port access,
              * to a task that doesn't use the IO bitmap. We set any TSS bits
              * that might have been cleared, ready for future use. */
@@ -332,11 +341,6 @@ void switch_to(struct task_struct *prev_
             tss->bitmap = INVALID_IO_BITMAP_OFFSET;
         }
     }
-
-
-    /* Switch page tables. */
-    write_ptbase(&next_p->mm);
-    tlb_clocktick();
 
     set_current(next_p);
 
--- a/xen/common/domain.c Thu May 13 10:17:36 2004 +0000
+++ b/xen/common/domain.c Thu May 13 10:51:36 2004 +0000
@@ -222,10 +222,6 @@ void __kill_domain(struct task_struct *p
     *pp = p->next_hash;
     write_unlock_irqrestore(&tasklist_lock, flags);
 
-    if ( atomic_read(&p->refcnt) >2 )
-        DPRINTK("Domain refcnt>1 so kil deferred. Missing put_task? p=%p cur=%p cnt=%d\n",p,current,atomic_read(&p->refcnt));
-
-
     if ( p == current )
     {
         __enter_scheduler();
@@ -420,7 +416,16 @@ void free_all_dom_mem(struct task_struct
 
     INIT_LIST_HEAD(&zombies);
 
-    if ( p->mm.shadow_mode ) shadow_mode_disable(p);
+    /*
+     * If we're executing the idle task then we may still be running over the
+     * dead domain's page tables. We'd better fix that before freeing them!
+     */
+    if ( is_idle_task(current) )
+        write_ptbase(&current->mm);
+
+    /* Exit shadow mode before deconstructing final guest page table. */
+    if ( p->mm.shadow_mode )
+        shadow_mode_disable(p);
 
     /* STEP 1. Drop the in-use reference to the page-table base. */
     put_page_and_type(&frame_table[pagetable_val(p->mm.pagetable) >>
@@ -1078,7 +1083,7 @@ int construct_dom0(struct task_struct *p
 
     set_bit(PF_CONSTRUCTED, &p->flags);
 
-#if 0 // XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave)
+#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
     shadow_mode_enable(&p->mm, SHM_test);
 #endif
 
--- a/xen/common/kernel.c Thu May 13 10:17:36 2004 +0000
+++ b/xen/common/kernel.c Thu May 13 10:51:36 2004 +0000
@@ -75,31 +75,37 @@ unsigned char opt_pdb[10] = "none";
 unsigned int opt_tbuf_size = 1;
 /* opt_sched: scheduler - default to Borrowed Virtual Time */
 char opt_sched[10] = "bvt";
-/* opt_physdev_dom0_hide: list of PCI slots to hide from dom0
- * Should have the format '(%02x:%02x.%1x)(%02x:%02x.%1x)...etc' */
-char opt_physdev_dom0_hide[20] = "";
+/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */
+/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */
+char opt_physdev_dom0_hide[200] = "";
+/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */
+/* level- or edge-triggered. */
+/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */
+char opt_leveltrigger[30] = "", opt_edgetrigger[30] = "";
 
 static struct {
     unsigned char *name;
     enum { OPT_IP, OPT_STR, OPT_UINT, OPT_BOOL } type;
     void *var;
 } opts[] = {
-    { "console", OPT_STR, &opt_console },
-    { "ser_baud", OPT_UINT, &opt_ser_baud },
-    { "com1", OPT_STR, &opt_com1 },
-    { "com2", OPT_STR, &opt_com2 },
-    { "dom0_mem", OPT_UINT, &opt_dom0_mem },
-    { "ifname", OPT_STR, &opt_ifname },
-    { "noht", OPT_BOOL, &opt_noht },
-    { "noacpi", OPT_BOOL, &opt_noacpi },
-    { "nosmp", OPT_BOOL, &opt_nosmp },
-    { "noreboot", OPT_BOOL, &opt_noreboot },
-    { "ignorebiostables", OPT_BOOL, &opt_ignorebiostables },
-    { "watchdog", OPT_BOOL, &opt_watchdog },
-    { "pdb", OPT_STR, &opt_pdb },
-    { "tbuf_size", OPT_UINT, &opt_tbuf_size },
-    { "sched", OPT_STR, &opt_sched },
-    { "physdev_dom0_hide",OPT_STR, &opt_physdev_dom0_hide },
+    { "console",           OPT_STR,  &opt_console },
+    { "ser_baud",          OPT_UINT, &opt_ser_baud },
+    { "com1",              OPT_STR,  &opt_com1 },
+    { "com2",              OPT_STR,  &opt_com2 },
+    { "dom0_mem",          OPT_UINT, &opt_dom0_mem },
+    { "ifname",            OPT_STR,  &opt_ifname },
+    { "noht",              OPT_BOOL, &opt_noht },
+    { "noacpi",            OPT_BOOL, &opt_noacpi },
+    { "nosmp",             OPT_BOOL, &opt_nosmp },
+    { "noreboot",          OPT_BOOL, &opt_noreboot },
+    { "ignorebiostables",  OPT_BOOL, &opt_ignorebiostables },
+    { "watchdog",          OPT_BOOL, &opt_watchdog },
+    { "pdb",               OPT_STR,  &opt_pdb },
+    { "tbuf_size",         OPT_UINT, &opt_tbuf_size },
+    { "sched",             OPT_STR,  &opt_sched },
+    { "physdev_dom0_hide", OPT_STR,  &opt_physdev_dom0_hide },
+    { "leveltrigger",      OPT_STR,  &opt_leveltrigger },
+    { "edgetrigger",       OPT_STR,  &opt_edgetrigger },
     { NULL, 0, NULL }
 };
 
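The new options are parsed from the Xen command line alongside the existing ones, so they are simply appended to the hypervisor's boot parameters. A purely illustrative example (the IRQ numbers are taken from the comment in the hunk above; the PCI slots are made-up values in the documented '(%02x:%02x.%1x)' format):

    physdev_dom0_hide=(01:03.0)(01:04.0) leveltrigger=4,5,6,20 edgetrigger=21
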
--- a/xen/common/memory.c Thu May 13 10:17:36 2004 +0000
+++ b/xen/common/memory.c Thu May 13 10:51:36 2004 +0000
@@ -194,7 +194,6 @@ static struct {
  */
 void __init init_frametable(unsigned long nr_pages)
 {
-    int i;
     unsigned long mfn;
 
     memset(percpu_info, 0, sizeof(percpu_info));
@@ -209,20 +208,19 @@ void __init init_frametable(unsigned lon
     INIT_LIST_HEAD(&free_list);
     free_pfns = 0;
 
-    /* so that we can map them latter, set the ownership of pages
-       belonging to the machine_to_phys_mapping to CPU0 idle task */
-
-    mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
-
     /* initialise to a magic of 0x55555555 so easier to spot bugs later */
     memset( machine_to_phys_mapping, 0x55, 4*1024*1024 );
 
     /* The array is sized for a 4GB machine regardless of actuall mem size.
        This costs 4MB -- may want to fix some day */
-    for(i=0;i<1024*1024;i+=1024,mfn++)
+
+    /* Pin the ownership of the MP table so that DOM0 can map it later. */
+    for ( mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
+          mfn < virt_to_phys((void *)RDWR_MPT_VIRT_END)>>PAGE_SHIFT;
+          mfn++ )
     {
         frame_table[mfn].count_and_flags = 1 | PGC_allocated;
-        frame_table[mfn].type_and_flags = 1 | PGT_gdt_page; // anything non RW
+        frame_table[mfn].type_and_flags = 1 | PGT_gdt_page; /* non-RW type */
         frame_table[mfn].u.domain = &idle0_task;
     }
 }

--- a/xen/common/physdev.c Thu May 13 10:17:36 2004 +0000
+++ b/xen/common/physdev.c Thu May 13 10:51:36 2004 +0000
@@ -634,9 +634,10 @@ static long pci_probe_root_buses(u32 *bu
  */
 long do_physdev_op(physdev_op_t *uop)
 {
-    phys_dev_t *pdev;
+    phys_dev_t  *pdev;
     physdev_op_t op;
-    long ret;
+    long         ret;
+    int          irq;
 
     if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
         return -EFAULT;
@@ -674,10 +675,22 @@ long do_physdev_op(physdev_op_t *uop)
         ret = pci_probe_root_buses(op.u.pci_probe_root_buses.busmask);
         break;
 
-    case PHYSDEVOP_UNMASK_IRQ:
+    case PHYSDEVOP_IRQ_UNMASK_NOTIFY:
         ret = pirq_guest_unmask(current);
         break;
 
+    case PHYSDEVOP_IRQ_STATUS_QUERY:
+        irq = op.u.irq_status_query.irq;
+        ret = -EINVAL;
+        if ( (irq < 0) || (irq >= NR_IRQS) )
+            break;
+        op.u.irq_status_query.flags = 0;
+        /* Edge-triggered interrupts don't need an explicit unmask downcall. */
+        if ( strstr(irq_desc[irq].handler->typename, "edge") == NULL )
+            op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
+        ret = 0;
+        break;
+
     default:
         ret = -EINVAL;
         break;

--- a/xen/common/schedule.c Thu May 13 10:17:36 2004 +0000
+++ b/xen/common/schedule.c Thu May 13 10:51:36 2004 +0000
@@ -520,7 +520,7 @@ asmlinkage void __enter_scheduler(void)
 
     /* Mark a timer event for the newly-scheduled domain. */
     if ( !is_idle_task(next) )
-        evtchn_set_pending(next, VIRQ_TIMER);
+        send_guest_virq(next, VIRQ_TIMER);
 
     schedule_tail(next);
 
--- a/xen/include/hypervisor-ifs/physdev.h Thu May 13 10:17:36 2004 +0000
+++ b/xen/include/hypervisor-ifs/physdev.h Thu May 13 10:51:36 2004 +0000
@@ -14,44 +14,55 @@
 #define PHYSDEVOP_PCI_CFGREG_WRITE      1
 #define PHYSDEVOP_PCI_INITIALISE_DEVICE 2
 #define PHYSDEVOP_PCI_PROBE_ROOT_BUSES  3
-#define PHYSDEVOP_UNMASK_IRQ            4
+#define PHYSDEVOP_IRQ_UNMASK_NOTIFY     4
+#define PHYSDEVOP_IRQ_STATUS_QUERY      5
 
 /* Read from PCI configuration space. */
-typedef struct physdevop_pci_cfgreg_read_st
-{
-    int bus;   /* IN */
-    int dev;   /* IN */
-    int func;  /* IN */
-    int reg;   /* IN */
-    int len;   /* IN */
-    u32 value; /* OUT */
+typedef struct {
+    /* IN */
+    int bus;
+    int dev;
+    int func;
+    int reg;
+    int len;
+    /* OUT */
+    u32 value;
 } physdevop_pci_cfgreg_read_t;
 
 /* Write to PCI configuration space. */
-typedef struct physdevop_pci_cfgreg_write_st
-{
-    int bus;   /* IN */
-    int dev;   /* IN */
-    int func;  /* IN */
-    int reg;   /* IN */
-    int len;   /* IN */
-    u32 value; /* IN */
+typedef struct {
+    /* IN */
+    int bus;
+    int dev;
+    int func;
+    int reg;
+    int len;
+    u32 value;
 } physdevop_pci_cfgreg_write_t;
 
 /* Do final initialisation of a PCI device (e.g., last-moment IRQ routing). */
-typedef struct physdevop_pci_initialise_device_st
-{
-    int bus;   /* IN */
-    int dev;   /* IN */
-    int func;  /* IN */
+typedef struct {
+    /* IN */
+    int bus;
+    int dev;
+    int func;
 } physdevop_pci_initialise_device_t;
 
 /* Find the root buses for subsequent scanning. */
-typedef struct physdevop_pci_probe_root_buses_st
-{
-    u32 busmask[256/32]; /* OUT */
+typedef struct {
+    /* OUT */
+    u32 busmask[256/32];
 } physdevop_pci_probe_root_buses_t;
 
+typedef struct {
+    /* IN */
+    int irq;
+    /* OUT */
+/* Need to call PHYSDEVOP_IRQ_UNMASK_NOTIFY when the IRQ has been serviced? */
+#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY (1<<0)
+    unsigned long flags;
+} physdevop_irq_status_query_t;
+
 typedef struct _physdev_op_st
 {
     unsigned long cmd;
@@ -61,6 +72,7 @@ typedef struct _physdev_op_st
     physdevop_pci_cfgreg_write_t      pci_cfgreg_write;
     physdevop_pci_initialise_device_t pci_initialise_device;
     physdevop_pci_probe_root_buses_t  pci_probe_root_buses;
+    physdevop_irq_status_query_t      irq_status_query;
 } u;
 } physdev_op_t;

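Taken together with the guest-side change to evtchn.c further down, the intended use of the two operations is: query once at PIRQ setup whether Xen needs an explicit unmask notification, then issue PHYSDEVOP_IRQ_UNMASK_NOTIFY only for the IRQs that require it (the level-triggered ones). A minimal sketch of that calling pattern, assuming the HYPERVISOR_physdev_op() wrapper used in the XenoLinux tree; the needs_notify cache and the function names are illustrative, not part of this changeset:

    /* Illustrative guest-side pattern; see the evtchn.c hunks below for the real code. */
    static int needs_notify;

    static void my_pirq_setup(int pirq)
    {
        physdev_op_t op;
        op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY;
        op.u.irq_status_query.irq = pirq;
        (void)HYPERVISOR_physdev_op(&op);
        needs_notify = !!(op.u.irq_status_query.flags &
                          PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY);
    }

    static void my_pirq_end(int pirq)
    {
        physdev_op_t op;
        if ( needs_notify )   /* edge-triggered PIRQs skip the hypercall */
        {
            op.cmd = PHYSDEVOP_IRQ_UNMASK_NOTIFY;
            (void)HYPERVISOR_physdev_op(&op);
        }
    }
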
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c Thu May 13 10:17:36 2004 +0000
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c Thu May 13 10:51:36 2004 +0000
@@ -116,6 +116,9 @@ void netif_create(netif_be_create_t *cre
     dev->get_stats = netif_be_get_stats;
     memcpy(dev->dev_addr, create->mac, ETH_ALEN);
 
+    /* Disable queuing. */
+    dev->tx_queue_len = 0;
+
     /* XXX In bridge mode we should force a different MAC from remote end. */
     dev->dev_addr[2] ^= 1;
 
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c Thu May 13 10:17:36 2004 +0000
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c Thu May 13 10:51:36 2004 +0000
@@ -116,12 +116,14 @@ int netif_be_start_xmit(struct sk_buff *
 
     /*
      * We do not copy the packet unless:
-     *  1. It is fragmented; or
+     *  1. The data is shared; or
      *  2. It spans a page boundary; or
      *  3. We cannot be sure the whole data page is allocated.
      * The copying method is taken from skb_copy().
+     * NB. We also couldn't cope with fragmented packets, but we won't get
+     * any because we not advertise the NETIF_F_SG feature.
      */
-    if ( (skb_shinfo(skb)->nr_frags != 0) ||
+    if ( skb_shared(skb) || skb_cloned(skb) ||
         (((unsigned long)skb->end ^ (unsigned long)skb->head) & PAGE_MASK) ||
         ((skb->end - skb->head) < (PAGE_SIZE/2)) )
     {

--- a/xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c Thu May 13 10:17:36 2004 +0000
+++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c Thu May 13 10:51:36 2004 +0000
@@ -36,6 +36,9 @@ static int virq_to_irq[NR_VIRQS];
 /* Reference counts for bindings to IRQs. */
 static int irq_bindcount[NR_IRQS];
 
+/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
+static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)];
+
 /* Upcall to generic IRQ layer. */
 extern asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs);
 
@@ -234,8 +237,22 @@ static struct hw_interrupt_type dynirq_t
 static inline void pirq_unmask_notify(int pirq)
 {
     physdev_op_t op;
-    op.cmd = PHYSDEVOP_UNMASK_IRQ;
+    if ( unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0])) )
+    {
+        op.cmd = PHYSDEVOP_IRQ_UNMASK_NOTIFY;
+        (void)HYPERVISOR_physdev_op(&op);
+    }
+}
+
+static inline void pirq_query_unmask(int pirq)
+{
+    physdev_op_t op;
+    op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY;
+    op.u.irq_status_query.irq = pirq;
     (void)HYPERVISOR_physdev_op(&op);
+    clear_bit(pirq, &pirq_needs_unmask_notify[0]);
+    if ( op.u.irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY )
+        set_bit(pirq, &pirq_needs_unmask_notify[0]);
 }
 
 /*
@@ -261,6 +278,8 @@ static unsigned int startup_pirq(unsigne
     }
     evtchn = op.u.bind_pirq.port;
 
+    pirq_query_unmask(irq_to_pirq(irq));
+
     evtchn_to_irq[evtchn] = irq;
     irq_to_evtchn[irq] = evtchn;
 
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 16.2 +++ b/xenolinux-2.4.26-sparse/include/linux/skbuff.h Thu May 13 10:51:36 2004 +0000 16.3 @@ -0,0 +1,1174 @@ 16.4 +/* 16.5 + * Definitions for the 'struct sk_buff' memory handlers. 16.6 + * 16.7 + * Authors: 16.8 + * Alan Cox, <gw4pts@gw4pts.ampr.org> 16.9 + * Florian La Roche, <rzsfl@rz.uni-sb.de> 16.10 + * 16.11 + * This program is free software; you can redistribute it and/or 16.12 + * modify it under the terms of the GNU General Public License 16.13 + * as published by the Free Software Foundation; either version 16.14 + * 2 of the License, or (at your option) any later version. 16.15 + */ 16.16 + 16.17 +#ifndef _LINUX_SKBUFF_H 16.18 +#define _LINUX_SKBUFF_H 16.19 + 16.20 +#include <linux/config.h> 16.21 +#include <linux/kernel.h> 16.22 +#include <linux/sched.h> 16.23 +#include <linux/time.h> 16.24 +#include <linux/cache.h> 16.25 + 16.26 +#include <asm/atomic.h> 16.27 +#include <asm/types.h> 16.28 +#include <linux/spinlock.h> 16.29 +#include <linux/mm.h> 16.30 +#include <linux/highmem.h> 16.31 + 16.32 +#define HAVE_ALLOC_SKB /* For the drivers to know */ 16.33 +#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ 16.34 +#define SLAB_SKB /* Slabified skbuffs */ 16.35 + 16.36 +#define CHECKSUM_NONE 0 16.37 +#define CHECKSUM_HW 1 16.38 +#define CHECKSUM_UNNECESSARY 2 16.39 + 16.40 +#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) 16.41 +#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) 16.42 +#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) 16.43 +#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) 16.44 + 16.45 +/* A. Checksumming of received packets by device. 16.46 + * 16.47 + * NONE: device failed to checksum this packet. 16.48 + * skb->csum is undefined. 16.49 + * 16.50 + * UNNECESSARY: device parsed packet and wouldbe verified checksum. 16.51 + * skb->csum is undefined. 16.52 + * It is bad option, but, unfortunately, many of vendors do this. 16.53 + * Apparently with secret goal to sell you new device, when you 16.54 + * will add new protocol to your host. F.e. IPv6. 8) 16.55 + * 16.56 + * HW: the most generic way. Device supplied checksum of _all_ 16.57 + * the packet as seen by netif_rx in skb->csum. 16.58 + * NOTE: Even if device supports only some protocols, but 16.59 + * is able to produce some skb->csum, it MUST use HW, 16.60 + * not UNNECESSARY. 16.61 + * 16.62 + * B. Checksumming on output. 16.63 + * 16.64 + * NONE: skb is checksummed by protocol or csum is not required. 16.65 + * 16.66 + * HW: device is required to csum packet as seen by hard_start_xmit 16.67 + * from skb->h.raw to the end and to record the checksum 16.68 + * at skb->h.raw+skb->csum. 16.69 + * 16.70 + * Device must show its capabilities in dev->features, set 16.71 + * at device setup time. 16.72 + * NETIF_F_HW_CSUM - it is clever device, it is able to checksum 16.73 + * everything. 16.74 + * NETIF_F_NO_CSUM - loopback or reliable single hop media. 16.75 + * NETIF_F_IP_CSUM - device is dumb. It is able to csum only 16.76 + * TCP/UDP over IPv4. Sigh. Vendors like this 16.77 + * way by an unknown reason. Though, see comment above 16.78 + * about CHECKSUM_UNNECESSARY. 8) 16.79 + * 16.80 + * Any questions? No questions, good. 
--ANK 16.81 + */ 16.82 + 16.83 +#ifdef __i386__ 16.84 +#define NET_CALLER(arg) (*(((void**)&arg)-1)) 16.85 +#else 16.86 +#define NET_CALLER(arg) __builtin_return_address(0) 16.87 +#endif 16.88 + 16.89 +#ifdef CONFIG_NETFILTER 16.90 +struct nf_conntrack { 16.91 + atomic_t use; 16.92 + void (*destroy)(struct nf_conntrack *); 16.93 +}; 16.94 + 16.95 +struct nf_ct_info { 16.96 + struct nf_conntrack *master; 16.97 +}; 16.98 +#endif 16.99 + 16.100 +struct sk_buff_head { 16.101 + /* These two members must be first. */ 16.102 + struct sk_buff * next; 16.103 + struct sk_buff * prev; 16.104 + 16.105 + __u32 qlen; 16.106 + spinlock_t lock; 16.107 +}; 16.108 + 16.109 +struct sk_buff; 16.110 + 16.111 +#define MAX_SKB_FRAGS 6 16.112 + 16.113 +typedef struct skb_frag_struct skb_frag_t; 16.114 + 16.115 +struct skb_frag_struct 16.116 +{ 16.117 + struct page *page; 16.118 + __u16 page_offset; 16.119 + __u16 size; 16.120 +}; 16.121 + 16.122 +/* This data is invariant across clones and lives at 16.123 + * the end of the header data, ie. at skb->end. 16.124 + */ 16.125 +struct skb_shared_info { 16.126 + atomic_t dataref; 16.127 + unsigned int nr_frags; 16.128 + struct sk_buff *frag_list; 16.129 + skb_frag_t frags[MAX_SKB_FRAGS]; 16.130 +}; 16.131 + 16.132 +struct sk_buff { 16.133 + /* These two members must be first. */ 16.134 + struct sk_buff * next; /* Next buffer in list */ 16.135 + struct sk_buff * prev; /* Previous buffer in list */ 16.136 + 16.137 + struct sk_buff_head * list; /* List we are on */ 16.138 + struct sock *sk; /* Socket we are owned by */ 16.139 + struct timeval stamp; /* Time we arrived */ 16.140 + struct net_device *dev; /* Device we arrived on/are leaving by */ 16.141 + struct net_device *real_dev; /* For support of point to point protocols 16.142 + (e.g. 802.3ad) over bonding, we must save the 16.143 + physical device that got the packet before 16.144 + replacing skb->dev with the virtual device. */ 16.145 + 16.146 + /* Transport layer header */ 16.147 + union 16.148 + { 16.149 + struct tcphdr *th; 16.150 + struct udphdr *uh; 16.151 + struct icmphdr *icmph; 16.152 + struct igmphdr *igmph; 16.153 + struct iphdr *ipiph; 16.154 + struct spxhdr *spxh; 16.155 + unsigned char *raw; 16.156 + } h; 16.157 + 16.158 + /* Network layer header */ 16.159 + union 16.160 + { 16.161 + struct iphdr *iph; 16.162 + struct ipv6hdr *ipv6h; 16.163 + struct arphdr *arph; 16.164 + struct ipxhdr *ipxh; 16.165 + unsigned char *raw; 16.166 + } nh; 16.167 + 16.168 + /* Link layer header */ 16.169 + union 16.170 + { 16.171 + struct ethhdr *ethernet; 16.172 + unsigned char *raw; 16.173 + } mac; 16.174 + 16.175 + struct dst_entry *dst; 16.176 + 16.177 + /* 16.178 + * This is the control buffer. It is free to use for every 16.179 + * layer. Please put your private variables there. If you 16.180 + * want to keep them across layers you have to do a skb_clone() 16.181 + * first. This is owned by whoever has the skb queued ATM. 16.182 + */ 16.183 + char cb[48]; 16.184 + 16.185 + unsigned int len; /* Length of actual data */ 16.186 + unsigned int data_len; 16.187 + unsigned int csum; /* Checksum */ 16.188 + unsigned char __unused, /* Dead field, may be reused */ 16.189 + cloned, /* head may be cloned (check refcnt to be sure). */ 16.190 + pkt_type, /* Packet class */ 16.191 + ip_summed; /* Driver fed us an IP checksum */ 16.192 + __u32 priority; /* Packet queueing priority */ 16.193 + atomic_t users; /* User count - see datagram.c,tcp.c */ 16.194 + unsigned short protocol; /* Packet protocol from driver. 
*/ 16.195 + unsigned short security; /* Security level of packet */ 16.196 + unsigned int truesize; /* Buffer size */ 16.197 + 16.198 + unsigned char *head; /* Head of buffer */ 16.199 + unsigned char *data; /* Data head pointer */ 16.200 + unsigned char *tail; /* Tail pointer */ 16.201 + unsigned char *end; /* End pointer */ 16.202 + 16.203 + void (*destructor)(struct sk_buff *); /* Destruct function */ 16.204 +#ifdef CONFIG_NETFILTER 16.205 + /* Can be used for communication between hooks. */ 16.206 + unsigned long nfmark; 16.207 + /* Cache info */ 16.208 + __u32 nfcache; 16.209 + /* Associated connection, if any */ 16.210 + struct nf_ct_info *nfct; 16.211 +#ifdef CONFIG_NETFILTER_DEBUG 16.212 + unsigned int nf_debug; 16.213 +#endif 16.214 +#endif /*CONFIG_NETFILTER*/ 16.215 + 16.216 +#if defined(CONFIG_HIPPI) 16.217 + union{ 16.218 + __u32 ifield; 16.219 + } private; 16.220 +#endif 16.221 + 16.222 +#ifdef CONFIG_NET_SCHED 16.223 + __u32 tc_index; /* traffic control index */ 16.224 +#endif 16.225 +}; 16.226 + 16.227 +#ifdef __KERNEL__ 16.228 +/* 16.229 + * Handling routines are only of interest to the kernel 16.230 + */ 16.231 +#include <linux/slab.h> 16.232 + 16.233 +#include <asm/system.h> 16.234 + 16.235 +extern void __kfree_skb(struct sk_buff *skb); 16.236 +extern struct sk_buff * alloc_skb(unsigned int size, int priority); 16.237 +extern void kfree_skbmem(struct sk_buff *skb); 16.238 +extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); 16.239 +extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); 16.240 +extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); 16.241 +extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); 16.242 +extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); 16.243 +extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, 16.244 + int newheadroom, 16.245 + int newtailroom, 16.246 + int priority); 16.247 +extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); 16.248 +#define dev_kfree_skb(a) kfree_skb(a) 16.249 +extern void skb_over_panic(struct sk_buff *skb, int len, void *here); 16.250 +extern void skb_under_panic(struct sk_buff *skb, int len, void *here); 16.251 + 16.252 +/* Internal */ 16.253 +#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 16.254 + 16.255 +/** 16.256 + * skb_queue_empty - check if a queue is empty 16.257 + * @list: queue head 16.258 + * 16.259 + * Returns true if the queue is empty, false otherwise. 16.260 + */ 16.261 + 16.262 +static inline int skb_queue_empty(struct sk_buff_head *list) 16.263 +{ 16.264 + return (list->next == (struct sk_buff *) list); 16.265 +} 16.266 + 16.267 +/** 16.268 + * skb_get - reference buffer 16.269 + * @skb: buffer to reference 16.270 + * 16.271 + * Makes another reference to a socket buffer and returns a pointer 16.272 + * to the buffer. 16.273 + */ 16.274 + 16.275 +static inline struct sk_buff *skb_get(struct sk_buff *skb) 16.276 +{ 16.277 + atomic_inc(&skb->users); 16.278 + return skb; 16.279 +} 16.280 + 16.281 +/* 16.282 + * If users==1, we are the only owner and are can avoid redundant 16.283 + * atomic change. 16.284 + */ 16.285 + 16.286 +/** 16.287 + * kfree_skb - free an sk_buff 16.288 + * @skb: buffer to free 16.289 + * 16.290 + * Drop a reference to the buffer and free it if the usage count has 16.291 + * hit zero. 
16.292 + */ 16.293 + 16.294 +static inline void kfree_skb(struct sk_buff *skb) 16.295 +{ 16.296 + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 16.297 + __kfree_skb(skb); 16.298 +} 16.299 + 16.300 +/* Use this if you didn't touch the skb state [for fast switching] */ 16.301 +static inline void kfree_skb_fast(struct sk_buff *skb) 16.302 +{ 16.303 + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 16.304 + kfree_skbmem(skb); 16.305 +} 16.306 + 16.307 +/** 16.308 + * skb_cloned - is the buffer a clone 16.309 + * @skb: buffer to check 16.310 + * 16.311 + * Returns true if the buffer was generated with skb_clone() and is 16.312 + * one of multiple shared copies of the buffer. Cloned buffers are 16.313 + * shared data so must not be written to under normal circumstances. 16.314 + */ 16.315 + 16.316 +static inline int skb_cloned(struct sk_buff *skb) 16.317 +{ 16.318 + return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; 16.319 +} 16.320 + 16.321 +/** 16.322 + * skb_shared - is the buffer shared 16.323 + * @skb: buffer to check 16.324 + * 16.325 + * Returns true if more than one person has a reference to this 16.326 + * buffer. 16.327 + */ 16.328 + 16.329 +static inline int skb_shared(struct sk_buff *skb) 16.330 +{ 16.331 + return (atomic_read(&skb->users) != 1); 16.332 +} 16.333 + 16.334 +/** 16.335 + * skb_share_check - check if buffer is shared and if so clone it 16.336 + * @skb: buffer to check 16.337 + * @pri: priority for memory allocation 16.338 + * 16.339 + * If the buffer is shared the buffer is cloned and the old copy 16.340 + * drops a reference. A new clone with a single reference is returned. 16.341 + * If the buffer is not shared the original buffer is returned. When 16.342 + * being called from interrupt status or with spinlocks held pri must 16.343 + * be GFP_ATOMIC. 16.344 + * 16.345 + * NULL is returned on a memory allocation failure. 16.346 + */ 16.347 + 16.348 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) 16.349 +{ 16.350 + if (skb_shared(skb)) { 16.351 + struct sk_buff *nskb; 16.352 + nskb = skb_clone(skb, pri); 16.353 + kfree_skb(skb); 16.354 + return nskb; 16.355 + } 16.356 + return skb; 16.357 +} 16.358 + 16.359 + 16.360 +/* 16.361 + * Copy shared buffers into a new sk_buff. We effectively do COW on 16.362 + * packets to handle cases where we have a local reader and forward 16.363 + * and a couple of other messy ones. The normal one is tcpdumping 16.364 + * a packet thats being forwarded. 16.365 + */ 16.366 + 16.367 +/** 16.368 + * skb_unshare - make a copy of a shared buffer 16.369 + * @skb: buffer to check 16.370 + * @pri: priority for memory allocation 16.371 + * 16.372 + * If the socket buffer is a clone then this function creates a new 16.373 + * copy of the data, drops a reference count on the old copy and returns 16.374 + * the new copy with the reference count at 1. If the buffer is not a clone 16.375 + * the original buffer is returned. When called with a spinlock held or 16.376 + * from interrupt state @pri must be %GFP_ATOMIC 16.377 + * 16.378 + * %NULL is returned on a memory allocation failure. 
16.379 + */ 16.380 + 16.381 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) 16.382 +{ 16.383 + struct sk_buff *nskb; 16.384 + if(!skb_cloned(skb)) 16.385 + return skb; 16.386 + nskb=skb_copy(skb, pri); 16.387 + kfree_skb(skb); /* Free our shared copy */ 16.388 + return nskb; 16.389 +} 16.390 + 16.391 +/** 16.392 + * skb_peek 16.393 + * @list_: list to peek at 16.394 + * 16.395 + * Peek an &sk_buff. Unlike most other operations you _MUST_ 16.396 + * be careful with this one. A peek leaves the buffer on the 16.397 + * list and someone else may run off with it. You must hold 16.398 + * the appropriate locks or have a private queue to do this. 16.399 + * 16.400 + * Returns %NULL for an empty list or a pointer to the head element. 16.401 + * The reference count is not incremented and the reference is therefore 16.402 + * volatile. Use with caution. 16.403 + */ 16.404 + 16.405 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) 16.406 +{ 16.407 + struct sk_buff *list = ((struct sk_buff *)list_)->next; 16.408 + if (list == (struct sk_buff *)list_) 16.409 + list = NULL; 16.410 + return list; 16.411 +} 16.412 + 16.413 +/** 16.414 + * skb_peek_tail 16.415 + * @list_: list to peek at 16.416 + * 16.417 + * Peek an &sk_buff. Unlike most other operations you _MUST_ 16.418 + * be careful with this one. A peek leaves the buffer on the 16.419 + * list and someone else may run off with it. You must hold 16.420 + * the appropriate locks or have a private queue to do this. 16.421 + * 16.422 + * Returns %NULL for an empty list or a pointer to the tail element. 16.423 + * The reference count is not incremented and the reference is therefore 16.424 + * volatile. Use with caution. 16.425 + */ 16.426 + 16.427 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) 16.428 +{ 16.429 + struct sk_buff *list = ((struct sk_buff *)list_)->prev; 16.430 + if (list == (struct sk_buff *)list_) 16.431 + list = NULL; 16.432 + return list; 16.433 +} 16.434 + 16.435 +/** 16.436 + * skb_queue_len - get queue length 16.437 + * @list_: list to measure 16.438 + * 16.439 + * Return the length of an &sk_buff queue. 16.440 + */ 16.441 + 16.442 +static inline __u32 skb_queue_len(struct sk_buff_head *list_) 16.443 +{ 16.444 + return(list_->qlen); 16.445 +} 16.446 + 16.447 +static inline void skb_queue_head_init(struct sk_buff_head *list) 16.448 +{ 16.449 + spin_lock_init(&list->lock); 16.450 + list->prev = (struct sk_buff *)list; 16.451 + list->next = (struct sk_buff *)list; 16.452 + list->qlen = 0; 16.453 +} 16.454 + 16.455 +/* 16.456 + * Insert an sk_buff at the start of a list. 16.457 + * 16.458 + * The "__skb_xxxx()" functions are the non-atomic ones that 16.459 + * can only be called with interrupts disabled. 16.460 + */ 16.461 + 16.462 +/** 16.463 + * __skb_queue_head - queue a buffer at the list head 16.464 + * @list: list to use 16.465 + * @newsk: buffer to queue 16.466 + * 16.467 + * Queue a buffer at the start of a list. This function takes no locks 16.468 + * and you must therefore hold required locks before calling it. 16.469 + * 16.470 + * A buffer cannot be placed on two lists at the same time. 
16.471 + */ 16.472 + 16.473 +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 16.474 +{ 16.475 + struct sk_buff *prev, *next; 16.476 + 16.477 + newsk->list = list; 16.478 + list->qlen++; 16.479 + prev = (struct sk_buff *)list; 16.480 + next = prev->next; 16.481 + newsk->next = next; 16.482 + newsk->prev = prev; 16.483 + next->prev = newsk; 16.484 + prev->next = newsk; 16.485 +} 16.486 + 16.487 + 16.488 +/** 16.489 + * skb_queue_head - queue a buffer at the list head 16.490 + * @list: list to use 16.491 + * @newsk: buffer to queue 16.492 + * 16.493 + * Queue a buffer at the start of the list. This function takes the 16.494 + * list lock and can be used safely with other locking &sk_buff functions 16.495 + * safely. 16.496 + * 16.497 + * A buffer cannot be placed on two lists at the same time. 16.498 + */ 16.499 + 16.500 +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 16.501 +{ 16.502 + unsigned long flags; 16.503 + 16.504 + spin_lock_irqsave(&list->lock, flags); 16.505 + __skb_queue_head(list, newsk); 16.506 + spin_unlock_irqrestore(&list->lock, flags); 16.507 +} 16.508 + 16.509 +/** 16.510 + * __skb_queue_tail - queue a buffer at the list tail 16.511 + * @list: list to use 16.512 + * @newsk: buffer to queue 16.513 + * 16.514 + * Queue a buffer at the end of a list. This function takes no locks 16.515 + * and you must therefore hold required locks before calling it. 16.516 + * 16.517 + * A buffer cannot be placed on two lists at the same time. 16.518 + */ 16.519 + 16.520 + 16.521 +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 16.522 +{ 16.523 + struct sk_buff *prev, *next; 16.524 + 16.525 + newsk->list = list; 16.526 + list->qlen++; 16.527 + next = (struct sk_buff *)list; 16.528 + prev = next->prev; 16.529 + newsk->next = next; 16.530 + newsk->prev = prev; 16.531 + next->prev = newsk; 16.532 + prev->next = newsk; 16.533 +} 16.534 + 16.535 +/** 16.536 + * skb_queue_tail - queue a buffer at the list tail 16.537 + * @list: list to use 16.538 + * @newsk: buffer to queue 16.539 + * 16.540 + * Queue a buffer at the tail of the list. This function takes the 16.541 + * list lock and can be used safely with other locking &sk_buff functions 16.542 + * safely. 16.543 + * 16.544 + * A buffer cannot be placed on two lists at the same time. 16.545 + */ 16.546 + 16.547 +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 16.548 +{ 16.549 + unsigned long flags; 16.550 + 16.551 + spin_lock_irqsave(&list->lock, flags); 16.552 + __skb_queue_tail(list, newsk); 16.553 + spin_unlock_irqrestore(&list->lock, flags); 16.554 +} 16.555 + 16.556 +/** 16.557 + * __skb_dequeue - remove from the head of the queue 16.558 + * @list: list to dequeue from 16.559 + * 16.560 + * Remove the head of the list. This function does not take any locks 16.561 + * so must be used with appropriate locks held only. The head item is 16.562 + * returned or %NULL if the list is empty. 
16.563 + */ 16.564 + 16.565 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) 16.566 +{ 16.567 + struct sk_buff *next, *prev, *result; 16.568 + 16.569 + prev = (struct sk_buff *) list; 16.570 + next = prev->next; 16.571 + result = NULL; 16.572 + if (next != prev) { 16.573 + result = next; 16.574 + next = next->next; 16.575 + list->qlen--; 16.576 + next->prev = prev; 16.577 + prev->next = next; 16.578 + result->next = NULL; 16.579 + result->prev = NULL; 16.580 + result->list = NULL; 16.581 + } 16.582 + return result; 16.583 +} 16.584 + 16.585 +/** 16.586 + * skb_dequeue - remove from the head of the queue 16.587 + * @list: list to dequeue from 16.588 + * 16.589 + * Remove the head of the list. The list lock is taken so the function 16.590 + * may be used safely with other locking list functions. The head item is 16.591 + * returned or %NULL if the list is empty. 16.592 + */ 16.593 + 16.594 +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) 16.595 +{ 16.596 + unsigned long flags; 16.597 + struct sk_buff *result; 16.598 + 16.599 + spin_lock_irqsave(&list->lock, flags); 16.600 + result = __skb_dequeue(list); 16.601 + spin_unlock_irqrestore(&list->lock, flags); 16.602 + return result; 16.603 +} 16.604 + 16.605 +/* 16.606 + * Insert a packet on a list. 16.607 + */ 16.608 + 16.609 +static inline void __skb_insert(struct sk_buff *newsk, 16.610 + struct sk_buff * prev, struct sk_buff *next, 16.611 + struct sk_buff_head * list) 16.612 +{ 16.613 + newsk->next = next; 16.614 + newsk->prev = prev; 16.615 + next->prev = newsk; 16.616 + prev->next = newsk; 16.617 + newsk->list = list; 16.618 + list->qlen++; 16.619 +} 16.620 + 16.621 +/** 16.622 + * skb_insert - insert a buffer 16.623 + * @old: buffer to insert before 16.624 + * @newsk: buffer to insert 16.625 + * 16.626 + * Place a packet before a given packet in a list. The list locks are taken 16.627 + * and this function is atomic with respect to other list locked calls 16.628 + * A buffer cannot be placed on two lists at the same time. 16.629 + */ 16.630 + 16.631 +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) 16.632 +{ 16.633 + unsigned long flags; 16.634 + 16.635 + spin_lock_irqsave(&old->list->lock, flags); 16.636 + __skb_insert(newsk, old->prev, old, old->list); 16.637 + spin_unlock_irqrestore(&old->list->lock, flags); 16.638 +} 16.639 + 16.640 +/* 16.641 + * Place a packet after a given packet in a list. 16.642 + */ 16.643 + 16.644 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) 16.645 +{ 16.646 + __skb_insert(newsk, old, old->next, old->list); 16.647 +} 16.648 + 16.649 +/** 16.650 + * skb_append - append a buffer 16.651 + * @old: buffer to insert after 16.652 + * @newsk: buffer to insert 16.653 + * 16.654 + * Place a packet after a given packet in a list. The list locks are taken 16.655 + * and this function is atomic with respect to other list locked calls. 16.656 + * A buffer cannot be placed on two lists at the same time. 16.657 + */ 16.658 + 16.659 + 16.660 +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) 16.661 +{ 16.662 + unsigned long flags; 16.663 + 16.664 + spin_lock_irqsave(&old->list->lock, flags); 16.665 + __skb_append(old, newsk); 16.666 + spin_unlock_irqrestore(&old->list->lock, flags); 16.667 +} 16.668 + 16.669 +/* 16.670 + * remove sk_buff from list. _Must_ be called atomically, and with 16.671 + * the list known.. 
16.672 + */ 16.673 + 16.674 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 16.675 +{ 16.676 + struct sk_buff * next, * prev; 16.677 + 16.678 + list->qlen--; 16.679 + next = skb->next; 16.680 + prev = skb->prev; 16.681 + skb->next = NULL; 16.682 + skb->prev = NULL; 16.683 + skb->list = NULL; 16.684 + next->prev = prev; 16.685 + prev->next = next; 16.686 +} 16.687 + 16.688 +/** 16.689 + * skb_unlink - remove a buffer from a list 16.690 + * @skb: buffer to remove 16.691 + * 16.692 + * Place a packet after a given packet in a list. The list locks are taken 16.693 + * and this function is atomic with respect to other list locked calls 16.694 + * 16.695 + * Works even without knowing the list it is sitting on, which can be 16.696 + * handy at times. It also means that THE LIST MUST EXIST when you 16.697 + * unlink. Thus a list must have its contents unlinked before it is 16.698 + * destroyed. 16.699 + */ 16.700 + 16.701 +static inline void skb_unlink(struct sk_buff *skb) 16.702 +{ 16.703 + struct sk_buff_head *list = skb->list; 16.704 + 16.705 + if(list) { 16.706 + unsigned long flags; 16.707 + 16.708 + spin_lock_irqsave(&list->lock, flags); 16.709 + if(skb->list == list) 16.710 + __skb_unlink(skb, skb->list); 16.711 + spin_unlock_irqrestore(&list->lock, flags); 16.712 + } 16.713 +} 16.714 + 16.715 +/* XXX: more streamlined implementation */ 16.716 + 16.717 +/** 16.718 + * __skb_dequeue_tail - remove from the tail of the queue 16.719 + * @list: list to dequeue from 16.720 + * 16.721 + * Remove the tail of the list. This function does not take any locks 16.722 + * so must be used with appropriate locks held only. The tail item is 16.723 + * returned or %NULL if the list is empty. 16.724 + */ 16.725 + 16.726 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) 16.727 +{ 16.728 + struct sk_buff *skb = skb_peek_tail(list); 16.729 + if (skb) 16.730 + __skb_unlink(skb, list); 16.731 + return skb; 16.732 +} 16.733 + 16.734 +/** 16.735 + * skb_dequeue - remove from the head of the queue 16.736 + * @list: list to dequeue from 16.737 + * 16.738 + * Remove the head of the list. The list lock is taken so the function 16.739 + * may be used safely with other locking list functions. The tail item is 16.740 + * returned or %NULL if the list is empty. 
16.741 + */ 16.742 + 16.743 +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) 16.744 +{ 16.745 + unsigned long flags; 16.746 + struct sk_buff *result; 16.747 + 16.748 + spin_lock_irqsave(&list->lock, flags); 16.749 + result = __skb_dequeue_tail(list); 16.750 + spin_unlock_irqrestore(&list->lock, flags); 16.751 + return result; 16.752 +} 16.753 + 16.754 +static inline int skb_is_nonlinear(const struct sk_buff *skb) 16.755 +{ 16.756 + return skb->data_len; 16.757 +} 16.758 + 16.759 +static inline unsigned int skb_headlen(const struct sk_buff *skb) 16.760 +{ 16.761 + return skb->len - skb->data_len; 16.762 +} 16.763 + 16.764 +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0) 16.765 +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0) 16.766 +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0) 16.767 + 16.768 +/* 16.769 + * Add data to an sk_buff 16.770 + */ 16.771 + 16.772 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) 16.773 +{ 16.774 + unsigned char *tmp=skb->tail; 16.775 + SKB_LINEAR_ASSERT(skb); 16.776 + skb->tail+=len; 16.777 + skb->len+=len; 16.778 + return tmp; 16.779 +} 16.780 + 16.781 +/** 16.782 + * skb_put - add data to a buffer 16.783 + * @skb: buffer to use 16.784 + * @len: amount of data to add 16.785 + * 16.786 + * This function extends the used data area of the buffer. If this would 16.787 + * exceed the total buffer size the kernel will panic. A pointer to the 16.788 + * first byte of the extra data is returned. 16.789 + */ 16.790 + 16.791 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) 16.792 +{ 16.793 + unsigned char *tmp=skb->tail; 16.794 + SKB_LINEAR_ASSERT(skb); 16.795 + skb->tail+=len; 16.796 + skb->len+=len; 16.797 + if(skb->tail>skb->end) { 16.798 + skb_over_panic(skb, len, current_text_addr()); 16.799 + } 16.800 + return tmp; 16.801 +} 16.802 + 16.803 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) 16.804 +{ 16.805 + skb->data-=len; 16.806 + skb->len+=len; 16.807 + return skb->data; 16.808 +} 16.809 + 16.810 +/** 16.811 + * skb_push - add data to the start of a buffer 16.812 + * @skb: buffer to use 16.813 + * @len: amount of data to add 16.814 + * 16.815 + * This function extends the used data area of the buffer at the buffer 16.816 + * start. If this would exceed the total buffer headroom the kernel will 16.817 + * panic. A pointer to the first byte of the extra data is returned. 16.818 + */ 16.819 + 16.820 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) 16.821 +{ 16.822 + skb->data-=len; 16.823 + skb->len+=len; 16.824 + if(skb->data<skb->head) { 16.825 + skb_under_panic(skb, len, current_text_addr()); 16.826 + } 16.827 + return skb->data; 16.828 +} 16.829 + 16.830 +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) 16.831 +{ 16.832 + skb->len-=len; 16.833 + if (skb->len < skb->data_len) 16.834 + out_of_line_bug(); 16.835 + return skb->data+=len; 16.836 +} 16.837 + 16.838 +/** 16.839 + * skb_pull - remove data from the start of a buffer 16.840 + * @skb: buffer to use 16.841 + * @len: amount of data to remove 16.842 + * 16.843 + * This function removes data from the start of a buffer, returning 16.844 + * the memory to the headroom. A pointer to the next data in the buffer 16.845 + * is returned. 
Once the data has been pulled future pushes will overwrite 16.846 + * the old data. 16.847 + */ 16.848 + 16.849 +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) 16.850 +{ 16.851 + if (len > skb->len) 16.852 + return NULL; 16.853 + return __skb_pull(skb,len); 16.854 +} 16.855 + 16.856 +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); 16.857 + 16.858 +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) 16.859 +{ 16.860 + if (len > skb_headlen(skb) && 16.861 + __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) 16.862 + return NULL; 16.863 + skb->len -= len; 16.864 + return skb->data += len; 16.865 +} 16.866 + 16.867 +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) 16.868 +{ 16.869 + if (len > skb->len) 16.870 + return NULL; 16.871 + return __pskb_pull(skb,len); 16.872 +} 16.873 + 16.874 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) 16.875 +{ 16.876 + if (len <= skb_headlen(skb)) 16.877 + return 1; 16.878 + if (len > skb->len) 16.879 + return 0; 16.880 + return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); 16.881 +} 16.882 + 16.883 +/** 16.884 + * skb_headroom - bytes at buffer head 16.885 + * @skb: buffer to check 16.886 + * 16.887 + * Return the number of bytes of free space at the head of an &sk_buff. 16.888 + */ 16.889 + 16.890 +static inline int skb_headroom(const struct sk_buff *skb) 16.891 +{ 16.892 + return skb->data-skb->head; 16.893 +} 16.894 + 16.895 +/** 16.896 + * skb_tailroom - bytes at buffer end 16.897 + * @skb: buffer to check 16.898 + * 16.899 + * Return the number of bytes of free space at the tail of an sk_buff 16.900 + */ 16.901 + 16.902 +static inline int skb_tailroom(const struct sk_buff *skb) 16.903 +{ 16.904 + return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; 16.905 +} 16.906 + 16.907 +/** 16.908 + * skb_reserve - adjust headroom 16.909 + * @skb: buffer to alter 16.910 + * @len: bytes to move 16.911 + * 16.912 + * Increase the headroom of an empty &sk_buff by reducing the tail 16.913 + * room. This is only allowed for an empty buffer. 16.914 + */ 16.915 + 16.916 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len) 16.917 +{ 16.918 + skb->data+=len; 16.919 + skb->tail+=len; 16.920 +} 16.921 + 16.922 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); 16.923 + 16.924 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) 16.925 +{ 16.926 + if (!skb->data_len) { 16.927 + skb->len = len; 16.928 + skb->tail = skb->data+len; 16.929 + } else { 16.930 + ___pskb_trim(skb, len, 0); 16.931 + } 16.932 +} 16.933 + 16.934 +/** 16.935 + * skb_trim - remove end from a buffer 16.936 + * @skb: buffer to alter 16.937 + * @len: new length 16.938 + * 16.939 + * Cut the length of a buffer down by removing data from the tail. If 16.940 + * the buffer is already under the length specified it is not modified. 
16.941 + */ 16.942 + 16.943 +static inline void skb_trim(struct sk_buff *skb, unsigned int len) 16.944 +{ 16.945 + if (skb->len > len) { 16.946 + __skb_trim(skb, len); 16.947 + } 16.948 +} 16.949 + 16.950 + 16.951 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) 16.952 +{ 16.953 + if (!skb->data_len) { 16.954 + skb->len = len; 16.955 + skb->tail = skb->data+len; 16.956 + return 0; 16.957 + } else { 16.958 + return ___pskb_trim(skb, len, 1); 16.959 + } 16.960 +} 16.961 + 16.962 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len) 16.963 +{ 16.964 + if (len < skb->len) 16.965 + return __pskb_trim(skb, len); 16.966 + return 0; 16.967 +} 16.968 + 16.969 +/** 16.970 + * skb_orphan - orphan a buffer 16.971 + * @skb: buffer to orphan 16.972 + * 16.973 + * If a buffer currently has an owner then we call the owner's 16.974 + * destructor function and make the @skb unowned. The buffer continues 16.975 + * to exist but is no longer charged to its former owner. 16.976 + */ 16.977 + 16.978 + 16.979 +static inline void skb_orphan(struct sk_buff *skb) 16.980 +{ 16.981 + if (skb->destructor) 16.982 + skb->destructor(skb); 16.983 + skb->destructor = NULL; 16.984 + skb->sk = NULL; 16.985 +} 16.986 + 16.987 +/** 16.988 + * skb_purge - empty a list 16.989 + * @list: list to empty 16.990 + * 16.991 + * Delete all buffers on an &sk_buff list. Each buffer is removed from 16.992 + * the list and one reference dropped. This function takes the list 16.993 + * lock and is atomic with respect to other list locking functions. 16.994 + */ 16.995 + 16.996 + 16.997 +static inline void skb_queue_purge(struct sk_buff_head *list) 16.998 +{ 16.999 + struct sk_buff *skb; 16.1000 + while ((skb=skb_dequeue(list))!=NULL) 16.1001 + kfree_skb(skb); 16.1002 +} 16.1003 + 16.1004 +/** 16.1005 + * __skb_purge - empty a list 16.1006 + * @list: list to empty 16.1007 + * 16.1008 + * Delete all buffers on an &sk_buff list. Each buffer is removed from 16.1009 + * the list and one reference dropped. This function does not take the 16.1010 + * list lock and the caller must hold the relevant locks to use it. 16.1011 + */ 16.1012 + 16.1013 + 16.1014 +static inline void __skb_queue_purge(struct sk_buff_head *list) 16.1015 +{ 16.1016 + struct sk_buff *skb; 16.1017 + while ((skb=__skb_dequeue(list))!=NULL) 16.1018 + kfree_skb(skb); 16.1019 +} 16.1020 + 16.1021 +/** 16.1022 + * __dev_alloc_skb - allocate an skbuff for sending 16.1023 + * @length: length to allocate 16.1024 + * @gfp_mask: get_free_pages mask, passed to alloc_skb 16.1025 + * 16.1026 + * Allocate a new &sk_buff and assign it a usage count of one. The 16.1027 + * buffer has unspecified headroom built in. Users should allocate 16.1028 + * the headroom they think they need without accounting for the 16.1029 + * built in space. The built in space is used for optimisations. 16.1030 + * 16.1031 + * %NULL is returned in there is no free memory. 
16.1021 +/**
16.1022 + * __dev_alloc_skb - allocate an skbuff for sending
16.1023 + * @length: length to allocate
16.1024 + * @gfp_mask: get_free_pages mask, passed to alloc_skb
16.1025 + *
16.1026 + * Allocate a new &sk_buff and assign it a usage count of one. The
16.1027 + * buffer has unspecified headroom built in. Users should allocate
16.1028 + * the headroom they think they need without accounting for the
16.1029 + * built in space. The built in space is used for optimisations.
16.1030 + *
16.1031 + * %NULL is returned if there is no free memory.
16.1032 + */
16.1033 +
16.1034 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
16.1035 +                                              int gfp_mask)
16.1036 +{
16.1037 +    struct sk_buff *skb;
16.1038 +#if defined(CONFIG_XEN)
16.1039 +    length = (PAGE_SIZE/2)+1; /* force slab allocator to give us a page */
16.1040 +#endif
16.1041 +    skb = alloc_skb(length+16, gfp_mask);
16.1042 +    if (skb)
16.1043 +        skb_reserve(skb,16);
16.1044 +    return skb;
16.1045 +}
16.1046 +
16.1047 +/**
16.1048 + * dev_alloc_skb - allocate an skbuff for sending
16.1049 + * @length: length to allocate
16.1050 + *
16.1051 + * Allocate a new &sk_buff and assign it a usage count of one. The
16.1052 + * buffer has unspecified headroom built in. Users should allocate
16.1053 + * the headroom they think they need without accounting for the
16.1054 + * built in space. The built in space is used for optimisations.
16.1055 + *
16.1056 + * %NULL is returned if there is no free memory. Although this function
16.1057 + * allocates memory it can be called from an interrupt.
16.1058 + */
16.1059 +
16.1060 +static inline struct sk_buff *dev_alloc_skb(unsigned int length)
16.1061 +{
16.1062 +    return __dev_alloc_skb(length, GFP_ATOMIC);
16.1063 +}
16.1064 +
16.1065 +/**
16.1066 + * skb_cow - copy header of skb when it is required
16.1067 + * @skb: buffer to cow
16.1068 + * @headroom: needed headroom
16.1069 + *
16.1070 + * If the skb passed lacks sufficient headroom or its data part
16.1071 + * is shared, data is reallocated. If reallocation fails, an error
16.1072 + * is returned and original skb is not changed.
16.1073 + *
16.1074 + * The result is skb with writable area skb->head...skb->tail
16.1075 + * and at least @headroom of space at head.
16.1076 + */
16.1077 +
16.1078 +static inline int
16.1079 +skb_cow(struct sk_buff *skb, unsigned int headroom)
16.1080 +{
16.1081 +    int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
16.1082 +
16.1083 +    if (delta < 0)
16.1084 +        delta = 0;
16.1085 +
16.1086 +    if (delta || skb_cloned(skb))
16.1087 +        return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
16.1088 +    return 0;
16.1089 +}
16.1090 +
16.1091 +/**
16.1092 + * skb_padto - pad an skbuff up to a minimal size
16.1093 + * @skb: buffer to pad
16.1094 + * @len: minimal length
16.1095 + *
16.1096 + * Pads up a buffer to ensure the trailing bytes exist and are
16.1097 + * blanked. If the buffer already contains sufficient data it
16.1098 + * is untouched. Returns the buffer, which may be a replacement
16.1099 + * for the original, or NULL for out of memory - in which case
16.1100 + * the original buffer is still freed.
16.1101 + */
16.1102 +
16.1103 +static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len)
16.1104 +{
16.1105 +    unsigned int size = skb->len;
16.1106 +    if(likely(size >= len))
16.1107 +        return skb;
16.1108 +    return skb_pad(skb, len-size);
16.1109 +}
16.1110 +
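As a sketch of the usual receive-side idiom built on dev_alloc_skb() above (the mydev_rx_copy() name is hypothetical; the extra 2 bytes are the standard trick that word-aligns the IP header following a 14-byte Ethernet header). Note that with CONFIG_XEN the hunk above forces the underlying allocation up to a page regardless of the requested length.

    #include <linux/skbuff.h>
    #include <linux/netdevice.h>
    #include <linux/string.h>

    static struct sk_buff *mydev_rx_copy(struct net_device *dev,
                                         const void *data, unsigned int len)
    {
        struct sk_buff *skb = dev_alloc_skb(len + 2);

        if (skb == NULL)
            return NULL;                /* out of memory: caller drops the frame */

        skb->dev = dev;
        skb_reserve(skb, 2);            /* align the IP header */
        memcpy(skb_put(skb, len), data, len);
        return skb;
    }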
16.1111 +/**
16.1112 + * skb_linearize - convert paged skb to linear one
16.1113 + * @skb: buffer to linearize
16.1114 + * @gfp: allocation mode
16.1115 + *
16.1116 + * If there is no free memory -ENOMEM is returned, otherwise zero
16.1117 + * is returned and the old skb data released. */
16.1118 +int skb_linearize(struct sk_buff *skb, int gfp);
16.1119 +
16.1120 +static inline void *kmap_skb_frag(const skb_frag_t *frag)
16.1121 +{
16.1122 +#ifdef CONFIG_HIGHMEM
16.1123 +    if (in_irq())
16.1124 +        out_of_line_bug();
16.1125 +
16.1126 +    local_bh_disable();
16.1127 +#endif
16.1128 +    return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
16.1129 +}
16.1130 +
16.1131 +static inline void kunmap_skb_frag(void *vaddr)
16.1132 +{
16.1133 +    kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
16.1134 +#ifdef CONFIG_HIGHMEM
16.1135 +    local_bh_enable();
16.1136 +#endif
16.1137 +}
16.1138 +
16.1139 +#define skb_queue_walk(queue, skb) \
16.1140 +    for (skb = (queue)->next;                        \
16.1141 +         (skb != (struct sk_buff *)(queue));         \
16.1142 +         skb=skb->next)
16.1143 +
16.1144 +
16.1145 +extern struct sk_buff *skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
16.1146 +extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
16.1147 +extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
16.1148 +extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size);
16.1149 +extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
16.1150 +extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov);
16.1151 +extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb);
16.1152 +
16.1153 +extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
16.1154 +extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
16.1155 +extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
16.1156 +extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
16.1157 +
16.1158 +extern void skb_init(void);
16.1159 +extern void skb_add_mtu(int mtu);
16.1160 +
16.1161 +#ifdef CONFIG_NETFILTER
16.1162 +static inline void
16.1163 +nf_conntrack_put(struct nf_ct_info *nfct)
16.1164 +{
16.1165 +    if (nfct && atomic_dec_and_test(&nfct->master->use))
16.1166 +        nfct->master->destroy(nfct->master);
16.1167 +}
16.1168 +static inline void
16.1169 +nf_conntrack_get(struct nf_ct_info *nfct)
16.1170 +{
16.1171 +    if (nfct)
16.1172 +        atomic_inc(&nfct->master->use);
16.1173 +}
16.1174 +#endif
16.1175 +
16.1176 +#endif /* __KERNEL__ */
16.1177 +#endif /* _LINUX_SKBUFF_H */
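Finally, an illustrative walk over a queue with the skb_queue_walk() macro declared above (the queue_total_bytes() helper is hypothetical; the caller is assumed to hold the queue lock or otherwise keep the list stable while iterating).

    #include <linux/skbuff.h>

    static unsigned int queue_total_bytes(struct sk_buff_head *list)
    {
        struct sk_buff *skb;
        unsigned int bytes = 0;

        /* Read-only traversal; the loop ends when the cursor wraps back
         * to the list head itself. */
        skb_queue_walk(list, skb)
            bytes += skb->len;

        return bytes;
    }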