xenbits.xensource.com Git - xenclient/kernel.git/commitdiff
* Wed Oct 22 2008 Jiri Pirko <jpirko@redhat.com> [2.6.18-92.1.17.el5] kernel-2.6.18-92.1.13.el5.patch
author t_jeang <devnull@localhost>
Tue, 6 Jan 2009 12:05:55 +0000 (12:05 +0000)
committer t_jeang <devnull@localhost>
Tue, 6 Jan 2009 12:05:55 +0000 (12:05 +0000)
- Revert: [nfs] pages of a memory mapped file get corrupted (Peter Staubach ) [450335 435291]

* Sat Oct 18 2008 Jiri Pirko <jpirko@redhat.com> [2.6.18-92.1.16.el5]
- [i386] vDSO: use install_special_mapping (Peter Zijlstra ) [460275 460276] {CVE-2008-3527}
- [scsi] aacraid: remove some quirk AAC_QUIRK_SCSI_32 bits (Tomas Henzl ) [466885 453472]
- [fs] remove SUID when splicing into an inode (Eric Sandeen ) [464451 464452] {CVE-2008-3833}
- [fs] open() allows setgid bit when user is not in group (Eugene Teo ) [463867 463687] {CVE-2008-4210}
- [xen] ia64: fix INIT injection (Tetsu Yamamoto ) [467105 464445]

* Fri Oct 10 2008 Jiri Pirko <jpirko@redhat.com> [2.6.18-92.1.15.el5]
- [pci] fix problems with msi interrupt management (Neil Horman ) [461894 428696]
- [x86_64] revert time syscall changes (Prarit Bhargava ) [466427 461184]
- [xen] allow guests to hide the TSC from applications (Chris Lalancette ) [378471 378481] {CVE-2007-5907}
- [scsi] qla2xxx: additional residual-count correction (Marcus Barrow ) [465741 462117]
- [char] add range_is_allowed check to mmap_mem (Eugene Teo ) [460858 460857]
- [fs] binfmt_misc: avoid potential kernel stack overflow (Vitaly Mayatskikh ) [459464 459463]
- [misc] cpufreq: fix format string bug (Vitaly Mayatskikh ) [459461 459460]
- [dlm] user.c input validation fixes (David Teigland ) [458759 458760]
- [nfs] pages of a memory mapped file get corrupted (Peter Staubach ) [450335 435291]
- [x86_64] gettimeofday fixes for HPET, PMTimer, TSC (Prarit Bhargava ) [462860 250708]

* Wed Sep 24 2008 Jiri Pirko <jpirko@redhat.com> [2.6.18-92.1.14.el5]
- [libata] ata_scsi_rbuf_get check for scatterlist usage (David Milburn ) [460638 455445]
- [net] random32: seeding improvement (Jiri Pirko ) [458021 458019]
- [x86_64] xen: local DOS due to NT bit leakage (Eugene Teo ) [457721 457722] {CVE-2006-5755}
- [fs] cifs: fix O_APPEND on directio mounts (Jeff Layton ) [462591 460063]
- [openib] race between QP async handler and destroy_qp (Brad Peters ) [458781 446109]
- [net] dccp_setsockopt_change integer overflow (Vitaly Mayatskikh ) [459232 459235] {CVE-2008-3276}
- [acpi] error attaching device data (peterm@redhat.com ) [460868 459670]
- [mm] optimize ZERO_PAGE in 'get_user_pages' and fix XIP (Anton Arapov ) [452667 452668] {CVE-2008-2372}
- [xen] xennet: coordinate ARP with backend network status (Herbert Xu ) [461457 458934]
- [xen] event channel lock and barrier (Markus Armbruster ) [461099 457086]
- [fs] fix bad unlock_page in pipe_to_file() error path (Larry Woodman ) [462436 439917]

49 files changed:
Makefile
arch/i386/kernel/sysenter.c
arch/powerpc/kernel/vdso.c
arch/x86_64/ia32/syscall32-xen.c
arch/x86_64/ia32/syscall32.c
arch/x86_64/kernel/pmtimer.c
arch/x86_64/kernel/setup64-xen.c
arch/x86_64/kernel/time.c
arch/x86_64/kernel/vsyscall.c
buildconfigs/Rules.mk
configs/kernel-2.6.18-i686-PAE.config
configs/kernel-2.6.18-i686-debug.config
configs/kernel-2.6.18-i686-xen.config
configs/kernel-2.6.18-i686.config
drivers/acpi/executer/excreate.c
drivers/acpi/namespace/nsaccess.c
drivers/ata/libata-scsi.c
drivers/char/mem.c
drivers/cpufreq/cpufreq.c
drivers/infiniband/hw/ehca/ehca_classes.h
drivers/infiniband/hw/ehca/ehca_irq.c
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/pci/msi.c
drivers/pci/msi.h
drivers/scsi/aacraid/linit.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/xen/evtchn/evtchn.c
drivers/xen/netback/common.h
drivers/xen/netback/interface.c
drivers/xen/netback/netback.c
drivers/xen/netback/xenbus.c
fs/binfmt_em86.c
fs/binfmt_misc.c
fs/binfmt_script.c
fs/cifs/file.c
fs/dlm/user.c
fs/open.c
fs/splice.c
include/asm-x86_64/mach-xen/asm/system.h
include/asm-x86_64/proto.h
include/asm-x86_64/vsyscall32.h
include/linux/fs.h
include/linux/mm.h
mm/filemap.c
mm/memory.c
mm/migrate.c
mm/mmap.c
net/core/utils.c
net/dccp/proto.c

index 8da9f7480c1eb49531128470a25c8be842e7161e..69800b5cf56329748f5a2fb6a7a03eec2afe09d9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 18
-EXTRAVERSION = -92.1.13.el5
+EXTRAVERSION = -92.1.17.el5
 RHEL_MAJOR = 5
 RHEL_MINOR = 2
 NAME=Avast! A bilge rat!
index d02074f2a2d411e2553752c485591a696bf8909d..57bce7488219a6e42c31484490be8a41ed9ffdf8 100644 (file)
@@ -100,11 +100,12 @@ void enable_sep_cpu(void)
  */
 extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-static void *syscall_page;
+static struct page *syscall_pages[1];
 
 int __cpuinit sysenter_setup(void)
 {
-       syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+       void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+       syscall_pages[0] = virt_to_page(syscall_page);
 
 #ifdef CONFIG_COMPAT_VDSO
        __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
@@ -125,24 +126,6 @@ int __cpuinit sysenter_setup(void)
        return 0;
 }
 
-static struct page *syscall_nopage(struct vm_area_struct *vma,
-                               unsigned long adr, int *type)
-{
-       struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
-       get_page(p);
-       return p;
-}
-
-/* Prevent VMA merging */
-static void syscall_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall_vm_ops = {
-       .close = syscall_vma_close,
-       .nopage = syscall_nopage,
-};
-
 /* Defined in vsyscall-sysenter.S */
 extern void SYSENTER_RETURN;
 
@@ -150,7 +133,6 @@ extern void SYSENTER_RETURN;
 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
                                unsigned long start_code, unsigned long interp_map_address)
 {
-       struct vm_area_struct *vma;
        struct mm_struct *mm = current->mm;
        unsigned long addr;
        int ret;
@@ -162,38 +144,25 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
                goto up_fail;
        }
 
-       vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
-       if (!vma) {
-               ret = -ENOMEM;
-               goto up_fail;
-       }
-
-       vma->vm_start = addr;
-       vma->vm_end = addr + PAGE_SIZE;
-       /* MAYWRITE to allow gdb to COW and set breakpoints */
-       vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
        /*
+        * MAYWRITE to allow gdb to COW and set breakpoints
+        *
         * Make sure the vDSO gets into every core dump.
         * Dumping its contents makes post-mortem fully interpretable later
         * without matching up the same kernel and hardware config to see
         * what PC values meant.
         */
-       vma->vm_flags |= VM_ALWAYSDUMP;
-       vma->vm_flags |= mm->def_flags;
-       vma->vm_page_prot = protection_map[vma->vm_flags & 7];
-       vma->vm_ops = &syscall_vm_ops;
-       vma->vm_mm = mm;
-
-       ret = insert_vm_struct(mm, vma);
-       if (unlikely(ret)) {
-               kmem_cache_free(vm_area_cachep, vma);
+       ret = install_special_mapping(mm, addr, PAGE_SIZE,
+                       VM_READ|VM_EXEC|
+                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+                       VM_ALWAYSDUMP,
+                       syscall_pages);
+       if (ret)
                goto up_fail;
-       }
 
        current->mm->context.vdso = (void *)addr;
        current_thread_info()->sysenter_return =
                                    (void *)VDSO_SYM(&SYSENTER_RETURN);
-       mm->total_vm++;
 up_fail:
        up_write(&mm->mmap_sem);
        return ret;
index a9184d754456a94d96e10d1c9a5f0c777b966826..ec616f4970912da7350f6450fdd8414e1fbdad38 100644 (file)
@@ -126,7 +126,7 @@ static void dump_one_vdso_page(struct page *pg, struct page *upg)
        printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
               page_count(pg),
               pg->flags);
-       if (upg/* && pg != upg*/) {
+       if (upg && !IS_ERR(upg) /* && pg != upg*/) {
                printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
                                                       << PAGE_SHIFT),
                       page_count(upg),
index 67dca1035b63edbb954f2933900781b7658b5db0..0f57b9fb26b9fd074a5abad6271e27311e55d4e4 100644 (file)
@@ -19,7 +19,7 @@ extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
 extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
 extern int sysctl_vsyscall32;
 
-char *syscall32_page; 
+static struct page *syscall32_pages[1];
 static int use_sysenter = -1;
 
 #if CONFIG_XEN_COMPAT < 0x030200
@@ -27,24 +27,6 @@ extern unsigned char syscall32_int80[], syscall32_int80_end[];
 static int use_int80 = 1;
 #endif
 
-static struct page *
-syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
-{
-       struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page);
-       get_page(p);
-       return p;
-}
-
-/* Prevent VMA merging */
-static void syscall32_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall32_vm_ops = {
-       .close = syscall32_vma_close,
-       .nopage = syscall32_nopage,
-};
-
 struct linux_binprm;
 
 /* Setup a VMA at program startup for the vsyscall page */
@@ -52,40 +34,31 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack,
                         unsigned long start_code,
                         unsigned long interp_map_address)
 {
-       int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
-       struct vm_area_struct *vma;
        struct mm_struct *mm = current->mm;
        int ret;
 
-       vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-       if (!vma)
-               return -ENOMEM;
-
-       memset(vma, 0, sizeof(struct vm_area_struct));
-       /* Could randomize here */
-       vma->vm_start = VSYSCALL32_BASE;
-       vma->vm_end = VSYSCALL32_END;
-       /* MAYWRITE to allow gdb to COW and set breakpoints */
-       vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
-       vma->vm_flags |= mm->def_flags;
-       vma->vm_page_prot = protection_map[vma->vm_flags & 7];
-       vma->vm_ops = &syscall32_vm_ops;
-       vma->vm_mm = mm;
-
        down_write(&mm->mmap_sem);
-       if ((ret = insert_vm_struct(mm, vma))) {
-               up_write(&mm->mmap_sem);
-               kmem_cache_free(vm_area_cachep, vma);
-               return ret;
-       }
-       mm->total_vm += npages;
+       /*
+        * MAYWRITE to allow gdb to COW and set breakpoints
+        *
+        * Make sure the vDSO gets into every core dump.
+        * Dumping its contents makes post-mortem fully interpretable later
+        * without matching up the same kernel and hardware config to see
+        * what PC values meant.
+        */
+       ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
+                       VM_READ|VM_EXEC|
+                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+                       VM_ALWAYSDUMP,
+                       syscall32_pages);
        up_write(&mm->mmap_sem);
-       return 0;
+       return ret;
 }
 
 static int __init init_syscall32(void)
 { 
-       syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); 
+       void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+       syscall32_pages[0] = virt_to_page(syscall32_page);
        if (!syscall32_page) 
                panic("Cannot allocate syscall32 page"); 
 
index 3b13188e2ee5e8a00d9a54b14ec8e55a5a1e1dc2..78372c4a851825cd7e78145a411dcd01498369f1 100644 (file)
@@ -18,27 +18,9 @@ extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
 extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
 extern int sysctl_vsyscall32;
 
-char *syscall32_page; 
+static struct page *syscall32_pages[1];
 static int use_sysenter = -1;
 
-static struct page *
-syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
-{
-       struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page);
-       get_page(p);
-       return p;
-}
-
-/* Prevent VMA merging */
-static void syscall32_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall32_vm_ops = {
-       .close = syscall32_vma_close,
-       .nopage = syscall32_nopage,
-};
-
 struct linux_binprm;
 
 /* Setup a VMA at program startup for the vsyscall page */
@@ -46,47 +28,31 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack,
                          unsigned long start_code,
                          unsigned long interp_map_address)
 {
-       int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
-       struct vm_area_struct *vma;
        struct mm_struct *mm = current->mm;
        int ret;
 
-       vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-       if (!vma)
-               return -ENOMEM;
-
-       memset(vma, 0, sizeof(struct vm_area_struct));
-       /* Could randomize here */
-       vma->vm_start = VSYSCALL32_BASE;
-       vma->vm_end = VSYSCALL32_END;
-       /* MAYWRITE to allow gdb to COW and set breakpoints */
-       vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+       down_write(&mm->mmap_sem);
        /*
+        * MAYWRITE to allow gdb to COW and set breakpoints
+        *
         * Make sure the vDSO gets into every core dump.
         * Dumping its contents makes post-mortem fully interpretable later
         * without matching up the same kernel and hardware config to see
         * what PC values meant.
         */
-       vma->vm_flags |= VM_ALWAYSDUMP;
-       vma->vm_flags |= mm->def_flags;
-       vma->vm_page_prot = protection_map[vma->vm_flags & 7];
-       vma->vm_ops = &syscall32_vm_ops;
-       vma->vm_mm = mm;
-
-       down_write(&mm->mmap_sem);
-       if ((ret = insert_vm_struct(mm, vma))) {
-               up_write(&mm->mmap_sem);
-               kmem_cache_free(vm_area_cachep, vma);
-               return ret;
-       }
-       mm->total_vm += npages;
+       ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
+                       VM_READ|VM_EXEC|
+                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+                       VM_ALWAYSDUMP,
+                       syscall32_pages);
        up_write(&mm->mmap_sem);
-       return 0;
+       return ret;
 }
 
 static int __init init_syscall32(void)
 { 
-       syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); 
+       void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+       syscall32_pages[0] = virt_to_page(syscall32_page);
        if (!syscall32_page) 
                panic("Cannot allocate syscall32 page"); 
        if (use_sysenter > 0) {
index 9edee7c16e9e47e5f03dda7f4c10bd1b821ba168..b9712ce75b3b8a7ca8f1e24f021a1d925bad9e32 100644 (file)
@@ -106,7 +106,7 @@ void pmtimer_resume(void)
        last_pmtmr_tick = inl(pmtmr_ioport);
 }
 
-unsigned int do_gettimeoffset_pm(void)
+long do_gettimeoffset_pm(void)
 {
        u32 now, offset, delta = 0;
 
@@ -114,7 +114,9 @@ unsigned int do_gettimeoffset_pm(void)
        now = inl(pmtmr_ioport);
        delta = (now - offset) & ACPI_PM_MASK;
 
-       return offset_delay + cyc2us(delta);
+       /* seems crazy to do with PM timer resolution but we need nsec
+          resolution in arch/x86_64/kernel/time.c code */
+       return ((offset_delay + cyc2us(delta)) * NSEC_PER_USEC);
 }
 
 
index f932f8f8f0371b6718d1cfaa307919d024aea5e0..97c823b7fbd8b7f6bca602e4b3a0ecbdcd13a4f8 100644 (file)
@@ -238,6 +238,8 @@ void __cpuinit check_efer(void)
 
 unsigned long kernel_eflags;
 
+unsigned long kernel_eflags;
+
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
index 867760da54bc2ee071512475cb548a552a258d1a..e2c3a218bcdba96c9f2b6d9dcdb476c56b08f61e 100644 (file)
  *  Copyright (c) 2002,2006  Vojtech Pavlik
  *  Copyright (c) 2003  Andi Kleen
  *  RTC support code taken from arch/i386/kernel/timers/time_hpet.c
+ *
+ *  March 2008: Upstream has diverged significantly from this codebase.
+ *  Modifications to this file to convert the gettimeofday call into nsecs
+ *  (but still return usec values) were done in order to resolve a large
+ *  number of gettimeofday issues seen across a wide swath of Intel and
+ *  AMD systems.
  */
 
 #include <linux/kernel.h>
@@ -65,7 +71,7 @@ static int notsc __initdata = 0;
 #define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
 #define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
 
-#define USEC_PER_REAL_TICK (USEC_PER_SEC / REAL_HZ)
+#define NSEC_PER_REAL_TICK (NSEC_PER_SEC / REAL_HZ)
 
 #define NS_SCALE       10 /* 2^10, carefully chosen */
 #define US_SCALE       32 /* 2^32, arbitralrily chosen */
@@ -90,7 +96,7 @@ struct timespec __xtime __section_xtime;
 struct timezone __sys_tz __section_sys_tz;
 
 /*
- * do_gettimeoffset() returns microseconds since last timer interrupt was
+ * do_gettimeoffset() returns nanoseconds since last timer interrupt was
  * triggered by hardware. A memory read of HPET is slower than a register read
  * of TSC, but much more reliable. It's also synchronized to the timer
  * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
@@ -99,27 +105,27 @@ struct timezone __sys_tz __section_sys_tz;
  * together by xtime_lock.
  */
 
-static inline unsigned int do_gettimeoffset_tsc(void)
+static inline long do_gettimeoffset_tsc(void)
 {
        unsigned long t;
        unsigned long x;
        t = get_cycles_sync();
        if (t < vxtime.last_tsc) 
                t = vxtime.last_tsc; /* hack */
-       x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
+       x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> NS_SCALE;
        return x;
 }
 
-static inline unsigned int do_gettimeoffset_hpet(void)
+static inline long do_gettimeoffset_hpet(void)
 {
        /* cap counter read to one tick to avoid inconsistencies */
        unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
        /* The hpet counter runs at a fixed rate so we don't care about HZ
           scaling here. We do however care that the limit is in real ticks */
-       return (min(counter,hpet_tick_real) * vxtime.quot) >> US_SCALE;
+       return (min(counter,hpet_tick_real) * vxtime.quot) >> NS_SCALE;
 }
 
-unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
+long (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
 
 /*
  * This version of gettimeofday() has microsecond resolution and better than
@@ -129,32 +135,25 @@ unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
 
 void do_gettimeofday(struct timeval *tv)
 {
-       unsigned long seq, t;
-       unsigned int sec, usec;
+       unsigned long seq;
+       long sec, nsec;
 
        do {
                seq = read_seqbegin(&xtime_lock);
 
                sec = xtime.tv_sec;
-               usec = xtime.tv_nsec / NSEC_PER_USEC;
+               nsec = xtime.tv_nsec + (jiffies - wall_jiffies) * NSEC_PER_TICK;
 
-               /* i386 does some correction here to keep the clock 
-                  monotonous even when ntpd is fixing drift.
-                  But they didn't work for me, there is a non monotonic
-                  clock anyways with ntp.
-                  I dropped all corrections now until a real solution can
-                  be found. Note when you fix it here you need to do the same
-                  in arch/x86_64/kernel/vsyscall.c and export all needed
-                  variables in vmlinux.lds. -AK */ 
-
-               t = (jiffies - wall_jiffies) * USEC_PER_TICK +
-                       do_gettimeoffset();
-               usec += t;
+               nsec += do_gettimeoffset();
 
        } while (read_seqretry(&xtime_lock, seq));
 
-       tv->tv_sec = sec + usec / USEC_PER_SEC;
-       tv->tv_usec = usec % USEC_PER_SEC;
+       tv->tv_sec = sec;
+       while (nsec >= NSEC_PER_SEC) {
+               tv->tv_sec += 1;
+               nsec -= NSEC_PER_SEC;
+       }
+       tv->tv_usec = nsec / NSEC_PER_USEC;
 }
 
 EXPORT_SYMBOL(do_gettimeofday);
@@ -175,8 +174,7 @@ int do_settimeofday(struct timespec *tv)
 
        write_seqlock_irq(&xtime_lock);
 
-       nsec -= do_gettimeoffset() * NSEC_PER_USEC +
-               (jiffies - wall_jiffies) * NSEC_PER_TICK;
+       nsec -= do_gettimeoffset() + (jiffies - wall_jiffies) * NSEC_PER_TICK;
 
        wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
        wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
@@ -417,14 +415,15 @@ void main_timer_handler(struct pt_regs *regs)
 #endif
        } else {
                offset = (((tsc - vxtime.last_tsc) *
-                          vxtime.tsc_quot) >> US_SCALE) - USEC_PER_REAL_TICK;
+                          vxtime.tsc_quot) >> NS_SCALE) - NSEC_PER_REAL_TICK;
 
                if (offset < 0)
                        offset = 0;
 
-               if (offset > USEC_PER_REAL_TICK) {
-                       lost = offset / USEC_PER_REAL_TICK;
-                       offset %= USEC_PER_REAL_TICK;
+               lost = 0;
+               while (offset > NSEC_PER_REAL_TICK) {
+                       lost++;
+                       offset -= NSEC_PER_REAL_TICK;
                }
 
                /* FIXME: 1000 or 1000000? */
@@ -433,9 +432,9 @@ void main_timer_handler(struct pt_regs *regs)
                vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
 
                if ((((tsc - vxtime.last_tsc) *
-                     vxtime.tsc_quot) >> US_SCALE) < offset)
+                     vxtime.tsc_quot) >> NS_SCALE) < offset)
                        vxtime.last_tsc = tsc -
-                               (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
+                               (((long) offset << NS_SCALE) / vxtime.tsc_quot) - 1;
        }
        /* SCALE: We expect tick_divider - 1 lost, ie 0 for normal behaviour */
        if (lost > (int)tick_divider - 1)  {
@@ -692,7 +691,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 
                tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
                if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-                       vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+                       vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
        }
        
        set_cyc2ns_scale(tsc_khz_ref);
@@ -997,8 +996,8 @@ void __init time_init(void)
                cpu_khz = tsc_calibrate_cpu_khz();
 
        vxtime.mode = VXTIME_TSC;
-       vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
-       vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+       vxtime.quot = (NSEC_PER_SEC << NS_SCALE) / vxtime_hz;
+       vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
        vxtime.last_tsc = get_cycles_sync();
        setup_irq(0, &irq0);
 
@@ -1085,8 +1084,8 @@ void time_init_gtod(void)
                vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
        printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 
                cpu_khz / 1000, cpu_khz % 1000);
-       vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
-       vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+       vxtime.quot = (NSEC_PER_SEC << NS_SCALE) / vxtime_hz;
+       vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
        vxtime.last_tsc = get_cycles_sync();
 
        set_cyc2ns_scale(cpu_khz);
index cf6c5d1e07f85e440da3ad00580be0155e4355bc..69719d6ec97fc5ee3319fd59485a47ce4ff6e867 100644 (file)
@@ -52,6 +52,8 @@ int __vgetcpu_mode __section_vgetcpu_mode;
          asm("" : "=r" (v) : "0" (x)); \
          ((v - fix_to_virt(VSYSCALL_FIRST_PAGE)) + __pa_symbol(&__vsyscall_0)); })
 
+#define NS_SCALE       10 /* 2^10, carefully chosen */
+
 static __always_inline void timeval_normalize(struct timeval * tv)
 {
        time_t __sec;
@@ -66,30 +68,34 @@ static __always_inline void timeval_normalize(struct timeval * tv)
 static __always_inline void do_vgettimeofday(struct timeval * tv)
 {
        long sequence, t;
-       unsigned long sec, usec;
+       long sec, nsec;
 
        do {
                sequence = read_seqbegin(&__xtime_lock);
-               
+
                sec = __xtime.tv_sec;
-               usec = (__xtime.tv_nsec / 1000) +
-                       (__jiffies - __wall_jiffies) * (1000000 / HZ);
+               nsec = __xtime.tv_nsec +
+                       (__jiffies - __wall_jiffies) * (NSEC_PER_SEC / HZ);
 
                if (__vxtime.mode != VXTIME_HPET) {
                        t = get_cycles_sync();
                        if (t < __vxtime.last_tsc)
                                t = __vxtime.last_tsc;
-                       usec += ((t - __vxtime.last_tsc) *
-                                __vxtime.tsc_quot) >> 32;
-                       /* See comment in x86_64 do_gettimeofday. */
+                       nsec += ((t - __vxtime.last_tsc) *
+                                __vxtime.tsc_quot) >> NS_SCALE;
                } else {
-                       usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
-                                 __vxtime.last) * __vxtime.quot) >> 32;
+                       nsec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) +
+                                       0xf0) -
+                                 __vxtime.last) * __vxtime.quot) >> NS_SCALE;
                }
        } while (read_seqretry(&__xtime_lock, sequence));
 
-       tv->tv_sec = sec + usec / 1000000;
-       tv->tv_usec = usec % 1000000;
+       tv->tv_sec = sec;
+       while (nsec >= NSEC_PER_SEC) {
+               tv->tv_sec += 1;
+               nsec -= NSEC_PER_SEC;
+       }
+       tv->tv_usec = nsec / NSEC_PER_USEC;
 }
 
 /* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
@@ -134,7 +140,8 @@ time_t __vsyscall(1) vtime(time_t *t)
        if (!__sysctl_vsyscall)
                return time_syscall(t);
        else if (t)
-               *t = __xtime.tv_sec;            
+               *t = __xtime.tv_sec;
+
        return __xtime.tv_sec;
 }
 
index 0b93dcae1ff91d1a9ed4237d0fa8740070ef7750..c51edd412ff01811c80ce6be6800777c152298d0 100644 (file)
@@ -2,7 +2,7 @@ XEN_TARGET_ARCH     = x86_32
 XEN_TARGET_X86_PAE ?= y
 
 LINUX_SERIES = 2.6
-LINUX_VER    = 2.6.18-92.1.13.el5
+LINUX_VER    = 2.6.18-92.1.17.el5
 
 EXTRAVERSION ?= xen
 
index 50586060e214134a5cb8139ff7abb009d5563ccc..939a1301625d978ac5e3d17e2a3fc29c850c5826 100644 (file)
@@ -2,7 +2,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.18-prep
-# Mon Sep 29 11:42:18 2008
+# Wed Nov  5 04:51:57 2008
 #
 CONFIG_X86_32=y
 CONFIG_GENERIC_TIME=y
index 84bd50eec0eb2b3672daa2c19e141f2a44dc3dad..c8657140cc702999f1515d21b42cc60bc6d2b1a8 100644 (file)
@@ -2,7 +2,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.18-prep
-# Mon Sep 29 11:42:18 2008
+# Wed Nov  5 04:51:58 2008
 #
 CONFIG_X86_32=y
 CONFIG_GENERIC_TIME=y
index afd893b9cea4d4b3af6df7b88fb1b1a71029d9b4..476b56ee5476ce53662178d89153d3273e1d2240 100644 (file)
@@ -2,7 +2,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.18-prep
-# Mon Sep 29 11:42:18 2008
+# Wed Nov  5 04:51:58 2008
 #
 CONFIG_X86_32=y
 CONFIG_LOCKDEP_SUPPORT=y
index b7e684a19475b14fe98f4f436b37699bd0ff03a3..44e70ee22db5d896ce7e87eacd302e0c4733391a 100644 (file)
@@ -2,7 +2,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.18-prep
-# Mon Sep 29 11:42:18 2008
+# Wed Nov  5 04:51:58 2008
 #
 CONFIG_X86_32=y
 CONFIG_GENERIC_TIME=y
index 34eec82c1b1e8902cce4eea96f193214771cbc64..13143bf46ef339b1bc8e8eba59e633a70751a01d 100644 (file)
@@ -97,16 +97,28 @@ acpi_status acpi_ex_create_alias(struct acpi_walk_state *walk_state)
         * to the original Node.
         */
        switch (target_node->type) {
+
+               /* For these types, the sub-object can change dynamically via a Store */
+
        case ACPI_TYPE_INTEGER:
        case ACPI_TYPE_STRING:
        case ACPI_TYPE_BUFFER:
        case ACPI_TYPE_PACKAGE:
        case ACPI_TYPE_BUFFER_FIELD:
 
+               /*
+                * These types open a new scope, so we need the NS node in order to access
+                * any children.
+                */
+       case ACPI_TYPE_DEVICE:
+       case ACPI_TYPE_POWER:
+       case ACPI_TYPE_PROCESSOR:
+       case ACPI_TYPE_THERMAL:
+       case ACPI_TYPE_LOCAL_SCOPE:
+
                /*
                 * The new alias has the type ALIAS and points to the original
-                * NS node, not the object itself.  This is because for these
-                * types, the object can change dynamically via a Store.
+                * NS node, not the object itself.
                 */
                alias_node->type = ACPI_TYPE_LOCAL_ALIAS;
                alias_node->object =
@@ -116,9 +128,7 @@ acpi_status acpi_ex_create_alias(struct acpi_walk_state *walk_state)
        case ACPI_TYPE_METHOD:
 
                /*
-                * The new alias has the type ALIAS and points to the original
-                * NS node, not the object itself.  This is because for these
-                * types, the object can change dynamically via a Store.
+                * Control method aliases need to be differentiated
                 */
                alias_node->type = ACPI_TYPE_LOCAL_METHOD_ALIAS;
                alias_node->object =
index c1c6c236df9affedc4ba07f0bbd161684b4053a6..913da3dd5d0cdfa29fb37038bdd4aea05b294da5 100644 (file)
@@ -586,44 +586,68 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
                        return_ACPI_STATUS(status);
                }
 
-               /*
-                * Sanity typecheck of the target object:
-                *
-                * If 1) This is the last segment (num_segments == 0)
-                *    2) And we are looking for a specific type
-                *       (Not checking for TYPE_ANY)
-                *    3) Which is not an alias
-                *    4) Which is not a local type (TYPE_SCOPE)
-                *    5) And the type of target object is known (not TYPE_ANY)
-                *    6) And target object does not match what we are looking for
-                *
-                * Then we have a type mismatch.  Just warn and ignore it.
-                */
-               if ((num_segments == 0) &&
-                   (type_to_check_for != ACPI_TYPE_ANY) &&
-                   (type_to_check_for != ACPI_TYPE_LOCAL_ALIAS) &&
-                   (type_to_check_for != ACPI_TYPE_LOCAL_METHOD_ALIAS) &&
-                   (type_to_check_for != ACPI_TYPE_LOCAL_SCOPE) &&
-                   (this_node->type != ACPI_TYPE_ANY) &&
-                   (this_node->type != type_to_check_for)) {
-
-                       /* Complain about a type mismatch */
-
-                       ACPI_WARNING((AE_INFO,
-                                     "NsLookup: Type mismatch on %4.4s (%s), searching for (%s)",
-                                     ACPI_CAST_PTR(char, &simple_name),
-                                     acpi_ut_get_type_name(this_node->type),
-                                     acpi_ut_get_type_name
-                                     (type_to_check_for)));
+               /* More segments to follow? */
+
+               if (num_segments > 0) {
+                       /*
+                        * If we have an alias to an object that opens a scope (such as a
+                        * device or processor), we need to dereference the alias here so that
+                        * we can access any children of the original node (via the remaining
+                        * segments).
+                        */
+                       if (this_node->type == ACPI_TYPE_LOCAL_ALIAS) {
+                               if (acpi_ns_opens_scope
+                                   (((struct acpi_namespace_node *)this_node->
+                                     object)->type)) {
+                                       this_node =
+                                           (struct acpi_namespace_node *)
+                                           this_node->object;
+                               }
+                       }
                }
 
-               /*
-                * If this is the last name segment and we are not looking for a
-                * specific type, but the type of found object is known, use that type
-                * to see if it opens a scope.
-                */
-               if ((num_segments == 0) && (type == ACPI_TYPE_ANY)) {
-                       type = this_node->type;
+               /* Special handling for the last segment (num_segments == 0) */
+
+               else {
+                       /*
+                        * Sanity typecheck of the target object:
+                        *
+                        * If 1) This is the last segment (num_segments == 0)
+                        *    2) And we are looking for a specific type
+                        *       (Not checking for TYPE_ANY)
+                        *    3) Which is not an alias
+                        *    4) Which is not a local type (TYPE_SCOPE)
+                        *    5) And the type of target object is known (not TYPE_ANY)
+                        *    6) And target object does not match what we are looking for
+                        *
+                        * Then we have a type mismatch. Just warn and ignore it.
+                        */
+                       if ((type_to_check_for != ACPI_TYPE_ANY) &&
+                           (type_to_check_for != ACPI_TYPE_LOCAL_ALIAS) &&
+                           (type_to_check_for != ACPI_TYPE_LOCAL_METHOD_ALIAS)
+                           && (type_to_check_for != ACPI_TYPE_LOCAL_SCOPE)
+                           && (this_node->type != ACPI_TYPE_ANY)
+                           && (this_node->type != type_to_check_for)) {
+
+                               /* Complain about a type mismatch */
+
+                               ACPI_WARNING((AE_INFO,
+                                             "NsLookup: Type mismatch on %4.4s (%s), searching for (%s)",
+                                             ACPI_CAST_PTR(char, &simple_name),
+                                             acpi_ut_get_type_name(this_node->
+                                                                   type),
+                                             acpi_ut_get_type_name
+                                             (type_to_check_for)));
+                       }
+
+                       /*
+                        * If this is the last name segment and we are not looking for a
+                        * specific type, but the type of found object is known, use that type
+                        * to (later) see if it opens a scope.
+                        */
+                       if (type == ACPI_TYPE_ANY) {
+                               type = this_node->type;
+                       }
                }
 
                /* Point to next name segment and make this node current */
index 68c9b630134c2a830a8ff5566234cd536cf8cebd..4722d53871dbbff42f876707ceee50934cfdb107 100644 (file)
@@ -1605,9 +1605,10 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out)
        u8 *buf;
        unsigned int buflen;
 
-       struct scatterlist *sg = scsi_sglist(cmd);
-
-       if (sg) {
+       if (cmd->use_sg) {
+               struct scatterlist *sg;
+               
+               sg = (struct scatterlist *) cmd->request_buffer;
                buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
                buflen = sg->length;
        } else {
@@ -1632,9 +1633,12 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out)
 
 static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf)
 {
-       struct scatterlist *sg = scsi_sglist(cmd);
-       if (sg)
+       if (cmd->use_sg) {
+               struct scatterlist *sg;
+
+               sg = (struct scatterlist *) cmd->request_buffer;
                kunmap_atomic(buf - sg->offset, KM_IRQ0);
+       }
 }
 
 /**
index 1d7a0006684bdfa77ff51e0162a74553889f4125..735a0dd7222229655dafaa544b35ccf1ffd66d17 100644 (file)
 # include <linux/efi.h>
 #endif
 
-static inline int range_is_allowed(unsigned long from, unsigned long to)
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
-       unsigned long cursor;
+       u64 from = ((u64)pfn) << PAGE_SHIFT;
+       u64 to = from + size;
+       u64 cursor = from;
 
-       cursor = from >> PAGE_SHIFT;
-       while ((cursor << PAGE_SHIFT) < to) {
-               if (!devmem_is_allowed(cursor)) {
-                       printk ("Program %s tried to read /dev/mem between %lx->%lx.\n",
+       while (cursor < to) {
+               if (!devmem_is_allowed(pfn)) {
+                       printk ("Program %s tried to read /dev/mem between %Lx->%Lx.\n",
                                        current->comm, from, to);
                        return 0;
                }
-               cursor++;
+               cursor += PAGE_SIZE;
+               pfn++;
        }
        return 1;
 }
@@ -167,7 +169,7 @@ static ssize_t read_mem(struct file * file, char __user * buf,
                 */
                ptr = xlate_dev_mem_ptr(p);
 
-               if (!range_is_allowed(p, p+count))
+               if (!range_is_allowed(p >> PAGE_SHIFT, count))
                        return -EPERM;
                if (copy_to_user(buf, ptr, sz))
                        return -EFAULT;
@@ -265,6 +267,9 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma)
        if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
                return -EINVAL;
 
+       if (!range_is_allowed(vma->vm_pgoff, size))
+               return -EPERM;
+
        vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
                                                 size,
                                                 vma->vm_page_prot);
index 0451ce0cfdd96b1cd9642f57d3ae183979d0c8c0..3c902839ba26ae61f2305250e2ca725ade6aff48 100644 (file)
@@ -387,7 +387,7 @@ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy,
                                int ret;
 
                                mutex_unlock(&cpufreq_governor_mutex);
-                               ret = request_module(name);
+                               ret = request_module("%s", name);
                                mutex_lock(&cpufreq_governor_mutex);
 
                                if (ret == 0)
index 7e725ba4764f982b6b6b6627a21a980243d960f2..43d2ee9e7f526f04c0c37f1ce3f8c0ffeaf3d5c5 100644 (file)
@@ -192,6 +192,8 @@ struct ehca_qp {
        int mtu_shift;
        u32 message_count;
        u32 packet_count;
+       atomic_t nr_events; /* events seen */
+       wait_queue_head_t wait_completion;
 };
 
 #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
index b5ca94c6b8d91a603aebff7f907c42f3234bb31e..ab8f42c318013bf5b753bcc7185e9667e7225254 100644 (file)
@@ -204,6 +204,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
 
        read_lock(&ehca_qp_idr_lock);
        qp = idr_find(&ehca_qp_idr, token);
+       if (qp)
+               atomic_inc(&qp->nr_events);
        read_unlock(&ehca_qp_idr_lock);
 
        if (!qp)
@@ -223,6 +225,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
        if (fatal && qp->ext_type == EQPT_SRQBASE)
                dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
 
+       if (atomic_dec_and_test(&qp->nr_events))
+               wake_up(&qp->wait_completion);
        return;
 }
 
index 162ce6f65f637d419fdd70b76c4b54dacdf5c4ff..f8f3ad38023bb06e505d641c588153827229edd3 100644 (file)
@@ -564,6 +564,8 @@ static struct ehca_qp *internal_create_qp(
                return ERR_PTR(-ENOMEM);
        }
 
+       atomic_set(&my_qp->nr_events, 0);
+       init_waitqueue_head(&my_qp->wait_completion);
        spin_lock_init(&my_qp->spinlock_s);
        spin_lock_init(&my_qp->spinlock_r);
        my_qp->qp_type = qp_type;
@@ -1974,6 +1976,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
        idr_remove(&ehca_qp_idr, my_qp->token);
        write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
+        /* now wait until all pending events have completed */
+       wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
+
        h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
        if (h_ret != H_SUCCESS) {
                ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
index f08d9f88abf71aff88314cbbaaa88c9281188a92..1bbc070bd51b5525d00dd2e5a3582612483cce93 100644 (file)
@@ -45,16 +45,10 @@ msi_register(struct msi_ops *ops)
        return 0;
 }
 
-static void msi_cache_ctor(void *p, kmem_cache_t *cache, unsigned long flags)
-{
-       memset(p, 0, sizeof(struct msi_desc));
-}
-
 static int msi_cache_init(void)
 {
-       msi_cachep = kmem_cache_create("msi_cache",
-                       sizeof(struct msi_desc),
-                       0, SLAB_HWCACHE_ALIGN, msi_cache_ctor, NULL);
+       msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc),
+                                       0, SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!msi_cachep)
                return -ENOMEM;
 
@@ -411,11 +405,10 @@ static struct msi_desc* alloc_msi_entry(void)
 {
        struct msi_desc *entry;
 
-       entry = kmem_cache_alloc(msi_cachep, SLAB_KERNEL);
+       entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL);
        if (!entry)
                return NULL;
 
-       memset(entry, 0, sizeof(struct msi_desc));
        entry->link.tail = entry->link.head = 0;        /* single message */
        entry->dev = NULL;
 
@@ -909,6 +902,33 @@ static int msix_capability_init(struct pci_dev *dev,
        return 0;
 }
 
+/**
+ * pci_msi_supported - check whether MSI may be enabled on device
+ * @dev: pointer to the pci_dev data structure of MSI device function
+ *
+ * MSI must be globally enabled and supported by the device and its root
+ * bus. But, the root bus is not easy to find since some architectures
+ * have virtual busses on top of the PCI hierarchy (for instance the
+ * hypertransport bus), while the actual bus where MSI must be supported
+ * is below. So we test the MSI flag on all parent busses and assume
+ * that no quirk will ever set the NO_MSI flag on a non-root bus.
+ **/
+static
+int pci_msi_supported(struct pci_dev * dev)
+{
+       struct pci_bus *bus;
+
+       if (!pci_msi_enable || !dev || dev->no_msi)
+               return -EINVAL;
+
+       /* check MSI flags of all parent busses */
+       for (bus = dev->bus; bus; bus = bus->parent)
+               if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
+                       return -EINVAL;
+
+       return 0;
+}
+
 /**
  * pci_enable_msi - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -921,19 +941,10 @@ static int msix_capability_init(struct pci_dev *dev,
  **/
 int pci_enable_msi(struct pci_dev* dev)
 {
-       struct pci_bus *bus;
-       int pos, temp, status = -EINVAL;
-       u16 control;
-
-       if (!pci_msi_enable || !dev)
-               return status;
-
-       if (dev->no_msi)
-               return status;
+       int pos, temp, status;
 
-       for (bus = dev->bus; bus; bus = bus->parent)
-               if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
-                       return -EINVAL;
+       if (pci_msi_supported(dev) < 0)
+               return -EINVAL;
 
        temp = dev->irq;
 
@@ -945,27 +956,8 @@ int pci_enable_msi(struct pci_dev* dev)
        if (!pos)
                return -EINVAL;
 
-       if (!msi_lookup_vector(dev, PCI_CAP_ID_MSI)) {
-               /* Lookup Sucess */
-               unsigned long flags;
+       WARN_ON(!msi_lookup_vector(dev, PCI_CAP_ID_MSI));
 
-               pci_read_config_word(dev, msi_control_reg(pos), &control);
-               if (control & PCI_MSI_FLAGS_ENABLE)
-                       return 0;       /* Already in MSI mode */
-               spin_lock_irqsave(&msi_lock, flags);
-               if (!vector_irq[dev->irq]) {
-                       msi_desc[dev->irq]->msi_attrib.state = 0;
-                       vector_irq[dev->irq] = -1;
-                       nr_released_vectors--;
-                       spin_unlock_irqrestore(&msi_lock, flags);
-                       status = msi_register_init(dev, msi_desc[dev->irq]);
-                       if (status == 0)
-                               enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
-                       return status;
-               }
-               spin_unlock_irqrestore(&msi_lock, flags);
-               dev->irq = temp;
-       }
        /* Check whether driver already requested for MSI-X vectors */
        pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
        if (pos > 0 && !msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
@@ -1007,6 +999,8 @@ void pci_disable_msi(struct pci_dev* dev)
        if (!(control & PCI_MSI_FLAGS_ENABLE))
                return;
 
+       disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
+
        spin_lock_irqsave(&msi_lock, flags);
        entry = msi_desc[dev->irq];
        if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
@@ -1020,14 +1014,12 @@ void pci_disable_msi(struct pci_dev* dev)
                       pci_name(dev), dev->irq);
                BUG_ON(entry->msi_attrib.state > 0);
        } else {
-               vector_irq[dev->irq] = 0; /* free it */
-               nr_released_vectors++;
                default_vector = entry->msi_attrib.default_vector;
                spin_unlock_irqrestore(&msi_lock, flags);
+               msi_free_vector(dev, dev->irq, 0);
+
                /* Restore dev->irq to its default pin-assertion vector */
                dev->irq = default_vector;
-               disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI),
-                                       PCI_CAP_ID_MSI);
        }
 }
 
@@ -1075,57 +1067,6 @@ static int msi_free_vector(struct pci_dev* dev, int vector, int reassign)
        return 0;
 }
 
-static int reroute_msix_table(int head, struct msix_entry *entries, int *nvec)
-{
-       int vector = head, tail = 0;
-       int i, j = 0, nr_entries = 0;
-       void __iomem *base;
-       unsigned long flags;
-
-       spin_lock_irqsave(&msi_lock, flags);
-       while (head != tail) {
-               nr_entries++;
-               tail = msi_desc[vector]->link.tail;
-               if (entries[0].entry == msi_desc[vector]->msi_attrib.entry_nr)
-                       j = vector;
-               vector = tail;
-       }
-       if (*nvec > nr_entries) {
-               spin_unlock_irqrestore(&msi_lock, flags);
-               *nvec = nr_entries;
-               return -EINVAL;
-       }
-       vector = ((j > 0) ? j : head);
-       for (i = 0; i < *nvec; i++) {
-               j = msi_desc[vector]->msi_attrib.entry_nr;
-               msi_desc[vector]->msi_attrib.state = 0; /* Mark it not active */
-               vector_irq[vector] = -1;                /* Mark it busy */
-               nr_released_vectors--;
-               entries[i].vector = vector;
-               if (j != (entries + i)->entry) {
-                       base = msi_desc[vector]->mask_base;
-                       msi_desc[vector]->msi_attrib.entry_nr =
-                               (entries + i)->entry;
-                       writel( readl(base + j * PCI_MSIX_ENTRY_SIZE +
-                               PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET), base +
-                               (entries + i)->entry * PCI_MSIX_ENTRY_SIZE +
-                               PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-                       writel( readl(base + j * PCI_MSIX_ENTRY_SIZE +
-                               PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET), base +
-                               (entries + i)->entry * PCI_MSIX_ENTRY_SIZE +
-                               PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-                       writel( (readl(base + j * PCI_MSIX_ENTRY_SIZE +
-                               PCI_MSIX_ENTRY_DATA_OFFSET) & 0xff00) | vector,
-                               base + (entries+i)->entry*PCI_MSIX_ENTRY_SIZE +
-                               PCI_MSIX_ENTRY_DATA_OFFSET);
-               }
-               vector = msi_desc[vector]->link.tail;
-       }
-       spin_unlock_irqrestore(&msi_lock, flags);
-
-       return 0;
-}
-
 /**
  * pci_enable_msix - configure device's MSI-X capability structure
  * @dev: pointer to the pci_dev data structure of MSI-X device function
@@ -1143,22 +1084,14 @@ static int reroute_msix_table(int head, struct msix_entry *entries, int *nvec)
  **/
 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
 {
-       struct pci_bus *bus;
        int status, pos, nr_entries, free_vectors;
        int i, j, temp;
        u16 control;
        unsigned long flags;
 
-       if (!pci_msi_enable || !dev || !entries)
+       if (!entries || pci_msi_supported(dev) < 0)
                return -EINVAL;
 
-       if (dev->no_msi)
-               return -EINVAL;
-
-       for (bus = dev->bus; bus; bus = bus->parent)
-               if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
-                       return -EINVAL;
-
        status = msi_init();
        if (status < 0)
                return status;
@@ -1168,9 +1101,6 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
                return -EINVAL;
 
        pci_read_config_word(dev, msi_control_reg(pos), &control);
-       if (control & PCI_MSIX_FLAGS_ENABLE)
-               return -EINVAL;                 /* Already in MSI-X mode */
-
        nr_entries = multi_msix_capable(control);
        if (nvec > nr_entries)
                return -EINVAL;
@@ -1185,19 +1115,8 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
                }
        }
        temp = dev->irq;
-       if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
-               /* Lookup Sucess */
-               nr_entries = nvec;
-               /* Reroute MSI-X table */
-               if (reroute_msix_table(dev->irq, entries, &nr_entries)) {
-                       /* #requested > #previous-assigned */
-                       dev->irq = temp;
-                       return nr_entries;
-               }
-               dev->irq = temp;
-               enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
-               return 0;
-       }
+       WARN_ON(!msi_lookup_vector(dev, PCI_CAP_ID_MSIX));
+
        /* Check whether driver already requested for MSI vector */
        if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
                !msi_lookup_vector(dev, PCI_CAP_ID_MSI)) {
@@ -1256,37 +1175,32 @@ void pci_disable_msix(struct pci_dev* dev)
        if (!(control & PCI_MSIX_FLAGS_ENABLE))
                return;
 
+       disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
+
        temp = dev->irq;
        if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
                int state, vector, head, tail = 0, warning = 0;
                unsigned long flags;
 
                vector = head = dev->irq;
-               spin_lock_irqsave(&msi_lock, flags);
+               dev->irq = temp;                        /* Restore pin IRQ */
                while (head != tail) {
+                       spin_lock_irqsave(&msi_lock, flags);
                        state = msi_desc[vector]->msi_attrib.state;
+                       tail = msi_desc[vector]->link.tail;
+                       spin_unlock_irqrestore(&msi_lock, flags);
                        if (state)
                                warning = 1;
-                       else {
-                               vector_irq[vector] = 0; /* free it */
-                               nr_released_vectors++;
-                       }
-                       tail = msi_desc[vector]->link.tail;
+                       else if (vector != head)        /* Release MSI-X vector */
+                               msi_free_vector(dev, vector, 0);
                        vector = tail;
                }
-               spin_unlock_irqrestore(&msi_lock, flags);
+               msi_free_vector(dev, vector, 0);
                if (warning) {
-                       dev->irq = temp;
                        printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without "
                               "free_irq() on all MSI-X vectors\n",
                               pci_name(dev));
                        BUG_ON(warning > 0);
-               } else {
-                       dev->irq = temp;
-                       disable_msi_mode(dev,
-                               pci_find_capability(dev, PCI_CAP_ID_MSIX),
-                               PCI_CAP_ID_MSIX);
-
                }
        }
 }
index 56951c39d3a3e8c8d6656f2b2a2f1ac49321f475..9b31d4cbbce46f6749adca35924f1ef045813cf8 100644 (file)
@@ -110,8 +110,8 @@ extern int pci_vector_resources(int last, int nr_released);
        (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
 #define multi_msi_enable(control, num) \
        control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
-#define is_64bit_address(control)      (control & PCI_MSI_FLAGS_64BIT)
-#define is_mask_bit_support(control)   (control & PCI_MSI_FLAGS_MASKBIT)
+#define is_64bit_address(control)      (!!(control & PCI_MSI_FLAGS_64BIT))
+#define is_mask_bit_support(control)   (!!(control & PCI_MSI_FLAGS_MASKBIT))
 #define msi_enable(control, num) multi_msi_enable(control, num); \
        control |= PCI_MSI_FLAGS_ENABLE
 
index fc06f60f206ea5e10707c4702560607d7e73afaa..3cc6146e898f6ce4ee64ae326f32a0e6d7812c65 100644 (file)
@@ -175,8 +175,8 @@ static struct aac_driver_ident aac_drivers[] = {
        { aac_rx_init, "percraid", "DELL    ", "PERCRAID        ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* PERC 3/Di (Boxster/PERC3DiB) */
        { aac_rx_init, "aacraid",  "ADAPTEC ", "catapult        ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* catapult */
        { aac_rx_init, "aacraid",  "ADAPTEC ", "tomcat          ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* tomcat */
-       { aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2120S   ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2120S (Crusader) */
-       { aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2200S   ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2200S (Vulcan) */
+       { aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2120S   ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG },                     /* Adaptec 2120S (Crusader) */
+       { aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2200S   ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG },                     /* Adaptec 2200S (Vulcan) */
        { aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2200S   ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2200S (Vulcan-2m) */
        { aac_rx_init, "aacraid",  "Legend  ", "Legend S220     ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Legend S220 (Legend Crusader) */
        { aac_rx_init, "aacraid",  "Legend  ", "Legend S230     ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Legend S230 (Legend Vulcan) */
index 1d408d15443852ebf41576dc60beadec023db23a..e3577dbc6f33711188ed66a512923087174668e9 100644 (file)
@@ -1005,8 +1005,9 @@ qla2x00_status_entry(scsi_qla_host_t *ha, void *pkt)
                resid = resid_len;
                /* Use F/W calculated residual length. */
                if (IS_FWI2_CAPABLE(ha)) {
-                       if (scsi_status & SS_RESIDUAL_UNDER &&
-                           resid != fw_resid_len) {
+                       if (!(scsi_status & SS_RESIDUAL_UNDER)) {
+                               lscsi_status = 0;
+                       } else if (resid != fw_resid_len) {
                                scsi_status &= ~SS_RESIDUAL_UNDER;
                                lscsi_status = 0;
                        }
index f814a8f8feaadc6dadfd3486c09f457579911151..3ebdabb62d7f107bc658496dcb2f030791aa19e1 100644 (file)
@@ -437,6 +437,8 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
        mutex_init(&u->ring_cons_mutex);
 
+       mutex_init(&u->ring_cons_mutex);
+
        filp->private_data = u;
 
        u->bind_cpu = -1;
index 10a1d58f59d944bb5f72203640d9d52165dc125e..90d1d542108b542b9cc26ce21c9979aab171d181 100644 (file)
@@ -97,6 +97,7 @@ typedef struct netif_st {
        /* Miscellaneous private stuff. */
        struct list_head list;  /* scheduling list */
        atomic_t         refcnt;
+       struct xenbus_device *xendev;
        struct net_device *dev;
        struct net_device_stats stats;
 
@@ -191,6 +192,7 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
        } while (0)
 
 void netif_xenbus_init(void);
+void netif_interfaces_init(void);
 
 #define netif_schedulable(netif)                               \
        (netif_running((netif)->dev) && netback_carrier_ok(netif))
index 610891fc29437393ccbbd5fe68ef4a5b0c4389e8..61f94cce53b3179ac0d171fe54aeea691e980b8d 100644 (file)
@@ -33,6 +33,7 @@
 #include "common.h"
 #include <linux/ethtool.h>
 #include <linux/rtnetlink.h>
+#include <xen/xenbus.h>
 
 /*
  * Module parameter 'queue_length':
@@ -334,3 +335,31 @@ void netif_disconnect(netif_t *netif)
 
        free_netdev(netif->dev);
 }
+
+
+static int
+netdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
+{
+       struct net_device *dev = ptr;
+
+       /* Carrier up event and is it one of our devices? */
+       if (event == NETDEV_CHANGE && netif_carrier_ok(dev) &&
+           dev->open == net_open) {
+               netif_t *netif = netdev_priv(dev);
+
+               xenbus_switch_state(netif->xendev, XenbusStateConnected);
+       }
+
+       return NOTIFY_DONE;
+}
+
+
+static struct notifier_block notifier_netdev = {
+       .notifier_call  = netdev_notify,
+};
+
+
+void netif_interfaces_init(void)
+{
+       (void)register_netdevice_notifier(&notifier_netdev);
+}
index 145f67db669f8fcdd6b6c70271add013b2687996..70aecf3f08a9fb87b0f57e58747c25d7117df63c 100644 (file)
@@ -1613,6 +1613,7 @@ static int __init netback_init(void)
 
        netif_accel_init();
 
+       netif_interfaces_init();
        netif_xenbus_init();
 
 #ifdef NETBE_DEBUG_INTERRUPT
index d7faeb624d1d2718f7d410fd7027c17029f08e63..491596f651d476fd7197514eaa2752e8a5367389 100644 (file)
@@ -203,6 +203,7 @@ static void backend_create_netif(struct backend_info *be)
                return;
        }
 
+       be->netif->xendev = dev;
        kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
 }
 
@@ -328,10 +329,6 @@ static void connect(struct backend_info *be)
        int err;
        struct xenbus_device *dev = be->dev;
 
-       err = connect_rings(be);
-       if (err)
-               return;
-
        err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
        if (err) {
                xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
@@ -342,7 +339,9 @@ static void connect(struct backend_info *be)
                          &be->netif->credit_usec);
        be->netif->remaining_credit = be->netif->credit_bytes;
 
-       xenbus_switch_state(dev, XenbusStateConnected);
+       err = connect_rings(be);
+       if (err)
+               return;
 
        netif_wake_queue(be->netif->dev);
 }
index 1f2d1ad63319430ad4ea6ac4c2684df93829fc41..7f302b80cc77f8fc177b70fc87ac85192f7c68db 100644 (file)
@@ -44,7 +44,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
                        return -ENOEXEC;
        }
 
-       bprm->sh_bang++;        /* Well, the bang-shell is implicit... */
+       bprm->sh_bang = 1;      /* Well, the bang-shell is implicit... */
        allow_write_access(bprm->file);
        fput(bprm->file);
        bprm->file = NULL;
index 1713c48fef5494e5e8ef361a83b5795ecaae4ce9..a5d77c6661feede1592ada5cc5afa1319a6476dc 100644 (file)
@@ -42,6 +42,9 @@ enum {Enabled, Magic};
 #define MISC_FMT_OPEN_BINARY (1<<30)
 #define MISC_FMT_CREDENTIALS (1<<29)
 
+/* Marker for breaking misc - > script -> misc loop */
+#define MISC_BANG (1<<1)
+
 typedef struct {
        struct list_head list;
        unsigned long flags;            /* type, status, etc. */
@@ -116,6 +119,10 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
        if (!enabled)
                goto _ret;
 
+       retval = -ENOEXEC;
+       if (bprm->sh_bang & MISC_BANG)
+               goto _ret;
+
        /* to keep locking time low, we copy the interpreter string */
        read_lock(&entries_lock);
        fmt = check_file(bprm);
@@ -199,6 +206,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
        if (retval < 0)
                goto _error;
 
+       bprm->sh_bang |= MISC_BANG;
+
        retval = search_binary_handler (bprm, regs);
        if (retval < 0)
                goto _error;
index 1edbcca25a7366198667d5c8fdfe92d4005a65bc..de5377c9adc9bff2b9c16f034b46de4bbf62f7f6 100644 (file)
@@ -30,7 +30,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
         * Sorta complicated, but hopefully it will work.  -TYT
         */
 
-       bprm->sh_bang++;
+       bprm->sh_bang = 1;
        allow_write_access(bprm->file);
        fput(bprm->file);
        bprm->file = NULL;
index 0abde4204ad811ee6eb173189dc056cc33642228..40e30ad6b9c99e3f700a97e5c54a22ddd6f005f0 100644 (file)
@@ -939,6 +939,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
                return -EBADF;
        open_file = (struct cifsFileInfo *) file->private_data;
 
+       rc = generic_write_checks(file, poffset, &write_size, 0);
+       if (rc)
+               return rc;
+
        xid = GetXid();
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
index c19eac7263f6fe62069ff096b73d0adddf8fdb74..1862b9d8f15fe087ce6c42f21e60ea9e312ab56f 100644 (file)
@@ -84,7 +84,7 @@ struct dlm_lock_result32 {
 
 static void compat_input(struct dlm_write_request *kb,
                         struct dlm_write_request32 *kb32,
-                        int max_namelen)
+                        size_t count)
 {
        kb->version[0] = kb32->version[0];
        kb->version[1] = kb32->version[1];
@@ -96,7 +96,8 @@ static void compat_input(struct dlm_write_request *kb,
            kb->cmd == DLM_USER_REMOVE_LOCKSPACE) {
                kb->i.lspace.flags = kb32->i.lspace.flags;
                kb->i.lspace.minor = kb32->i.lspace.minor;
-               strcpy(kb->i.lspace.name, kb32->i.lspace.name);
+               memcpy(kb->i.lspace.name, kb32->i.lspace.name, count -
+                      offsetof(struct dlm_write_request32, i.lspace.name));
        } else if (kb->cmd == DLM_USER_PURGE) {
                kb->i.purge.nodeid = kb32->i.purge.nodeid;
                kb->i.purge.pid = kb32->i.purge.pid;
@@ -114,10 +115,8 @@ static void compat_input(struct dlm_write_request *kb,
                kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr;
                kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb;
                memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
-               if (kb->i.lock.namelen <= max_namelen)
-                       memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen);
-               else
-                       kb->i.lock.namelen = max_namelen;
+               memcpy(kb->i.lock.name, kb32->i.lock.name, count -
+                      offsetof(struct dlm_write_request32, i.lock.name));
        }
 }
 
@@ -509,7 +508,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 #endif
                return -EINVAL;
 
-       kbuf = kmalloc(count, GFP_KERNEL);
+       kbuf = kzalloc(count + 1, GFP_KERNEL);
        if (!kbuf)
                return -ENOMEM;
 
@@ -527,14 +526,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
        if (!kbuf->is64bit) {
                struct dlm_write_request32 *k32buf;
                k32buf = (struct dlm_write_request32 *)kbuf;
-               kbuf = kmalloc(count + (sizeof(struct dlm_write_request) -
+               kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) -
                               sizeof(struct dlm_write_request32)), GFP_KERNEL);
                if (!kbuf)
                        return -ENOMEM;
 
                if (proc)
                        set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
-               compat_input(kbuf, k32buf, count - sizeof(struct dlm_write_request32));
+               compat_input(kbuf, k32buf, count + 1);
                kfree(k32buf);
        }
 #endif
index 39ee034aefb036c740fd56e66811f9f54524f3c8..9e9cb22b9363a016b6f9363ee44a627d756f98d7 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -214,6 +214,9 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
                newattrs.ia_valid |= ATTR_FILE;
        }
 
+       /* Remove suid/sgid on truncate too */
+       newattrs.ia_valid |= should_remove_suid(dentry);
+
        mutex_lock(&dentry->d_inode->i_mutex);
        err = notify_change(dentry, &newattrs);
        mutex_unlock(&dentry->d_inode->i_mutex);
index d108451880c43e11ac23e7008cbcb683bc607e0a..32e0cc1c6072d8216cb94f9cb6288a920aac43d5 100644 (file)
@@ -615,7 +615,7 @@ find_page:
                        ret = add_to_page_cache_lru(page, mapping, index,
                                                    gfp_mask);
                        if (unlikely(ret))
-                               goto out;
+                               goto out_release;
                }
 
                /*
@@ -696,8 +696,9 @@ find_page:
                goto find_page;
        }
 out:
-       page_cache_release(page);
        unlock_page(page);
+out_release:
+       page_cache_release(page);
 out_ret:
        return ret;
 }
@@ -826,12 +827,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                          loff_t *ppos, size_t len, unsigned int flags)
 {
        struct address_space *mapping = out->f_mapping;
+       struct inode *inode = mapping->host;
        ssize_t ret;
+       int err;
+
+       err = should_remove_suid(out->f_dentry);
+       if (unlikely(err)) {
+               mutex_lock(&inode->i_mutex);
+               err = __remove_suid(out->f_dentry, err);
+               mutex_unlock(&inode->i_mutex);
+               if (err)
+                       return err;
+       }
 
        ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
        if (ret > 0) {
-               struct inode *inode = mapping->host;
-
                *ppos += ret;
 
                /*
@@ -839,8 +849,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                 * sync it.
                 */
                if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
-                       int err;
-
                        mutex_lock(&inode->i_mutex);
                        err = generic_osync_inode(inode, mapping,
                                                  OSYNC_METADATA|OSYNC_DATA);
index 6cd9f2d70512af067bd6de3c76a6f45a7ad0ffe8..6b0cfd4656757463485fe29755ac2a080a551f7c 100644 (file)
@@ -24,6 +24,7 @@
 #define __EXTRA_CLOBBER  \
        ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
 
+/* Save and restore flags around the switch so a leaked NT flag is cleared */
 #define switch_to(prev,next,last) \
        asm volatile(SAVE_CONTEXT                                                   \
                     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */       \
index 4aa1c87f7e42ed8c5ead849c0d4524ac34eac9bb..c448d24022a46a0cb1c4e30f0b45d1c385f6295b 100644 (file)
@@ -39,7 +39,7 @@ extern void ia32_syscall(void);
 extern int pmtimer_mark_offset(void);
 extern void pmtimer_resume(void);
 extern void pmtimer_wait(unsigned);
-extern unsigned int do_gettimeoffset_pm(void);
+extern long do_gettimeoffset_pm(void);
 #ifdef CONFIG_X86_PM_TIMER
 extern u32 pmtmr_ioport;
 #else
@@ -85,7 +85,6 @@ extern void swap_low_mappings(void);
 extern void __show_regs(struct pt_regs * regs);
 extern void show_regs(struct pt_regs * regs);
 
-extern char *syscall32_page;
 extern void syscall32_cpu_init(void);
 
 extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
index c631c082f8f7d281464f7eb8c5df07791e2836ab..246b461f0ef8344a35de22452d75090479ce031f 100644 (file)
@@ -8,7 +8,6 @@
 #define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410)
 #else
 #define VSYSCALL32_BASE 0xffffe000UL
-#define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE)
 #define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE)
 
 #define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400) 
index 6644592ddc74273607984aed4a5d56e8b67119ff..942464afeb948af59ca4cb0ec8142186aaa5370e 100644 (file)
@@ -1667,6 +1667,8 @@ extern void __iget(struct inode * inode);
 extern void clear_inode(struct inode *);
 extern void destroy_inode(struct inode *);
 extern struct inode *new_inode(struct super_block *);
+extern int __remove_suid(struct dentry *, int);
+extern int should_remove_suid(struct dentry *);
 extern int remove_suid(struct dentry *);
 extern void remove_dquot_ref(struct super_block *, int, struct list_head *);
 
index 5cab56664c19ed2876375eae129912463df66311..61c5d6db594396013b2008847500ca68edb2119e 100644 (file)
@@ -1034,8 +1034,7 @@ static inline unsigned long get_unmapped_area(struct file * file, unsigned long
 
 extern int install_special_mapping(struct mm_struct *mm,
                                   unsigned long addr, unsigned long len,
-                                  unsigned long vm_flags, pgprot_t pgprot,
-                                  struct page **pages);
+                                  unsigned long vm_flags, struct page **pages);
 
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
index 6605ba75ec9adbea35d5d85ba95cbd4b1b242ac2..7bb547e5cfc6da59b68a6c5f20ee6e6c9b8821d7 100644 (file)
@@ -1929,11 +1929,10 @@ repeat:
  *     if suid or (sgid and xgrp)
  *             remove privs
  */
-int remove_suid(struct dentry *dentry)
+int should_remove_suid(struct dentry *dentry)
 {
        mode_t mode = dentry->d_inode->i_mode;
        int kill = 0;
-       int result = 0;
 
        /* suid always must be killed */
        if (unlikely(mode & S_ISUID))
@@ -1946,13 +1945,28 @@ int remove_suid(struct dentry *dentry)
        if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
                kill |= ATTR_KILL_SGID;
 
-       if (unlikely(kill && !capable(CAP_FSETID))) {
-               struct iattr newattrs;
+       if (unlikely(kill && !capable(CAP_FSETID)))
+               return kill;
 
-               newattrs.ia_valid = ATTR_FORCE | kill;
-               result = notify_change(dentry, &newattrs);
-       }
-       return result;
+       return 0;
+}
+
+int __remove_suid(struct dentry *dentry, int kill)
+{
+       struct iattr newattrs;
+
+       newattrs.ia_valid = ATTR_FORCE | kill;
+       return notify_change(dentry, &newattrs);
+}
+
+int remove_suid(struct dentry *dentry)
+{
+       int kill = should_remove_suid(dentry);
+
+       if (unlikely(kill))
+               return __remove_suid(dentry, kill);
+
+       return 0;
 }
 EXPORT_SYMBOL(remove_suid);
 
index ca888fec672cf7209bd6a0097dde8307fba9d0df..70a7d6cb41ec2e3c338d4d28fbee866bec9a4a39 100644 (file)
@@ -955,17 +955,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
        }
 
        ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
-       if (!ptep)
-               goto out;
 
        pte = *ptep;
        if (!pte_present(pte))
-               goto unlock;
+               goto no_page;
        if ((flags & FOLL_WRITE) && !pte_write(pte))
                goto unlock;
        page = vm_normal_page(vma, address, pte);
        if (unlikely(!page))
-               goto unlock;
+               goto bad_page;
 
        if (flags & FOLL_GET)
                get_page(page);
@@ -980,6 +978,15 @@ unlock:
 out:
        return page;
 
+bad_page:
+       pte_unmap_unlock(ptep, ptl);
+       return ERR_PTR(-EFAULT);
+
+no_page:
+       pte_unmap_unlock(ptep, ptl);
+       if (!pte_none(pte))
+               return page;
+       /* Fall through to ZERO_PAGE handling */
 no_page_table:
        /*
         * When core dumping an enormous anonymous area that nobody
@@ -994,6 +1001,26 @@ no_page_table:
        return page;
 }
 
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+       /*
+        * We don't want to optimize FOLL_ANON for make_pages_present()
+        * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+        * we want to get the page from the page tables to make sure
+        * that we serialize and update with any other user of that
+        * mapping.
+        */
+       if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+               return 0;
+       /*
+        * And if we have a fault or a nopfn routine, it's not an
+        * anonymous region.
+        */
+       return !vma->vm_ops ||
+               (!vma->vm_ops->nopage && !vma->vm_ops->nopfn);
+}
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, int len, int write, int force,
                struct page **pages, struct vm_area_struct **vmas)
@@ -1086,8 +1113,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                foll_flags = FOLL_TOUCH;
                if (pages)
                        foll_flags |= FOLL_GET;
-               if (!write && !(vma->vm_flags & VM_LOCKED) &&
-                   (!vma->vm_ops || !vma->vm_ops->nopage))
+               if (!write && use_zero_page(vma))
                        foll_flags |= FOLL_ANON;
 
                do {
@@ -1125,6 +1151,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                        BUG();
                                }
                        }
+                       if (IS_ERR(page))
+                               return i ? i : PTR_ERR(page);
                        if (pages) {
                                pages[i] = page;
 
index 289b068aa311b0f16228efb547d1017e23668bbf..2f454fdbd11c2e8bfcee5c524ceaad8969d1e7d5 100644 (file)
@@ -778,6 +778,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
                        goto set_status;
 
                page = follow_page(vma, pp->addr, FOLL_GET);
+
+               err = PTR_ERR(page);
+               if (IS_ERR(page))
+                       goto set_status;
+
                err = -ENOENT;
                if (!page)
                        goto set_status;
@@ -841,6 +846,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
                        goto set_status;
 
                page = follow_page(vma, pm->addr, 0);
+
+               err = PTR_ERR(page);
+               if (IS_ERR(page))
+                       goto set_status;
+
                err = -ENOENT;
                /* Use PageReserved to check for zero page */
                if (!page || PageReserved(page))
index d89ab1f3f4f6191616346a8633a9f131ee98cd58..dabff0b8a527e9c39c0cd031f70a9733aec40e7c 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2223,11 +2223,16 @@ special_mapping_nopage(struct vm_area_struct *vma,
        return NOPAGE_SIGBUS;
 }
 
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
 static struct vm_operations_struct special_mapping_vmops = {
+       .close = special_mapping_close,
        .nopage = special_mapping_nopage,
 };
 
-unsigned int vdso_populate = 1;
+unsigned int vdso_populate = 0;
 
 /*
  * Insert a new vma covering the given region, with the given flags and
@@ -2238,8 +2243,7 @@ unsigned int vdso_populate = 1;
  */
 int install_special_mapping(struct mm_struct *mm,
                            unsigned long addr, unsigned long len,
-                           unsigned long vm_flags, pgprot_t pgprot,
-                           struct page **pages)
+                           unsigned long vm_flags, struct page **pages)
 {
        struct vm_area_struct *vma;
        int err;
@@ -2253,13 +2257,16 @@ int install_special_mapping(struct mm_struct *mm,
        vma->vm_start = addr;
        vma->vm_end = addr + len;
 
-       vma->vm_flags = vm_flags | VM_DONTEXPAND;
-       vma->vm_page_prot = pgprot;
+       vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
+       vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
        vma->vm_ops = &special_mapping_vmops;
        vma->vm_private_data = pages;
 
-       insert_vm_struct(mm, vma);
+       if (unlikely(insert_vm_struct(mm, vma))) {
+               kmem_cache_free(vm_area_cachep, vma);
+               return -ENOMEM;
+       }
        mm->total_vm += len >> PAGE_SHIFT;
 
        if (!vdso_populate)
index e31c90e055941e73f020e07adcd92360e81960da..4ca43fb0b1f1686184478ca3c4c3e03240a25a9e 100644 (file)
@@ -80,23 +80,12 @@ static u32 __net_random(struct nrnd_state *state)
        return (state->s1 ^ state->s2 ^ state->s3);
 }
 
-static void __net_srandom(struct nrnd_state *state, unsigned long s)
+/*
+ * Handle minimum values for seeds
+ */
+static inline u32 __seed(u32 x, u32 m)
 {
-       if (s == 0)
-               s = 1;      /* default seed is 1 */
-
-#define LCG(n) (69069 * n)
-       state->s1 = LCG(s);
-       state->s2 = LCG(state->s1);
-       state->s3 = LCG(state->s2);
-
-       /* "warm it up" */
-       __net_random(state);
-       __net_random(state);
-       __net_random(state);
-       __net_random(state);
-       __net_random(state);
-       __net_random(state);
+       return (x < m) ? x + m : x;
 }
 
 
@@ -112,9 +101,15 @@ unsigned long net_random(void)
 
 void net_srandom(unsigned long entropy)
 {
-       struct nrnd_state *state = &get_cpu_var(net_rand_state);
-       __net_srandom(state, state->s1^entropy);
-       put_cpu_var(state);
+       int i;
+       /*
+        * No locking on the CPUs, but then somewhat random results are, well,
+        * expected.
+        */
+       for_each_possible_cpu (i) {
+               struct nrnd_state *state = &per_cpu(net_rand_state, i);
+               state->s1 = __seed(state->s1 ^ entropy, 1);
+       }
 }
 
 void __init net_random_init(void)
@@ -123,20 +118,37 @@ void __init net_random_init(void)
 
        for_each_possible_cpu(i) {
                struct nrnd_state *state = &per_cpu(net_rand_state,i);
-               __net_srandom(state, i+jiffies);
+
+#define LCG(x) ((x) * 69069)   /* super-duper LCG */
+               state->s1 = __seed(LCG(i + jiffies), 1);
+               state->s2 = __seed(LCG(state->s1), 7);
+               state->s3 = __seed(LCG(state->s2), 15);
+
+               /* "warm it up" */
+               __net_random(state);
+               __net_random(state);
+               __net_random(state);
+               __net_random(state);
+               __net_random(state);
+               __net_random(state);
        }
 }
 
 static int net_random_reseed(void)
 {
        int i;
-       unsigned long seed;
 
        for_each_possible_cpu(i) {
                struct nrnd_state *state = &per_cpu(net_rand_state,i);
+               u32 seeds[3];
+
+               get_random_bytes(&seeds, sizeof(seeds));
+               state->s1 = __seed(seeds[0], 1);
+               state->s2 = __seed(seeds[1], 7);
+               state->s3 = __seed(seeds[2], 15);
 
-               get_random_bytes(&seed, sizeof(seed));
-               __net_srandom(state, seed);
+               /* mix it in */
+               __net_random(state);
        }
        return 0;
 }
index 6f14bb5a28d41c4de55bfa8c7bdad0b50b795a13..2a2f9e775cb7bcc5e765ed61a42b7aa9597913c2 100644 (file)
@@ -431,6 +431,11 @@ static int dccp_setsockopt_change(struct sock *sk, int type,
 
        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;
+       /*
+        * rfc4340: 6.1. Change Options
+        */
+       if (opt.dccpsf_len < 1)
+               return -EINVAL;
 
        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (!val)