From 32423fce6784281f0b199f128ff422569058bfca Mon Sep 17 00:00:00 2001 From: t_jeang Date: Tue, 6 Jan 2009 12:05:55 +0000 Subject: [PATCH] * Wed Oct 22 2008 Jiri Pirko [2.6.18-92.1.17.el5] - Revert: [nfs] pages of a memory mapped file get corrupted (Peter Staubach ) [450335 435291] * Sat Oct 18 2008 Jiri Pirko [2.6.18-92.1.16.el5] - [i386] vDSO: use install_special_mapping (Peter Zijlstra ) [460275 460276] {CVE-2008-3527} - [scsi] aacraid: remove some quirk AAC_QUIRK_SCSI_32 bits (Tomas Henzl ) [466885 453472] - [fs] remove SUID when splicing into an inode (Eric Sandeen ) [464451 464452] {CVE-2008-3833} - [fs] open() allows setgid bit when user is not in group (Eugene Teo ) [463867 463687] {CVE-2008-4210} - [xen] ia64: fix INIT injection (Tetsu Yamamoto ) [467105 464445] * Fri Oct 10 2008 Jiri Pirko [2.6.18-92.1.15.el5] - [pci] fix problems with msi interrupt management (Neil Horman ) [461894 428696] - [x86_64] revert time syscall changes (Prarit Bhargava ) [466427 461184] - [xen] allow guests to hide the TSC from applications (Chris Lalancette ) [378471 378481] {CVE-2007-5907} - [scsi] qla2xxx: additional residual-count correction (Marcus Barrow ) [465741 462117] - [char] add range_is_allowed check to mmap_mem (Eugene Teo ) [460858 460857] - [fs] binfmt_misc: avoid potential kernel stack overflow (Vitaly Mayatskikh ) [459464 459463] - [misc] cpufreq: fix format string bug (Vitaly Mayatskikh ) [459461 459460] - [dlm] user.c input validation fixes (David Teigland ) [458759 458760] - [nfs] pages of a memory mapped file get corrupted (Peter Staubach ) [450335 435291] - [x86_64] gettimeofday fixes for HPET, PMTimer, TSC (Prarit Bhargava ) [462860 250708] * Wed Sep 24 2008 Jiri Pirko [2.6.18-92.1.14.el5] - [libata] ata_scsi_rbuf_get check for scatterlist usage (David Milburn ) [460638 455445] - [net] random32: seeding improvement (Jiri Pirko ) [458021 458019] - [x86_64] xen: local DOS due to NT bit leakage (Eugene Teo ) [457721 457722] {CVE-2006-5755} - [fs] cifs: fix 
O_APPEND on directio mounts (Jeff Layton ) [462591 460063] - [openib] race between QP async handler and destroy_qp (Brad Peters ) [458781 446109] - [net] dccp_setsockopt_change integer overflow (Vitaly Mayatskikh ) [459232 459235] {CVE-2008-3276} - [acpi] error attaching device data (peterm@redhat.com ) [460868 459670] - [mm] optimize ZERO_PAGE in 'get_user_pages' and fix XIP (Anton Arapov ) [452667 452668] {CVE-2008-2372} - [xen] xennet: coordinate ARP with backend network status (Herbert Xu ) [461457 458934] - [xen] event channel lock and barrier (Markus Armbruster ) [461099 457086] - [fs] fix bad unlock_page in pipe_to_file() error path (Larry Woodman ) [462436 439917] --- Makefile | 2 +- arch/i386/kernel/sysenter.c | 53 ++---- arch/powerpc/kernel/vdso.c | 2 +- arch/x86_64/ia32/syscall32-xen.c | 61 ++----- arch/x86_64/ia32/syscall32.c | 58 ++----- arch/x86_64/kernel/pmtimer.c | 6 +- arch/x86_64/kernel/setup64-xen.c | 2 + arch/x86_64/kernel/time.c | 73 +++++---- arch/x86_64/kernel/vsyscall.c | 31 ++-- buildconfigs/Rules.mk | 2 +- configs/kernel-2.6.18-i686-PAE.config | 2 +- configs/kernel-2.6.18-i686-debug.config | 2 +- configs/kernel-2.6.18-i686-xen.config | 2 +- configs/kernel-2.6.18-i686.config | 2 +- drivers/acpi/executer/excreate.c | 20 ++- drivers/acpi/namespace/nsaccess.c | 96 ++++++----- drivers/ata/libata-scsi.c | 14 +- drivers/char/mem.c | 21 ++- drivers/cpufreq/cpufreq.c | 2 +- drivers/infiniband/hw/ehca/ehca_classes.h | 2 + drivers/infiniband/hw/ehca/ehca_irq.c | 4 + drivers/infiniband/hw/ehca/ehca_qp.c | 5 + drivers/pci/msi.c | 186 ++++++---------------- drivers/pci/msi.h | 4 +- drivers/scsi/aacraid/linit.c | 4 +- drivers/scsi/qla2xxx/qla_isr.c | 5 +- drivers/xen/evtchn/evtchn.c | 2 + drivers/xen/netback/common.h | 2 + drivers/xen/netback/interface.c | 29 ++++ drivers/xen/netback/netback.c | 1 + drivers/xen/netback/xenbus.c | 9 +- fs/binfmt_em86.c | 2 +- fs/binfmt_misc.c | 9 ++ fs/binfmt_script.c | 2 +- fs/cifs/file.c | 4 + fs/dlm/user.c | 17 +- 
fs/open.c | 3 + fs/splice.c | 20 ++- include/asm-x86_64/mach-xen/asm/system.h | 1 + include/asm-x86_64/proto.h | 3 +- include/asm-x86_64/vsyscall32.h | 1 - include/linux/fs.h | 2 + include/linux/mm.h | 3 +- mm/filemap.c | 30 +++- mm/memory.c | 40 ++++- mm/migrate.c | 10 ++ mm/mmap.c | 19 ++- net/core/utils.c | 58 ++++--- net/dccp/proto.c | 5 + 49 files changed, 476 insertions(+), 457 deletions(-) diff --git a/Makefile b/Makefile index 8da9f748..69800b5c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 18 -EXTRAVERSION = -92.1.13.el5 +EXTRAVERSION = -92.1.17.el5 RHEL_MAJOR = 5 RHEL_MINOR = 2 NAME=Avast! A bilge rat! diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index d02074f2..57bce748 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -100,11 +100,12 @@ void enable_sep_cpu(void) */ extern const char vsyscall_int80_start, vsyscall_int80_end; extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; -static void *syscall_page; +static struct page *syscall_pages[1]; int __cpuinit sysenter_setup(void) { - syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + syscall_pages[0] = virt_to_page(syscall_page); #ifdef CONFIG_COMPAT_VDSO __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); @@ -125,24 +126,6 @@ int __cpuinit sysenter_setup(void) return 0; } -static struct page *syscall_nopage(struct vm_area_struct *vma, - unsigned long adr, int *type) -{ - struct page *p = virt_to_page(adr - vma->vm_start + syscall_page); - get_page(p); - return p; -} - -/* Prevent VMA merging */ -static void syscall_vma_close(struct vm_area_struct *vma) -{ -} - -static struct vm_operations_struct syscall_vm_ops = { - .close = syscall_vma_close, - .nopage = syscall_nopage, -}; - /* Defined in vsyscall-sysenter.S */ extern void SYSENTER_RETURN; @@ -150,7 +133,6 @@ extern void SYSENTER_RETURN; int arch_setup_additional_pages(struct 
linux_binprm *bprm, int exstack, unsigned long start_code, unsigned long interp_map_address) { - struct vm_area_struct *vma; struct mm_struct *mm = current->mm; unsigned long addr; int ret; @@ -162,38 +144,25 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack, goto up_fail; } - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) { - ret = -ENOMEM; - goto up_fail; - } - - vma->vm_start = addr; - vma->vm_end = addr + PAGE_SIZE; - /* MAYWRITE to allow gdb to COW and set breakpoints */ - vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; /* + * MAYWRITE to allow gdb to COW and set breakpoints + * * Make sure the vDSO gets into every core dump. * Dumping its contents makes post-mortem fully interpretable later * without matching up the same kernel and hardware config to see * what PC values meant. */ - vma->vm_flags |= VM_ALWAYSDUMP; - vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 7]; - vma->vm_ops = &syscall_vm_ops; - vma->vm_mm = mm; - - ret = insert_vm_struct(mm, vma); - if (unlikely(ret)) { - kmem_cache_free(vm_area_cachep, vma); + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + syscall_pages); + if (ret) goto up_fail; - } current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return = (void *)VDSO_SYM(&SYSENTER_RETURN); - mm->total_vm++; up_fail: up_write(&mm->mmap_sem); return ret; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index a9184d75..ec616f49 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -126,7 +126,7 @@ static void dump_one_vdso_page(struct page *pg, struct page *upg) printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), page_count(pg), pg->flags); - if (upg/* && pg != upg*/) { + if (upg && !IS_ERR(upg) /* && pg != upg*/) { printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT), 
page_count(upg), diff --git a/arch/x86_64/ia32/syscall32-xen.c b/arch/x86_64/ia32/syscall32-xen.c index 67dca103..0f57b9fb 100644 --- a/arch/x86_64/ia32/syscall32-xen.c +++ b/arch/x86_64/ia32/syscall32-xen.c @@ -19,7 +19,7 @@ extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; extern int sysctl_vsyscall32; -char *syscall32_page; +static struct page *syscall32_pages[1]; static int use_sysenter = -1; #if CONFIG_XEN_COMPAT < 0x030200 @@ -27,24 +27,6 @@ extern unsigned char syscall32_int80[], syscall32_int80_end[]; static int use_int80 = 1; #endif -static struct page * -syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) -{ - struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page); - get_page(p); - return p; -} - -/* Prevent VMA merging */ -static void syscall32_vma_close(struct vm_area_struct *vma) -{ -} - -static struct vm_operations_struct syscall32_vm_ops = { - .close = syscall32_vma_close, - .nopage = syscall32_nopage, -}; - struct linux_binprm; /* Setup a VMA at program startup for the vsyscall page */ @@ -52,40 +34,31 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack, unsigned long start_code, unsigned long interp_map_address) { - int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; - struct vm_area_struct *vma; struct mm_struct *mm = current->mm; int ret; - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) - return -ENOMEM; - - memset(vma, 0, sizeof(struct vm_area_struct)); - /* Could randomize here */ - vma->vm_start = VSYSCALL32_BASE; - vma->vm_end = VSYSCALL32_END; - /* MAYWRITE to allow gdb to COW and set breakpoints */ - vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; - vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 7]; - vma->vm_ops = &syscall32_vm_ops; - vma->vm_mm = mm; - down_write(&mm->mmap_sem); - if ((ret = insert_vm_struct(mm, vma))) 
{ - up_write(&mm->mmap_sem); - kmem_cache_free(vm_area_cachep, vma); - return ret; - } - mm->total_vm += npages; + /* + * MAYWRITE to allow gdb to COW and set breakpoints + * + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + syscall32_pages); up_write(&mm->mmap_sem); - return 0; + return ret; } static int __init init_syscall32(void) { - syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + syscall32_pages[0] = virt_to_page(syscall32_page); if (!syscall32_page) panic("Cannot allocate syscall32 page"); diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 3b13188e..78372c4a 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c @@ -18,27 +18,9 @@ extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; extern int sysctl_vsyscall32; -char *syscall32_page; +static struct page *syscall32_pages[1]; static int use_sysenter = -1; -static struct page * -syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) -{ - struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page); - get_page(p); - return p; -} - -/* Prevent VMA merging */ -static void syscall32_vma_close(struct vm_area_struct *vma) -{ -} - -static struct vm_operations_struct syscall32_vm_ops = { - .close = syscall32_vma_close, - .nopage = syscall32_nopage, -}; - struct linux_binprm; /* Setup a VMA at program startup for the vsyscall page */ @@ -46,47 +28,31 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack, unsigned long start_code, unsigned long interp_map_address) { - int npages = 
(VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; - struct vm_area_struct *vma; struct mm_struct *mm = current->mm; int ret; - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!vma) - return -ENOMEM; - - memset(vma, 0, sizeof(struct vm_area_struct)); - /* Could randomize here */ - vma->vm_start = VSYSCALL32_BASE; - vma->vm_end = VSYSCALL32_END; - /* MAYWRITE to allow gdb to COW and set breakpoints */ - vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + down_write(&mm->mmap_sem); /* + * MAYWRITE to allow gdb to COW and set breakpoints + * * Make sure the vDSO gets into every core dump. * Dumping its contents makes post-mortem fully interpretable later * without matching up the same kernel and hardware config to see * what PC values meant. */ - vma->vm_flags |= VM_ALWAYSDUMP; - vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 7]; - vma->vm_ops = &syscall32_vm_ops; - vma->vm_mm = mm; - - down_write(&mm->mmap_sem); - if ((ret = insert_vm_struct(mm, vma))) { - up_write(&mm->mmap_sem); - kmem_cache_free(vm_area_cachep, vma); - return ret; - } - mm->total_vm += npages; + ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + syscall32_pages); up_write(&mm->mmap_sem); - return 0; + return ret; } static int __init init_syscall32(void) { - syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + syscall32_pages[0] = virt_to_page(syscall32_page); if (!syscall32_page) panic("Cannot allocate syscall32 page"); if (use_sysenter > 0) { diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c index 9edee7c1..b9712ce7 100644 --- a/arch/x86_64/kernel/pmtimer.c +++ b/arch/x86_64/kernel/pmtimer.c @@ -106,7 +106,7 @@ void pmtimer_resume(void) last_pmtmr_tick = inl(pmtmr_ioport); } -unsigned int do_gettimeoffset_pm(void) +long do_gettimeoffset_pm(void) { u32 now, 
offset, delta = 0; @@ -114,7 +114,9 @@ unsigned int do_gettimeoffset_pm(void) now = inl(pmtmr_ioport); delta = (now - offset) & ACPI_PM_MASK; - return offset_delay + cyc2us(delta); + /* seems crazy to do with PM timer resolution but we need nsec + resolution in arch/x86_64/kernel/time.c code */ + return ((offset_delay + cyc2us(delta)) * NSEC_PER_USEC); } diff --git a/arch/x86_64/kernel/setup64-xen.c b/arch/x86_64/kernel/setup64-xen.c index f932f8f8..97c823b7 100644 --- a/arch/x86_64/kernel/setup64-xen.c +++ b/arch/x86_64/kernel/setup64-xen.c @@ -238,6 +238,8 @@ void __cpuinit check_efer(void) unsigned long kernel_eflags; +unsigned long kernel_eflags; + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 867760da..e2c3a218 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -11,6 +11,12 @@ * Copyright (c) 2002,2006 Vojtech Pavlik * Copyright (c) 2003 Andi Kleen * RTC support code taken from arch/i386/kernel/timers/time_hpet.c + * + * March 2008: Upstream has diverged significantly from this codebase. + * Modifications to this file to convert the gettimeofday call into nsecs + * (but still return usec values) were done in order to resolve a large + * number of gettimeofday issues seen across a wide swath of Intel and + * AMD systems. 
*/ #include @@ -65,7 +71,7 @@ static int notsc __initdata = 0; #define NSEC_PER_TICK (NSEC_PER_SEC / HZ) #define FSEC_PER_TICK (FSEC_PER_SEC / HZ) -#define USEC_PER_REAL_TICK (USEC_PER_SEC / REAL_HZ) +#define NSEC_PER_REAL_TICK (NSEC_PER_SEC / REAL_HZ) #define NS_SCALE 10 /* 2^10, carefully chosen */ #define US_SCALE 32 /* 2^32, arbitralrily chosen */ @@ -90,7 +96,7 @@ struct timespec __xtime __section_xtime; struct timezone __sys_tz __section_sys_tz; /* - * do_gettimeoffset() returns microseconds since last timer interrupt was + * do_gettimeoffset() returns nanoseconds since last timer interrupt was * triggered by hardware. A memory read of HPET is slower than a register read * of TSC, but much more reliable. It's also synchronized to the timer * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a @@ -99,27 +105,27 @@ struct timezone __sys_tz __section_sys_tz; * together by xtime_lock. */ -static inline unsigned int do_gettimeoffset_tsc(void) +static inline long do_gettimeoffset_tsc(void) { unsigned long t; unsigned long x; t = get_cycles_sync(); if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ - x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE; + x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> NS_SCALE; return x; } -static inline unsigned int do_gettimeoffset_hpet(void) +static inline long do_gettimeoffset_hpet(void) { /* cap counter read to one tick to avoid inconsistencies */ unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; /* The hpet counter runs at a fixed rate so we don't care about HZ scaling here. 
We do however care that the limit is in real ticks */ - return (min(counter,hpet_tick_real) * vxtime.quot) >> US_SCALE; + return (min(counter,hpet_tick_real) * vxtime.quot) >> NS_SCALE; } -unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; +long (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; /* * This version of gettimeofday() has microsecond resolution and better than @@ -129,32 +135,25 @@ unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; void do_gettimeofday(struct timeval *tv) { - unsigned long seq, t; - unsigned int sec, usec; + unsigned long seq; + long sec, nsec; do { seq = read_seqbegin(&xtime_lock); sec = xtime.tv_sec; - usec = xtime.tv_nsec / NSEC_PER_USEC; + nsec = xtime.tv_nsec + (jiffies - wall_jiffies) * NSEC_PER_TICK; - /* i386 does some correction here to keep the clock - monotonous even when ntpd is fixing drift. - But they didn't work for me, there is a non monotonic - clock anyways with ntp. - I dropped all corrections now until a real solution can - be found. Note when you fix it here you need to do the same - in arch/x86_64/kernel/vsyscall.c and export all needed - variables in vmlinux.lds. 
-AK */ - - t = (jiffies - wall_jiffies) * USEC_PER_TICK + - do_gettimeoffset(); - usec += t; + nsec += do_gettimeoffset(); } while (read_seqretry(&xtime_lock, seq)); - tv->tv_sec = sec + usec / USEC_PER_SEC; - tv->tv_usec = usec % USEC_PER_SEC; + tv->tv_sec = sec; + while (nsec >= NSEC_PER_SEC) { + tv->tv_sec += 1; + nsec -= NSEC_PER_SEC; + } + tv->tv_usec = nsec / NSEC_PER_USEC; } EXPORT_SYMBOL(do_gettimeofday); @@ -175,8 +174,7 @@ int do_settimeofday(struct timespec *tv) write_seqlock_irq(&xtime_lock); - nsec -= do_gettimeoffset() * NSEC_PER_USEC + - (jiffies - wall_jiffies) * NSEC_PER_TICK; + nsec -= do_gettimeoffset() + (jiffies - wall_jiffies) * NSEC_PER_TICK; wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); @@ -417,14 +415,15 @@ void main_timer_handler(struct pt_regs *regs) #endif } else { offset = (((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> US_SCALE) - USEC_PER_REAL_TICK; + vxtime.tsc_quot) >> NS_SCALE) - NSEC_PER_REAL_TICK; if (offset < 0) offset = 0; - if (offset > USEC_PER_REAL_TICK) { - lost = offset / USEC_PER_REAL_TICK; - offset %= USEC_PER_REAL_TICK; + lost = 0; + while (offset > NSEC_PER_REAL_TICK) { + lost++; + offset -= NSEC_PER_REAL_TICK; } /* FIXME: 1000 or 1000000? 
*/ @@ -433,9 +432,9 @@ void main_timer_handler(struct pt_regs *regs) vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; if ((((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> US_SCALE) < offset) + vxtime.tsc_quot) >> NS_SCALE) < offset) vxtime.last_tsc = tsc - - (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1; + (((long) offset << NS_SCALE) / vxtime.tsc_quot) - 1; } /* SCALE: We expect tick_divider - 1 lost, ie 0 for normal behaviour */ if (lost > (int)tick_divider - 1) { @@ -692,7 +691,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; + vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz; } set_cyc2ns_scale(tsc_khz_ref); @@ -997,8 +996,8 @@ void __init time_init(void) cpu_khz = tsc_calibrate_cpu_khz(); vxtime.mode = VXTIME_TSC; - vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; - vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; + vxtime.quot = (NSEC_PER_SEC << NS_SCALE) / vxtime_hz; + vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz; vxtime.last_tsc = get_cycles_sync(); setup_irq(0, &irq0); @@ -1085,8 +1084,8 @@ void time_init_gtod(void) vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype); printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; - vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; + vxtime.quot = (NSEC_PER_SEC << NS_SCALE) / vxtime_hz; + vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz; vxtime.last_tsc = get_cycles_sync(); set_cyc2ns_scale(cpu_khz); diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index cf6c5d1e..69719d6e 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -52,6 +52,8 @@ int __vgetcpu_mode __section_vgetcpu_mode; asm("" : "=r" (v) : 
"0" (x)); \ ((v - fix_to_virt(VSYSCALL_FIRST_PAGE)) + __pa_symbol(&__vsyscall_0)); }) +#define NS_SCALE 10 /* 2^10, carefully chosen */ + static __always_inline void timeval_normalize(struct timeval * tv) { time_t __sec; @@ -66,30 +68,34 @@ static __always_inline void timeval_normalize(struct timeval * tv) static __always_inline void do_vgettimeofday(struct timeval * tv) { long sequence, t; - unsigned long sec, usec; + long sec, nsec; do { sequence = read_seqbegin(&__xtime_lock); - + sec = __xtime.tv_sec; - usec = (__xtime.tv_nsec / 1000) + - (__jiffies - __wall_jiffies) * (1000000 / HZ); + nsec = __xtime.tv_nsec + + (__jiffies - __wall_jiffies) * (NSEC_PER_SEC / HZ); if (__vxtime.mode != VXTIME_HPET) { t = get_cycles_sync(); if (t < __vxtime.last_tsc) t = __vxtime.last_tsc; - usec += ((t - __vxtime.last_tsc) * - __vxtime.tsc_quot) >> 32; - /* See comment in x86_64 do_gettimeofday. */ + nsec += ((t - __vxtime.last_tsc) * + __vxtime.tsc_quot) >> NS_SCALE; } else { - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) - - __vxtime.last) * __vxtime.quot) >> 32; + nsec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + + 0xf0) - + __vxtime.last) * __vxtime.quot) >> NS_SCALE; } } while (read_seqretry(&__xtime_lock, sequence)); - tv->tv_sec = sec + usec / 1000000; - tv->tv_usec = usec % 1000000; + tv->tv_sec = sec; + while (nsec >= NSEC_PER_SEC) { + tv->tv_sec += 1; + nsec -= NSEC_PER_SEC; + } + tv->tv_usec = nsec / NSEC_PER_USEC; } /* RED-PEN may want to readd seq locking, but then the variable should be write-once. 
*/ @@ -134,7 +140,8 @@ time_t __vsyscall(1) vtime(time_t *t) if (!__sysctl_vsyscall) return time_syscall(t); else if (t) - *t = __xtime.tv_sec; + *t = __xtime.tv_sec; + return __xtime.tv_sec; } diff --git a/buildconfigs/Rules.mk b/buildconfigs/Rules.mk index 0b93dcae..c51edd41 100644 --- a/buildconfigs/Rules.mk +++ b/buildconfigs/Rules.mk @@ -2,7 +2,7 @@ XEN_TARGET_ARCH = x86_32 XEN_TARGET_X86_PAE ?= y LINUX_SERIES = 2.6 -LINUX_VER = 2.6.18-92.1.13.el5 +LINUX_VER = 2.6.18-92.1.17.el5 EXTRAVERSION ?= xen diff --git a/configs/kernel-2.6.18-i686-PAE.config b/configs/kernel-2.6.18-i686-PAE.config index 50586060..939a1301 100644 --- a/configs/kernel-2.6.18-i686-PAE.config +++ b/configs/kernel-2.6.18-i686-PAE.config @@ -2,7 +2,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Mon Sep 29 11:42:18 2008 +# Wed Nov 5 04:51:57 2008 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y diff --git a/configs/kernel-2.6.18-i686-debug.config b/configs/kernel-2.6.18-i686-debug.config index 84bd50ee..c8657140 100644 --- a/configs/kernel-2.6.18-i686-debug.config +++ b/configs/kernel-2.6.18-i686-debug.config @@ -2,7 +2,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Mon Sep 29 11:42:18 2008 +# Wed Nov 5 04:51:58 2008 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y diff --git a/configs/kernel-2.6.18-i686-xen.config b/configs/kernel-2.6.18-i686-xen.config index afd893b9..476b56ee 100644 --- a/configs/kernel-2.6.18-i686-xen.config +++ b/configs/kernel-2.6.18-i686-xen.config @@ -2,7 +2,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Mon Sep 29 11:42:18 2008 +# Wed Nov 5 04:51:58 2008 # CONFIG_X86_32=y CONFIG_LOCKDEP_SUPPORT=y diff --git a/configs/kernel-2.6.18-i686.config b/configs/kernel-2.6.18-i686.config index b7e684a1..44e70ee2 100644 --- a/configs/kernel-2.6.18-i686.config +++ b/configs/kernel-2.6.18-i686.config @@ -2,7 +2,7 @@ # # Automatically generated make 
config: don't edit # Linux kernel version: 2.6.18-prep -# Mon Sep 29 11:42:18 2008 +# Wed Nov 5 04:51:58 2008 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y diff --git a/drivers/acpi/executer/excreate.c b/drivers/acpi/executer/excreate.c index 34eec82c..13143bf4 100644 --- a/drivers/acpi/executer/excreate.c +++ b/drivers/acpi/executer/excreate.c @@ -97,16 +97,28 @@ acpi_status acpi_ex_create_alias(struct acpi_walk_state *walk_state) * to the original Node. */ switch (target_node->type) { + + /* For these types, the sub-object can change dynamically via a Store */ + case ACPI_TYPE_INTEGER: case ACPI_TYPE_STRING: case ACPI_TYPE_BUFFER: case ACPI_TYPE_PACKAGE: case ACPI_TYPE_BUFFER_FIELD: + /* + * These types open a new scope, so we need the NS node in order to access + * any children. + */ + case ACPI_TYPE_DEVICE: + case ACPI_TYPE_POWER: + case ACPI_TYPE_PROCESSOR: + case ACPI_TYPE_THERMAL: + case ACPI_TYPE_LOCAL_SCOPE: + /* * The new alias has the type ALIAS and points to the original - * NS node, not the object itself. This is because for these - * types, the object can change dynamically via a Store. + * NS node, not the object itself. */ alias_node->type = ACPI_TYPE_LOCAL_ALIAS; alias_node->object = @@ -116,9 +128,7 @@ acpi_status acpi_ex_create_alias(struct acpi_walk_state *walk_state) case ACPI_TYPE_METHOD: /* - * The new alias has the type ALIAS and points to the original - * NS node, not the object itself. This is because for these - * types, the object can change dynamically via a Store. 
+ * Control method aliases need to be differentiated */ alias_node->type = ACPI_TYPE_LOCAL_METHOD_ALIAS; alias_node->object = diff --git a/drivers/acpi/namespace/nsaccess.c b/drivers/acpi/namespace/nsaccess.c index c1c6c236..913da3dd 100644 --- a/drivers/acpi/namespace/nsaccess.c +++ b/drivers/acpi/namespace/nsaccess.c @@ -586,44 +586,68 @@ acpi_ns_lookup(union acpi_generic_state *scope_info, return_ACPI_STATUS(status); } - /* - * Sanity typecheck of the target object: - * - * If 1) This is the last segment (num_segments == 0) - * 2) And we are looking for a specific type - * (Not checking for TYPE_ANY) - * 3) Which is not an alias - * 4) Which is not a local type (TYPE_SCOPE) - * 5) And the type of target object is known (not TYPE_ANY) - * 6) And target object does not match what we are looking for - * - * Then we have a type mismatch. Just warn and ignore it. - */ - if ((num_segments == 0) && - (type_to_check_for != ACPI_TYPE_ANY) && - (type_to_check_for != ACPI_TYPE_LOCAL_ALIAS) && - (type_to_check_for != ACPI_TYPE_LOCAL_METHOD_ALIAS) && - (type_to_check_for != ACPI_TYPE_LOCAL_SCOPE) && - (this_node->type != ACPI_TYPE_ANY) && - (this_node->type != type_to_check_for)) { - - /* Complain about a type mismatch */ - - ACPI_WARNING((AE_INFO, - "NsLookup: Type mismatch on %4.4s (%s), searching for (%s)", - ACPI_CAST_PTR(char, &simple_name), - acpi_ut_get_type_name(this_node->type), - acpi_ut_get_type_name - (type_to_check_for))); + /* More segments to follow? */ + + if (num_segments > 0) { + /* + * If we have an alias to an object that opens a scope (such as a + * device or processor), we need to dereference the alias here so that + * we can access any children of the original node (via the remaining + * segments). 
+ */ + if (this_node->type == ACPI_TYPE_LOCAL_ALIAS) { + if (acpi_ns_opens_scope + (((struct acpi_namespace_node *)this_node-> + object)->type)) { + this_node = + (struct acpi_namespace_node *) + this_node->object; + } + } } - /* - * If this is the last name segment and we are not looking for a - * specific type, but the type of found object is known, use that type - * to see if it opens a scope. - */ - if ((num_segments == 0) && (type == ACPI_TYPE_ANY)) { - type = this_node->type; + /* Special handling for the last segment (num_segments == 0) */ + + else { + /* + * Sanity typecheck of the target object: + * + * If 1) This is the last segment (num_segments == 0) + * 2) And we are looking for a specific type + * (Not checking for TYPE_ANY) + * 3) Which is not an alias + * 4) Which is not a local type (TYPE_SCOPE) + * 5) And the type of target object is known (not TYPE_ANY) + * 6) And target object does not match what we are looking for + * + * Then we have a type mismatch. Just warn and ignore it. + */ + if ((type_to_check_for != ACPI_TYPE_ANY) && + (type_to_check_for != ACPI_TYPE_LOCAL_ALIAS) && + (type_to_check_for != ACPI_TYPE_LOCAL_METHOD_ALIAS) + && (type_to_check_for != ACPI_TYPE_LOCAL_SCOPE) + && (this_node->type != ACPI_TYPE_ANY) + && (this_node->type != type_to_check_for)) { + + /* Complain about a type mismatch */ + + ACPI_WARNING((AE_INFO, + "NsLookup: Type mismatch on %4.4s (%s), searching for (%s)", + ACPI_CAST_PTR(char, &simple_name), + acpi_ut_get_type_name(this_node-> + type), + acpi_ut_get_type_name + (type_to_check_for))); + } + + /* + * If this is the last name segment and we are not looking for a + * specific type, but the type of found object is known, use that type + * to (later) see if it opens a scope. 
+ */ + if (type == ACPI_TYPE_ANY) { + type = this_node->type; + } } /* Point to next name segment and make this node current */ diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 68c9b630..4722d538 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1605,9 +1605,10 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out) u8 *buf; unsigned int buflen; - struct scatterlist *sg = scsi_sglist(cmd); - - if (sg) { + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; buflen = sg->length; } else { @@ -1632,9 +1633,12 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out) static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf) { - struct scatterlist *sg = scsi_sglist(cmd); - if (sg) + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; kunmap_atomic(buf - sg->offset, KM_IRQ0); + } } /** diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 1d7a0006..735a0dd7 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -34,18 +34,20 @@ # include #endif -static inline int range_is_allowed(unsigned long from, unsigned long to) +static inline int range_is_allowed(unsigned long pfn, unsigned long size) { - unsigned long cursor; + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; - cursor = from >> PAGE_SHIFT; - while ((cursor << PAGE_SHIFT) < to) { - if (!devmem_is_allowed(cursor)) { - printk ("Program %s tried to read /dev/mem between %lx->%lx.\n", + while (cursor < to) { + if (!devmem_is_allowed(pfn)) { + printk ("Program %s tried to read /dev/mem between %Lx->%Lx.\n", current->comm, from, to); return 0; } - cursor++; + cursor += PAGE_SIZE; + pfn++; } return 1; } @@ -167,7 +169,7 @@ static ssize_t read_mem(struct file * file, char __user * buf, */ ptr = xlate_dev_mem_ptr(p); - if 
(!range_is_allowed(p, p+count)) + if (!range_is_allowed(p >> PAGE_SHIFT, count)) return -EPERM; if (copy_to_user(buf, ptr, sz)) return -EFAULT; @@ -265,6 +267,9 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma) if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) return -EINVAL; + if (!range_is_allowed(vma->vm_pgoff, size)) + return -EPERM; + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0451ce0c..3c902839 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -387,7 +387,7 @@ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy, int ret; mutex_unlock(&cpufreq_governor_mutex); - ret = request_module(name); + ret = request_module("%s", name); mutex_lock(&cpufreq_governor_mutex); if (ret == 0) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 7e725ba4..43d2ee9e 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -192,6 +192,8 @@ struct ehca_qp { int mtu_shift; u32 message_count; u32 packet_count; + atomic_t nr_events; /* events seen */ + wait_queue_head_t wait_completion; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index b5ca94c6..ab8f42c3 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -204,6 +204,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe, read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, token); + if (qp) + atomic_inc(&qp->nr_events); read_unlock(&ehca_qp_idr_lock); if (!qp) @@ -223,6 +225,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe, if (fatal && qp->ext_type == EQPT_SRQBASE) dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + if (atomic_dec_and_test(&qp->nr_events)) + 
wake_up(&qp->wait_completion); return; } diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 162ce6f6..f8f3ad38 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -564,6 +564,8 @@ static struct ehca_qp *internal_create_qp( return ERR_PTR(-ENOMEM); } + atomic_set(&my_qp->nr_events, 0); + init_waitqueue_head(&my_qp->wait_completion); spin_lock_init(&my_qp->spinlock_s); spin_lock_init(&my_qp->spinlock_r); my_qp->qp_type = qp_type; @@ -1974,6 +1976,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, idr_remove(&ehca_qp_idr, my_qp->token); write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + /* now wait until all pending events have completed */ + wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li " diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index f08d9f88..1bbc070b 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -45,16 +45,10 @@ msi_register(struct msi_ops *ops) return 0; } -static void msi_cache_ctor(void *p, kmem_cache_t *cache, unsigned long flags) -{ - memset(p, 0, sizeof(struct msi_desc)); -} - static int msi_cache_init(void) { - msi_cachep = kmem_cache_create("msi_cache", - sizeof(struct msi_desc), - 0, SLAB_HWCACHE_ALIGN, msi_cache_ctor, NULL); + msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!msi_cachep) return -ENOMEM; @@ -411,11 +405,10 @@ static struct msi_desc* alloc_msi_entry(void) { struct msi_desc *entry; - entry = kmem_cache_alloc(msi_cachep, SLAB_KERNEL); + entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL); if (!entry) return NULL; - memset(entry, 0, sizeof(struct msi_desc)); entry->link.tail = entry->link.head = 0; /* single message */ entry->dev = NULL; @@ -909,6 +902,33 @@ static int 
msix_capability_init(struct pci_dev *dev, return 0; } +/** + * pci_msi_supported - check whether MSI may be enabled on device + * @dev: pointer to the pci_dev data structure of MSI device function + * + * MSI must be globally enabled and supported by the device and its root + * bus. But, the root bus is not easy to find since some architectures + * have virtual busses on top of the PCI hierarchy (for instance the + * hypertransport bus), while the actual bus where MSI must be supported + * is below. So we test the MSI flag on all parent busses and assume + * that no quirk will ever set the NO_MSI flag on a non-root bus. + **/ +static +int pci_msi_supported(struct pci_dev * dev) +{ + struct pci_bus *bus; + + if (!pci_msi_enable || !dev || dev->no_msi) + return -EINVAL; + + /* check MSI flags of all parent busses */ + for (bus = dev->bus; bus; bus = bus->parent) + if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) + return -EINVAL; + + return 0; +} + /** * pci_enable_msi - configure device's MSI capability structure * @dev: pointer to the pci_dev data structure of MSI device function @@ -921,19 +941,10 @@ static int msix_capability_init(struct pci_dev *dev, **/ int pci_enable_msi(struct pci_dev* dev) { - struct pci_bus *bus; - int pos, temp, status = -EINVAL; - u16 control; - - if (!pci_msi_enable || !dev) - return status; - - if (dev->no_msi) - return status; + int pos, temp, status; - for (bus = dev->bus; bus; bus = bus->parent) - if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) - return -EINVAL; + if (pci_msi_supported(dev) < 0) + return -EINVAL; temp = dev->irq; @@ -945,27 +956,8 @@ int pci_enable_msi(struct pci_dev* dev) if (!pos) return -EINVAL; - if (!msi_lookup_vector(dev, PCI_CAP_ID_MSI)) { - /* Lookup Sucess */ - unsigned long flags; + WARN_ON(!msi_lookup_vector(dev, PCI_CAP_ID_MSI)); - pci_read_config_word(dev, msi_control_reg(pos), &control); - if (control & PCI_MSI_FLAGS_ENABLE) - return 0; /* Already in MSI mode */ - spin_lock_irqsave(&msi_lock, flags); - if 
(!vector_irq[dev->irq]) { - msi_desc[dev->irq]->msi_attrib.state = 0; - vector_irq[dev->irq] = -1; - nr_released_vectors--; - spin_unlock_irqrestore(&msi_lock, flags); - status = msi_register_init(dev, msi_desc[dev->irq]); - if (status == 0) - enable_msi_mode(dev, pos, PCI_CAP_ID_MSI); - return status; - } - spin_unlock_irqrestore(&msi_lock, flags); - dev->irq = temp; - } /* Check whether driver already requested for MSI-X vectors */ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); if (pos > 0 && !msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) { @@ -1007,6 +999,8 @@ void pci_disable_msi(struct pci_dev* dev) if (!(control & PCI_MSI_FLAGS_ENABLE)) return; + disable_msi_mode(dev, pos, PCI_CAP_ID_MSI); + spin_lock_irqsave(&msi_lock, flags); entry = msi_desc[dev->irq]; if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) { @@ -1020,14 +1014,12 @@ void pci_disable_msi(struct pci_dev* dev) pci_name(dev), dev->irq); BUG_ON(entry->msi_attrib.state > 0); } else { - vector_irq[dev->irq] = 0; /* free it */ - nr_released_vectors++; default_vector = entry->msi_attrib.default_vector; spin_unlock_irqrestore(&msi_lock, flags); + msi_free_vector(dev, dev->irq, 0); + /* Restore dev->irq to its default pin-assertion vector */ dev->irq = default_vector; - disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), - PCI_CAP_ID_MSI); } } @@ -1075,57 +1067,6 @@ static int msi_free_vector(struct pci_dev* dev, int vector, int reassign) return 0; } -static int reroute_msix_table(int head, struct msix_entry *entries, int *nvec) -{ - int vector = head, tail = 0; - int i, j = 0, nr_entries = 0; - void __iomem *base; - unsigned long flags; - - spin_lock_irqsave(&msi_lock, flags); - while (head != tail) { - nr_entries++; - tail = msi_desc[vector]->link.tail; - if (entries[0].entry == msi_desc[vector]->msi_attrib.entry_nr) - j = vector; - vector = tail; - } - if (*nvec > nr_entries) { - spin_unlock_irqrestore(&msi_lock, flags); - *nvec = nr_entries; - return -EINVAL; - } - vector 
= ((j > 0) ? j : head); - for (i = 0; i < *nvec; i++) { - j = msi_desc[vector]->msi_attrib.entry_nr; - msi_desc[vector]->msi_attrib.state = 0; /* Mark it not active */ - vector_irq[vector] = -1; /* Mark it busy */ - nr_released_vectors--; - entries[i].vector = vector; - if (j != (entries + i)->entry) { - base = msi_desc[vector]->mask_base; - msi_desc[vector]->msi_attrib.entry_nr = - (entries + i)->entry; - writel( readl(base + j * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET), base + - (entries + i)->entry * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - writel( readl(base + j * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET), base + - (entries + i)->entry * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - writel( (readl(base + j * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_DATA_OFFSET) & 0xff00) | vector, - base + (entries+i)->entry*PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_DATA_OFFSET); - } - vector = msi_desc[vector]->link.tail; - } - spin_unlock_irqrestore(&msi_lock, flags); - - return 0; -} - /** * pci_enable_msix - configure device's MSI-X capability structure * @dev: pointer to the pci_dev data structure of MSI-X device function @@ -1143,22 +1084,14 @@ static int reroute_msix_table(int head, struct msix_entry *entries, int *nvec) **/ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) { - struct pci_bus *bus; int status, pos, nr_entries, free_vectors; int i, j, temp; u16 control; unsigned long flags; - if (!pci_msi_enable || !dev || !entries) + if (!entries || pci_msi_supported(dev) < 0) return -EINVAL; - if (dev->no_msi) - return -EINVAL; - - for (bus = dev->bus; bus; bus = bus->parent) - if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) - return -EINVAL; - status = msi_init(); if (status < 0) return status; @@ -1168,9 +1101,6 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) return -EINVAL; pci_read_config_word(dev, msi_control_reg(pos), &control); - if (control & 
PCI_MSIX_FLAGS_ENABLE) - return -EINVAL; /* Already in MSI-X mode */ - nr_entries = multi_msix_capable(control); if (nvec > nr_entries) return -EINVAL; @@ -1185,19 +1115,8 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) } } temp = dev->irq; - if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) { - /* Lookup Sucess */ - nr_entries = nvec; - /* Reroute MSI-X table */ - if (reroute_msix_table(dev->irq, entries, &nr_entries)) { - /* #requested > #previous-assigned */ - dev->irq = temp; - return nr_entries; - } - dev->irq = temp; - enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX); - return 0; - } + WARN_ON(!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)); + /* Check whether driver already requested for MSI vector */ if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 && !msi_lookup_vector(dev, PCI_CAP_ID_MSI)) { @@ -1256,37 +1175,32 @@ void pci_disable_msix(struct pci_dev* dev) if (!(control & PCI_MSIX_FLAGS_ENABLE)) return; + disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX); + temp = dev->irq; if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) { int state, vector, head, tail = 0, warning = 0; unsigned long flags; vector = head = dev->irq; - spin_lock_irqsave(&msi_lock, flags); + dev->irq = temp; /* Restore pin IRQ */ while (head != tail) { + spin_lock_irqsave(&msi_lock, flags); state = msi_desc[vector]->msi_attrib.state; + tail = msi_desc[vector]->link.tail; + spin_unlock_irqrestore(&msi_lock, flags); if (state) warning = 1; - else { - vector_irq[vector] = 0; /* free it */ - nr_released_vectors++; - } - tail = msi_desc[vector]->link.tail; + else if (vector != head) /* Release MSI-X vector */ + msi_free_vector(dev, vector, 0); vector = tail; } - spin_unlock_irqrestore(&msi_lock, flags); + msi_free_vector(dev, vector, 0); if (warning) { - dev->irq = temp; printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without " "free_irq() on all MSI-X vectors\n", pci_name(dev)); BUG_ON(warning > 0); - } else { - dev->irq = temp; - disable_msi_mode(dev, - 
pci_find_capability(dev, PCI_CAP_ID_MSIX), - PCI_CAP_ID_MSIX); - } } } diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index 56951c39..9b31d4cb 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -110,8 +110,8 @@ extern int pci_vector_resources(int last, int nr_released); (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1)) #define multi_msi_enable(control, num) \ control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE); -#define is_64bit_address(control) (control & PCI_MSI_FLAGS_64BIT) -#define is_mask_bit_support(control) (control & PCI_MSI_FLAGS_MASKBIT) +#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) +#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) #define msi_enable(control, num) multi_msi_enable(control, num); \ control |= PCI_MSI_FLAGS_ENABLE diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index fc06f60f..3cc6146e 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -175,8 +175,8 @@ static struct aac_driver_ident aac_drivers[] = { { aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* PERC 3/Di (Boxster/PERC3DiB) */ { aac_rx_init, "aacraid", "ADAPTEC ", "catapult ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* catapult */ { aac_rx_init, "aacraid", "ADAPTEC ", "tomcat ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* tomcat */ - { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2120S ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2120S (Crusader) */ - { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2200S (Vulcan) */ + { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2120S ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec 2120S (Crusader) */ + { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec 2200S (Vulcan) */ { 
aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2200S (Vulcan-2m) */ { aac_rx_init, "aacraid", "Legend ", "Legend S220 ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Legend S220 (Legend Crusader) */ { aac_rx_init, "aacraid", "Legend ", "Legend S230 ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Legend S230 (Legend Vulcan) */ diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 1d408d15..e3577dbc 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1005,8 +1005,9 @@ qla2x00_status_entry(scsi_qla_host_t *ha, void *pkt) resid = resid_len; /* Use F/W calculated residual length. */ if (IS_FWI2_CAPABLE(ha)) { - if (scsi_status & SS_RESIDUAL_UNDER && - resid != fw_resid_len) { + if (!(scsi_status & SS_RESIDUAL_UNDER)) { + lscsi_status = 0; + } else if (resid != fw_resid_len) { scsi_status &= ~SS_RESIDUAL_UNDER; lscsi_status = 0; } diff --git a/drivers/xen/evtchn/evtchn.c b/drivers/xen/evtchn/evtchn.c index f814a8f8..3ebdabb6 100644 --- a/drivers/xen/evtchn/evtchn.c +++ b/drivers/xen/evtchn/evtchn.c @@ -437,6 +437,8 @@ static int evtchn_open(struct inode *inode, struct file *filp) mutex_init(&u->ring_cons_mutex); + mutex_init(&u->ring_cons_mutex); + filp->private_data = u; u->bind_cpu = -1; diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h index 10a1d58f..90d1d542 100644 --- a/drivers/xen/netback/common.h +++ b/drivers/xen/netback/common.h @@ -97,6 +97,7 @@ typedef struct netif_st { /* Miscellaneous private stuff. 
*/ struct list_head list; /* scheduling list */ atomic_t refcnt; + struct xenbus_device *xendev; struct net_device *dev; struct net_device_stats stats; @@ -191,6 +192,7 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref, } while (0) void netif_xenbus_init(void); +void netif_interfaces_init(void); #define netif_schedulable(netif) \ (netif_running((netif)->dev) && netback_carrier_ok(netif)) diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c index 610891fc..61f94cce 100644 --- a/drivers/xen/netback/interface.c +++ b/drivers/xen/netback/interface.c @@ -33,6 +33,7 @@ #include "common.h" #include #include +#include /* * Module parameter 'queue_length': @@ -334,3 +335,31 @@ void netif_disconnect(netif_t *netif) free_netdev(netif->dev); } + + +static int +netdev_notify(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + + /* Carrier up event and is it one of our devices? */ + if (event == NETDEV_CHANGE && netif_carrier_ok(dev) && + dev->open == net_open) { + netif_t *netif = netdev_priv(dev); + + xenbus_switch_state(netif->xendev, XenbusStateConnected); + } + + return NOTIFY_DONE; +} + + +static struct notifier_block notifier_netdev = { + .notifier_call = netdev_notify, +}; + + +void netif_interfaces_init(void) +{ + (void)register_netdevice_notifier(&notifier_netdev); +} diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c index 145f67db..70aecf3f 100644 --- a/drivers/xen/netback/netback.c +++ b/drivers/xen/netback/netback.c @@ -1613,6 +1613,7 @@ static int __init netback_init(void) netif_accel_init(); + netif_interfaces_init(); netif_xenbus_init(); #ifdef NETBE_DEBUG_INTERRUPT diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c index d7faeb62..491596f6 100644 --- a/drivers/xen/netback/xenbus.c +++ b/drivers/xen/netback/xenbus.c @@ -203,6 +203,7 @@ static void backend_create_netif(struct backend_info *be) return; } + be->netif->xendev = dev;
kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); } @@ -328,10 +329,6 @@ static void connect(struct backend_info *be) int err; struct xenbus_device *dev = be->dev; - err = connect_rings(be); - if (err) - return; - err = xen_net_read_mac(dev, be->netif->fe_dev_addr); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); @@ -342,7 +339,9 @@ static void connect(struct backend_info *be) &be->netif->credit_usec); be->netif->remaining_credit = be->netif->credit_bytes; - xenbus_switch_state(dev, XenbusStateConnected); + err = connect_rings(be); + if (err) + return; netif_wake_queue(be->netif->dev); } diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 1f2d1ad6..7f302b80 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -44,7 +44,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->sh_bang++; /* Well, the bang-shell is implicit... */ + bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 1713c48f..a5d77c66 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -42,6 +42,9 @@ enum {Enabled, Magic}; #define MISC_FMT_OPEN_BINARY (1<<30) #define MISC_FMT_CREDENTIALS (1<<29) +/* Marker for breaking misc - > script -> misc loop */ +#define MISC_BANG (1<<1) + typedef struct { struct list_head list; unsigned long flags; /* type, status, etc. 
*/ @@ -116,6 +119,10 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!enabled) goto _ret; + retval = -ENOEXEC; + if (bprm->sh_bang & MISC_BANG) + goto _ret; + /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); @@ -199,6 +206,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval < 0) goto _error; + bprm->sh_bang |= MISC_BANG; + retval = search_binary_handler (bprm, regs); if (retval < 0) goto _error; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 1edbcca2..de5377c9 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -30,7 +30,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) * Sorta complicated, but hopefully it will work. -TYT */ - bprm->sh_bang++; + bprm->sh_bang = 1; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0abde420..40e30ad6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -939,6 +939,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, return -EBADF; open_file = (struct cifsFileInfo *) file->private_data; + rc = generic_write_checks(file, poffset, &write_size, 0); + if (rc) + return rc; + xid = GetXid(); #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) diff --git a/fs/dlm/user.c b/fs/dlm/user.c index c19eac72..1862b9d8 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -84,7 +84,7 @@ struct dlm_lock_result32 { static void compat_input(struct dlm_write_request *kb, struct dlm_write_request32 *kb32, - int max_namelen) + size_t count) { kb->version[0] = kb32->version[0]; kb->version[1] = kb32->version[1]; @@ -96,7 +96,8 @@ static void compat_input(struct dlm_write_request *kb, kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { kb->i.lspace.flags = kb32->i.lspace.flags; kb->i.lspace.minor = kb32->i.lspace.minor; - strcpy(kb->i.lspace.name, kb32->i.lspace.name); + memcpy(kb->i.lspace.name, 
kb32->i.lspace.name, count - + offsetof(struct dlm_write_request32, i.lspace.name)); } else if (kb->cmd == DLM_USER_PURGE) { kb->i.purge.nodeid = kb32->i.purge.nodeid; kb->i.purge.pid = kb32->i.purge.pid; @@ -114,10 +115,8 @@ static void compat_input(struct dlm_write_request *kb, kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); - if (kb->i.lock.namelen <= max_namelen) - memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen); - else - kb->i.lock.namelen = max_namelen; + memcpy(kb->i.lock.name, kb32->i.lock.name, count - + offsetof(struct dlm_write_request32, i.lock.name)); } } @@ -509,7 +508,7 @@ static ssize_t device_write(struct file *file, const char __user *buf, #endif return -EINVAL; - kbuf = kmalloc(count, GFP_KERNEL); + kbuf = kzalloc(count + 1, GFP_KERNEL); if (!kbuf) return -ENOMEM; @@ -527,14 +526,14 @@ static ssize_t device_write(struct file *file, const char __user *buf, if (!kbuf->is64bit) { struct dlm_write_request32 *k32buf; k32buf = (struct dlm_write_request32 *)kbuf; - kbuf = kmalloc(count + (sizeof(struct dlm_write_request) - + kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - sizeof(struct dlm_write_request32)), GFP_KERNEL); if (!kbuf) return -ENOMEM; if (proc) set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); - compat_input(kbuf, k32buf, count - sizeof(struct dlm_write_request32)); + compat_input(kbuf, k32buf, count + 1); kfree(k32buf); } #endif diff --git a/fs/open.c b/fs/open.c index 39ee034a..9e9cb22b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -214,6 +214,9 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, newattrs.ia_valid |= ATTR_FILE; } + /* Remove suid/sgid on truncate too */ + newattrs.ia_valid |= should_remove_suid(dentry); + mutex_lock(&dentry->d_inode->i_mutex); err = notify_change(dentry, &newattrs); mutex_unlock(&dentry->d_inode->i_mutex); diff --git a/fs/splice.c 
b/fs/splice.c index d1084518..32e0cc1c 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -615,7 +615,7 @@ find_page: ret = add_to_page_cache_lru(page, mapping, index, gfp_mask); if (unlikely(ret)) - goto out; + goto out_release; } /* @@ -696,8 +696,9 @@ find_page: goto find_page; } out: - page_cache_release(page); unlock_page(page); +out_release: + page_cache_release(page); out_ret: return ret; } @@ -826,12 +827,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; ssize_t ret; + int err; + + err = should_remove_suid(out->f_dentry); + if (unlikely(err)) { + mutex_lock(&inode->i_mutex); + err = __remove_suid(out->f_dentry, err); + mutex_unlock(&inode->i_mutex); + if (err) + return err; + } ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { - struct inode *inode = mapping->host; - *ppos += ret; /* @@ -839,8 +849,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, * sync it. 
*/ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; - mutex_lock(&inode->i_mutex); err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); diff --git a/include/asm-x86_64/mach-xen/asm/system.h b/include/asm-x86_64/mach-xen/asm/system.h index 6cd9f2d7..6b0cfd46 100644 --- a/include/asm-x86_64/mach-xen/asm/system.h +++ b/include/asm-x86_64/mach-xen/asm/system.h @@ -24,6 +24,7 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" +/* Save restore flags to clear handle leaking NT */ #define switch_to(prev,next,last) \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 4aa1c87f..c448d240 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -39,7 +39,7 @@ extern void ia32_syscall(void); extern int pmtimer_mark_offset(void); extern void pmtimer_resume(void); extern void pmtimer_wait(unsigned); -extern unsigned int do_gettimeoffset_pm(void); +extern long do_gettimeoffset_pm(void); #ifdef CONFIG_X86_PM_TIMER extern u32 pmtmr_ioport; #else @@ -85,7 +85,6 @@ extern void swap_low_mappings(void); extern void __show_regs(struct pt_regs * regs); extern void show_regs(struct pt_regs * regs); -extern char *syscall32_page; extern void syscall32_cpu_init(void); extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); diff --git a/include/asm-x86_64/vsyscall32.h b/include/asm-x86_64/vsyscall32.h index c631c082..246b461f 100644 --- a/include/asm-x86_64/vsyscall32.h +++ b/include/asm-x86_64/vsyscall32.h @@ -8,7 +8,6 @@ #define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410) #else #define VSYSCALL32_BASE 0xffffe000UL -#define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE) #define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE) #define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400) diff --git a/include/linux/fs.h b/include/linux/fs.h index 
6644592d..942464af 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1667,6 +1667,8 @@ extern void __iget(struct inode * inode); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); +extern int __remove_suid(struct dentry *, int); +extern int should_remove_suid(struct dentry *); extern int remove_suid(struct dentry *); extern void remove_dquot_ref(struct super_block *, int, struct list_head *); diff --git a/include/linux/mm.h b/include/linux/mm.h index 5cab5666..61c5d6db 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1034,8 +1034,7 @@ static inline unsigned long get_unmapped_area(struct file * file, unsigned long extern int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, - unsigned long vm_flags, pgprot_t pgprot, - struct page **pages); + unsigned long vm_flags, struct page **pages); extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, diff --git a/mm/filemap.c b/mm/filemap.c index 6605ba75..7bb547e5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1929,11 +1929,10 @@ repeat: * if suid or (sgid and xgrp) * remove privs */ -int remove_suid(struct dentry *dentry) +int should_remove_suid(struct dentry *dentry) { mode_t mode = dentry->d_inode->i_mode; int kill = 0; - int result = 0; /* suid always must be killed */ if (unlikely(mode & S_ISUID)) @@ -1946,13 +1945,28 @@ int remove_suid(struct dentry *dentry) if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) kill |= ATTR_KILL_SGID; - if (unlikely(kill && !capable(CAP_FSETID))) { - struct iattr newattrs; + if (unlikely(kill && !capable(CAP_FSETID))) + return kill; - newattrs.ia_valid = ATTR_FORCE | kill; - result = notify_change(dentry, &newattrs); - } - return result; + return 0; +} + +int __remove_suid(struct dentry *dentry, int kill) +{ + struct iattr newattrs; + + newattrs.ia_valid = ATTR_FORCE | kill; + return 
notify_change(dentry, &newattrs); +} + +int remove_suid(struct dentry *dentry) +{ + int kill = should_remove_suid(dentry); + + if (unlikely(kill)) + return __remove_suid(dentry, kill); + + return 0; } EXPORT_SYMBOL(remove_suid); diff --git a/mm/memory.c b/mm/memory.c index ca888fec..70a7d6cb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -955,17 +955,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, } ptep = pte_offset_map_lock(mm, pmd, address, &ptl); - if (!ptep) - goto out; pte = *ptep; if (!pte_present(pte)) - goto unlock; + goto no_page; if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; page = vm_normal_page(vma, address, pte); if (unlikely(!page)) - goto unlock; + goto bad_page; if (flags & FOLL_GET) get_page(page); @@ -980,6 +978,15 @@ unlock: out: return page; +bad_page: + pte_unmap_unlock(ptep, ptl); + return ERR_PTR(-EFAULT); + +no_page: + pte_unmap_unlock(ptep, ptl); + if (!pte_none(pte)) + return page; + /* Fall through to ZERO_PAGE handling */ no_page_table: /* * When core dumping an enormous anonymous area that nobody @@ -994,6 +1001,26 @@ no_page_table: return page; } +/* Can we do the FOLL_ANON optimization? */ +static inline int use_zero_page(struct vm_area_struct *vma) +{ + /* + * We don't want to optimize FOLL_ANON for make_pages_present() + * when it tries to page in a VM_LOCKED region. As to VM_SHARED, + * we want to get the page from the page tables to make sure + * that we serialize and update with any other user of that + * mapping. + */ + if (vma->vm_flags & (VM_LOCKED | VM_SHARED)) + return 0; + /* + * And if we have a fault or a nopfn routine, it's not an + * anonymous region. 
+ */ + return !vma->vm_ops || + (!vma->vm_ops->nopage && !vma->vm_ops->nopfn); +} + int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas) @@ -1086,8 +1113,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, foll_flags = FOLL_TOUCH; if (pages) foll_flags |= FOLL_GET; - if (!write && !(vma->vm_flags & VM_LOCKED) && - (!vma->vm_ops || !vma->vm_ops->nopage)) + if (!write && use_zero_page(vma)) foll_flags |= FOLL_ANON; do { @@ -1125,6 +1151,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, BUG(); } } + if (IS_ERR(page)) + return i ? i : PTR_ERR(page); if (pages) { pages[i] = page; diff --git a/mm/migrate.c b/mm/migrate.c index 289b068a..2f454fdb 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -778,6 +778,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, goto set_status; page = follow_page(vma, pp->addr, FOLL_GET); + + err = PTR_ERR(page); + if (IS_ERR(page)) + goto set_status; + err = -ENOENT; if (!page) goto set_status; @@ -841,6 +846,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) goto set_status; page = follow_page(vma, pm->addr, 0); + + err = PTR_ERR(page); + if (IS_ERR(page)) + goto set_status; + err = -ENOENT; /* Use PageReserved to check for zero page */ if (!page || PageReserved(page)) diff --git a/mm/mmap.c b/mm/mmap.c index d89ab1f3..dabff0b8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2223,11 +2223,16 @@ special_mapping_nopage(struct vm_area_struct *vma, return NOPAGE_SIGBUS; } +static void special_mapping_close(struct vm_area_struct *vma) +{ +} + static struct vm_operations_struct special_mapping_vmops = { + .close = special_mapping_close, .nopage = special_mapping_nopage, }; -unsigned int vdso_populate = 1; +unsigned int vdso_populate = 0; /* * Insert a new vma covering the given region, with the given flags and @@ -2238,8 +2243,7 @@ unsigned int 
vdso_populate = 1; */ int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, - unsigned long vm_flags, pgprot_t pgprot, - struct page **pages) + unsigned long vm_flags, struct page **pages) { struct vm_area_struct *vma; int err; @@ -2253,13 +2257,16 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_start = addr; vma->vm_end = addr + len; - vma->vm_flags = vm_flags | VM_DONTEXPAND; - vma->vm_page_prot = pgprot; + vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; - insert_vm_struct(mm, vma); + if (unlikely(insert_vm_struct(mm, vma))) { + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } mm->total_vm += len >> PAGE_SHIFT; if (!vdso_populate) diff --git a/net/core/utils.c b/net/core/utils.c index e31c90e0..4ca43fb0 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -80,23 +80,12 @@ static u32 __net_random(struct nrnd_state *state) return (state->s1 ^ state->s2 ^ state->s3); } -static void __net_srandom(struct nrnd_state *state, unsigned long s) +/* + * Handle minimum values for seeds + */ +static inline u32 __seed(u32 x, u32 m) { - if (s == 0) - s = 1; /* default seed is 1 */ - -#define LCG(n) (69069 * n) - state->s1 = LCG(s); - state->s2 = LCG(state->s1); - state->s3 = LCG(state->s2); - - /* "warm it up" */ - __net_random(state); - __net_random(state); - __net_random(state); - __net_random(state); - __net_random(state); - __net_random(state); + return (x < m) ? x + m : x; } @@ -112,9 +101,15 @@ unsigned long net_random(void) void net_srandom(unsigned long entropy) { - struct nrnd_state *state = &get_cpu_var(net_rand_state); - __net_srandom(state, state->s1^entropy); - put_cpu_var(state); + int i; + /* + * No locking on the CPUs, but then somewhat random results are, well, + * expected. 
+ */ + for_each_possible_cpu (i) { + struct nrnd_state *state = &per_cpu(net_rand_state, i); + state->s1 = __seed(state->s1 ^ entropy, 1); + } } void __init net_random_init(void) @@ -123,20 +118,37 @@ void __init net_random_init(void) for_each_possible_cpu(i) { struct nrnd_state *state = &per_cpu(net_rand_state,i); - __net_srandom(state, i+jiffies); + +#define LCG(x) ((x) * 69069) /* super-duper LCG */ + state->s1 = __seed(LCG(i + jiffies), 1); + state->s2 = __seed(LCG(state->s1), 7); + state->s3 = __seed(LCG(state->s2), 15); + + /* "warm it up" */ + __net_random(state); + __net_random(state); + __net_random(state); + __net_random(state); + __net_random(state); + __net_random(state); } } static int net_random_reseed(void) { int i; - unsigned long seed; for_each_possible_cpu(i) { struct nrnd_state *state = &per_cpu(net_rand_state,i); + u32 seeds[3]; + + get_random_bytes(&seeds, sizeof(seeds)); + state->s1 = __seed(seeds[0], 1); + state->s2 = __seed(seeds[1], 7); + state->s3 = __seed(seeds[2], 15); - get_random_bytes(&seed, sizeof(seed)); - __net_srandom(state, seed); + /* mix it in */ + __net_random(state); } return 0; } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 6f14bb5a..2a2f9e77 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -431,6 +431,11 @@ static int dccp_setsockopt_change(struct sock *sk, int type, if (copy_from_user(&opt, optval, sizeof(opt))) return -EFAULT; + /* + * rfc4340: 6.1. Change Options + */ + if (opt.dccpsf_len < 1) + return -EINVAL; val = kmalloc(opt.dccpsf_len, GFP_KERNEL); if (!val) -- 2.39.5