- Revert: [nfs] pages of a memory mapped file get corrupted (Peter Staubach ) [450335 435291]
* Sat Oct 18 2008 Jiri Pirko <jpirko@redhat.com> [2.6.18-92.1.16.el5]
- [i386] vDSO: use install_special_mapping (Peter Zijlstra ) [460275 460276] {CVE-2008-3527}
- [scsi] aacraid: remove some quirk AAC_QUIRK_SCSI_32 bits (Tomas Henzl ) [466885 453472]
- [fs] remove SUID when splicing into an inode (Eric Sandeen ) [464451 464452] {CVE-2008-3833}
- [fs] open() allows setgid bit when user is not in group (Eugene Teo ) [463867 463687] {CVE-2008-4210}
- [xen] ia64: fix INIT injection (Tetsu Yamamoto ) [467105 464445]
* Fri Oct 10 2008 Jiri Pirko <jpirko@redhat.com> [2.6.18-92.1.15.el5]
- [pci] fix problems with msi interrupt management (Neil Horman ) [461894 428696]
- [x86_64] revert time syscall changes (Prarit Bhargava ) [466427 461184]
- [xen] allow guests to hide the TSC from applications (Chris Lalancette ) [378471 378481] {CVE-2007-5907}
- [scsi] qla2xxx: additional residual-count correction (Marcus Barrow ) [465741 462117]
- [char] add range_is_allowed check to mmap_mem (Eugene Teo ) [460858 460857]
- [fs] binfmt_misc: avoid potential kernel stack overflow (Vitaly Mayatskikh ) [459464 459463]
- [misc] cpufreq: fix format string bug (Vitaly Mayatskikh ) [459461 459460]
- [dlm] user.c input validation fixes (David Teigland ) [458759 458760]
- [nfs] pages of a memory mapped file get corrupted (Peter Staubach ) [450335 435291]
- [x86_64] gettimeofday fixes for HPET, PMTimer, TSC (Prarit Bhargava ) [462860 250708]
* Wed Sep 24 2008 Jiri Pirko <jpirko@redhat.com> [2.6.18-92.1.14.el5]
- [libata] ata_scsi_rbuf_get check for scatterlist usage (David Milburn ) [460638 455445]
- [net] random32: seeding improvement (Jiri Pirko ) [458021 458019]
- [x86_64] xen: local DOS due to NT bit leakage (Eugene Teo ) [457721 457722] {CVE-2006-5755}
- [fs] cifs: fix O_APPEND on directio mounts (Jeff Layton ) [462591 460063]
- [openib] race between QP async handler and destroy_qp (Brad Peters ) [458781 446109]
- [net] dccp_setsockopt_change integer overflow (Vitaly Mayatskikh ) [459232 459235] {CVE-2008-3276}
- [acpi] error attaching device data (peterm@redhat.com ) [460868 459670]
- [mm] optimize ZERO_PAGE in 'get_user_pages' and fix XIP (Anton Arapov ) [452667 452668] {CVE-2008-2372}
- [xen] xennet: coordinate ARP with backend network status (Herbert Xu ) [461457 458934]
- [xen] event channel lock and barrier (Markus Armbruster ) [461099 457086]
- [fs] fix bad unlock_page in pipe_to_file() error path (Larry Woodman ) [462436 439917]
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 18
-EXTRAVERSION = -92.1.13.el5
+EXTRAVERSION = -92.1.17.el5
RHEL_MAJOR = 5
RHEL_MINOR = 2
NAME=Avast! A bilge rat!
*/
extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-static void *syscall_page;
+static struct page *syscall_pages[1];
int __cpuinit sysenter_setup(void)
{
- syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+ void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
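+ /* record the zeroed page so install_special_mapping() can map it into each mm */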
+ syscall_pages[0] = virt_to_page(syscall_page);
#ifdef CONFIG_COMPAT_VDSO
__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
return 0;
}
-static struct page *syscall_nopage(struct vm_area_struct *vma,
- unsigned long adr, int *type)
-{
- struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
- get_page(p);
- return p;
-}
-
-/* Prevent VMA merging */
-static void syscall_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall_vm_ops = {
- .close = syscall_vma_close,
- .nopage = syscall_nopage,
-};
-
/* Defined in vsyscall-sysenter.S */
extern void SYSENTER_RETURN;
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
unsigned long start_code, unsigned long interp_map_address)
{
- struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret;
goto up_fail;
}
- vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
- if (!vma) {
- ret = -ENOMEM;
- goto up_fail;
- }
-
- vma->vm_start = addr;
- vma->vm_end = addr + PAGE_SIZE;
- /* MAYWRITE to allow gdb to COW and set breakpoints */
- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
/*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ *
* Make sure the vDSO gets into every core dump.
* Dumping its contents makes post-mortem fully interpretable later
* without matching up the same kernel and hardware config to see
* what PC values meant.
*/
- vma->vm_flags |= VM_ALWAYSDUMP;
- vma->vm_flags |= mm->def_flags;
- vma->vm_page_prot = protection_map[vma->vm_flags & 7];
- vma->vm_ops = &syscall_vm_ops;
- vma->vm_mm = mm;
-
- ret = insert_vm_struct(mm, vma);
- if (unlikely(ret)) {
- kmem_cache_free(vm_area_cachep, vma);
+ ret = install_special_mapping(mm, addr, PAGE_SIZE,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_ALWAYSDUMP,
+ syscall_pages);
+ if (ret)
goto up_fail;
- }
current->mm->context.vdso = (void *)addr;
current_thread_info()->sysenter_return =
(void *)VDSO_SYM(&SYSENTER_RETURN);
- mm->total_vm++;
up_fail:
up_write(&mm->mmap_sem);
return ret;
printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
page_count(pg),
pg->flags);
- if (upg/* && pg != upg*/) {
+ if (upg && !IS_ERR(upg) /* && pg != upg*/) {
printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
<< PAGE_SHIFT),
page_count(upg),
extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
extern int sysctl_vsyscall32;
-char *syscall32_page;
+static struct page *syscall32_pages[1];
static int use_sysenter = -1;
#if CONFIG_XEN_COMPAT < 0x030200
static int use_int80 = 1;
#endif
-static struct page *
-syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
-{
- struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page);
- get_page(p);
- return p;
-}
-
-/* Prevent VMA merging */
-static void syscall32_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall32_vm_ops = {
- .close = syscall32_vma_close,
- .nopage = syscall32_nopage,
-};
-
struct linux_binprm;
/* Setup a VMA at program startup for the vsyscall page */
unsigned long start_code,
unsigned long interp_map_address)
{
- int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
- struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
int ret;
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!vma)
- return -ENOMEM;
-
- memset(vma, 0, sizeof(struct vm_area_struct));
- /* Could randomize here */
- vma->vm_start = VSYSCALL32_BASE;
- vma->vm_end = VSYSCALL32_END;
- /* MAYWRITE to allow gdb to COW and set breakpoints */
- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
- vma->vm_flags |= mm->def_flags;
- vma->vm_page_prot = protection_map[vma->vm_flags & 7];
- vma->vm_ops = &syscall32_vm_ops;
- vma->vm_mm = mm;
-
down_write(&mm->mmap_sem);
- if ((ret = insert_vm_struct(mm, vma))) {
- up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, vma);
- return ret;
- }
- mm->total_vm += npages;
+ /*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ *
+ * Make sure the vDSO gets into every core dump.
+ * Dumping its contents makes post-mortem fully interpretable later
+ * without matching up the same kernel and hardware config to see
+ * what PC values meant.
+ */
+ ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_ALWAYSDUMP,
+ syscall32_pages);
up_write(&mm->mmap_sem);
- return 0;
+ return ret;
}
static int __init init_syscall32(void)
{
- syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+ void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+ syscall32_pages[0] = virt_to_page(syscall32_page);
if (!syscall32_page)
panic("Cannot allocate syscall32 page");
extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
extern int sysctl_vsyscall32;
-char *syscall32_page;
+static struct page *syscall32_pages[1];
static int use_sysenter = -1;
-static struct page *
-syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
-{
- struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page);
- get_page(p);
- return p;
-}
-
-/* Prevent VMA merging */
-static void syscall32_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall32_vm_ops = {
- .close = syscall32_vma_close,
- .nopage = syscall32_nopage,
-};
-
struct linux_binprm;
/* Setup a VMA at program startup for the vsyscall page */
unsigned long start_code,
unsigned long interp_map_address)
{
- int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
- struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
int ret;
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!vma)
- return -ENOMEM;
-
- memset(vma, 0, sizeof(struct vm_area_struct));
- /* Could randomize here */
- vma->vm_start = VSYSCALL32_BASE;
- vma->vm_end = VSYSCALL32_END;
- /* MAYWRITE to allow gdb to COW and set breakpoints */
- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+ down_write(&mm->mmap_sem);
/*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ *
* Make sure the vDSO gets into every core dump.
* Dumping its contents makes post-mortem fully interpretable later
* without matching up the same kernel and hardware config to see
* what PC values meant.
*/
- vma->vm_flags |= VM_ALWAYSDUMP;
- vma->vm_flags |= mm->def_flags;
- vma->vm_page_prot = protection_map[vma->vm_flags & 7];
- vma->vm_ops = &syscall32_vm_ops;
- vma->vm_mm = mm;
-
- down_write(&mm->mmap_sem);
- if ((ret = insert_vm_struct(mm, vma))) {
- up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, vma);
- return ret;
- }
- mm->total_vm += npages;
+ ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_ALWAYSDUMP,
+ syscall32_pages);
up_write(&mm->mmap_sem);
- return 0;
+ return ret;
}
static int __init init_syscall32(void)
{
- syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+ void *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+ syscall32_pages[0] = virt_to_page(syscall32_page);
if (!syscall32_page)
panic("Cannot allocate syscall32 page");
if (use_sysenter > 0) {
last_pmtmr_tick = inl(pmtmr_ioport);
}
-unsigned int do_gettimeoffset_pm(void)
+long do_gettimeoffset_pm(void)
{
u32 now, offset, delta = 0;
now = inl(pmtmr_ioport);
delta = (now - offset) & ACPI_PM_MASK;
- return offset_delay + cyc2us(delta);
+ /* seems crazy to do with PM timer resolution but we need nsec
+ resolution in arch/x86_64/kernel/time.c code */
+ return ((offset_delay + cyc2us(delta)) * NSEC_PER_USEC);
}
+unsigned long kernel_eflags;
+
/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
* Copyright (c) 2002,2006 Vojtech Pavlik
* Copyright (c) 2003 Andi Kleen
* RTC support code taken from arch/i386/kernel/timers/time_hpet.c
+ *
+ * March 2008: Upstream has diverged significantly from this codebase.
+ * Modifications to this file to convert the gettimeofday call into nsecs
+ * (but still return usec values) were done in order to resolve a large
+ * number of gettimeofday issues seen across a wide swath of Intel and
+ * AMD systems.
*/
#include <linux/kernel.h>
#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
-#define USEC_PER_REAL_TICK (USEC_PER_SEC / REAL_HZ)
+#define NSEC_PER_REAL_TICK (NSEC_PER_SEC / REAL_HZ)
#define NS_SCALE 10 /* 2^10, carefully chosen */
#define US_SCALE 32 /* 2^32, arbitrarily chosen */
struct timezone __sys_tz __section_sys_tz;
/*
- * do_gettimeoffset() returns microseconds since last timer interrupt was
+ * do_gettimeoffset() returns nanoseconds since last timer interrupt was
* triggered by hardware. A memory read of HPET is slower than a register read
* of TSC, but much more reliable. It's also synchronized to the timer
* interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
* together by xtime_lock.
*/
-static inline unsigned int do_gettimeoffset_tsc(void)
+static inline long do_gettimeoffset_tsc(void)
{
unsigned long t;
unsigned long x;
t = get_cycles_sync();
if (t < vxtime.last_tsc)
t = vxtime.last_tsc; /* hack */
- x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
+ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> NS_SCALE;
return x;
}
-static inline unsigned int do_gettimeoffset_hpet(void)
+static inline long do_gettimeoffset_hpet(void)
{
/* cap counter read to one tick to avoid inconsistencies */
unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
/* The hpet counter runs at a fixed rate so we don't care about HZ
scaling here. We do however care that the limit is in real ticks */
- return (min(counter,hpet_tick_real) * vxtime.quot) >> US_SCALE;
+ return (min(counter,hpet_tick_real) * vxtime.quot) >> NS_SCALE;
}
-unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
+long (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
/*
* This version of gettimeofday() has microsecond resolution and better than
void do_gettimeofday(struct timeval *tv)
{
- unsigned long seq, t;
- unsigned int sec, usec;
+ unsigned long seq;
+ long sec, nsec;
do {
seq = read_seqbegin(&xtime_lock);
sec = xtime.tv_sec;
- usec = xtime.tv_nsec / NSEC_PER_USEC;
+ nsec = xtime.tv_nsec + (jiffies - wall_jiffies) * NSEC_PER_TICK;
- /* i386 does some correction here to keep the clock
- monotonous even when ntpd is fixing drift.
- But they didn't work for me, there is a non monotonic
- clock anyways with ntp.
- I dropped all corrections now until a real solution can
- be found. Note when you fix it here you need to do the same
- in arch/x86_64/kernel/vsyscall.c and export all needed
- variables in vmlinux.lds. -AK */
-
- t = (jiffies - wall_jiffies) * USEC_PER_TICK +
- do_gettimeoffset();
- usec += t;
+ nsec += do_gettimeoffset();
} while (read_seqretry(&xtime_lock, seq));
- tv->tv_sec = sec + usec / USEC_PER_SEC;
- tv->tv_usec = usec % USEC_PER_SEC;
+ tv->tv_sec = sec;
+ while (nsec >= NSEC_PER_SEC) {
+ tv->tv_sec += 1;
+ nsec -= NSEC_PER_SEC;
+ }
+ tv->tv_usec = nsec / NSEC_PER_USEC;
}
EXPORT_SYMBOL(do_gettimeofday);
write_seqlock_irq(&xtime_lock);
- nsec -= do_gettimeoffset() * NSEC_PER_USEC +
- (jiffies - wall_jiffies) * NSEC_PER_TICK;
+ nsec -= do_gettimeoffset() + (jiffies - wall_jiffies) * NSEC_PER_TICK;
wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
#endif
} else {
offset = (((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> US_SCALE) - USEC_PER_REAL_TICK;
+ vxtime.tsc_quot) >> NS_SCALE) - NSEC_PER_REAL_TICK;
if (offset < 0)
offset = 0;
- if (offset > USEC_PER_REAL_TICK) {
- lost = offset / USEC_PER_REAL_TICK;
- offset %= USEC_PER_REAL_TICK;
+ lost = 0;
+ while (offset > NSEC_PER_REAL_TICK) {
+ lost++;
+ offset -= NSEC_PER_REAL_TICK;
}
/* FIXME: 1000 or 1000000? */
vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
if ((((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> US_SCALE) < offset)
+ vxtime.tsc_quot) >> NS_SCALE) < offset)
vxtime.last_tsc = tsc -
- (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
+ (((long) offset << NS_SCALE) / vxtime.tsc_quot) - 1;
}
/* SCALE: We expect tick_divider - 1 lost, ie 0 for normal behaviour */
if (lost > (int)tick_divider - 1) {
tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+ vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
}
set_cyc2ns_scale(tsc_khz_ref);
cpu_khz = tsc_calibrate_cpu_khz();
vxtime.mode = VXTIME_TSC;
- vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
- vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+ vxtime.quot = (NSEC_PER_SEC << NS_SCALE) / vxtime_hz;
+ vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
vxtime.last_tsc = get_cycles_sync();
setup_irq(0, &irq0);
vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
- vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
- vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+ vxtime.quot = (NSEC_PER_SEC << NS_SCALE) / vxtime_hz;
+ vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
vxtime.last_tsc = get_cycles_sync();
set_cyc2ns_scale(cpu_khz);
asm("" : "=r" (v) : "0" (x)); \
((v - fix_to_virt(VSYSCALL_FIRST_PAGE)) + __pa_symbol(&__vsyscall_0)); })
+#define NS_SCALE 10 /* 2^10, carefully chosen */
+
static __always_inline void timeval_normalize(struct timeval * tv)
{
time_t __sec;
static __always_inline void do_vgettimeofday(struct timeval * tv)
{
long sequence, t;
- unsigned long sec, usec;
+ long sec, nsec;
do {
sequence = read_seqbegin(&__xtime_lock);
-
+
sec = __xtime.tv_sec;
- usec = (__xtime.tv_nsec / 1000) +
- (__jiffies - __wall_jiffies) * (1000000 / HZ);
+ nsec = __xtime.tv_nsec +
+ (__jiffies - __wall_jiffies) * (NSEC_PER_SEC / HZ);
if (__vxtime.mode != VXTIME_HPET) {
t = get_cycles_sync();
if (t < __vxtime.last_tsc)
t = __vxtime.last_tsc;
- usec += ((t - __vxtime.last_tsc) *
- __vxtime.tsc_quot) >> 32;
- /* See comment in x86_64 do_gettimeofday. */
+ nsec += ((t - __vxtime.last_tsc) *
+ __vxtime.tsc_quot) >> NS_SCALE;
} else {
- usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
- __vxtime.last) * __vxtime.quot) >> 32;
+ nsec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) +
+ 0xf0) -
+ __vxtime.last) * __vxtime.quot) >> NS_SCALE;
}
} while (read_seqretry(&__xtime_lock, sequence));
- tv->tv_sec = sec + usec / 1000000;
- tv->tv_usec = usec % 1000000;
+ tv->tv_sec = sec;
+ while (nsec >= NSEC_PER_SEC) {
+ tv->tv_sec += 1;
+ nsec -= NSEC_PER_SEC;
+ }
+ tv->tv_usec = nsec / NSEC_PER_USEC;
}
/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
if (!__sysctl_vsyscall)
return time_syscall(t);
else if (t)
- *t = __xtime.tv_sec;
+ *t = __xtime.tv_sec;
+
return __xtime.tv_sec;
}
XEN_TARGET_X86_PAE ?= y
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.18-92.1.13.el5
+LINUX_VER = 2.6.18-92.1.17.el5
EXTRAVERSION ?= xen
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.18-prep
-# Mon Sep 29 11:42:18 2008
+# Wed Nov 5 04:51:57 2008
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.18-prep
-# Mon Sep 29 11:42:18 2008
+# Wed Nov 5 04:51:58 2008
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.18-prep
-# Mon Sep 29 11:42:18 2008
+# Wed Nov 5 04:51:58 2008
#
CONFIG_X86_32=y
CONFIG_LOCKDEP_SUPPORT=y
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.18-prep
-# Mon Sep 29 11:42:18 2008
+# Wed Nov 5 04:51:58 2008
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
* to the original Node.
*/
switch (target_node->type) {
+
+ /* For these types, the sub-object can change dynamically via a Store */
+
case ACPI_TYPE_INTEGER:
case ACPI_TYPE_STRING:
case ACPI_TYPE_BUFFER:
case ACPI_TYPE_PACKAGE:
case ACPI_TYPE_BUFFER_FIELD:
+ /*
+ * These types open a new scope, so we need the NS node in order to access
+ * any children.
+ */
+ case ACPI_TYPE_DEVICE:
+ case ACPI_TYPE_POWER:
+ case ACPI_TYPE_PROCESSOR:
+ case ACPI_TYPE_THERMAL:
+ case ACPI_TYPE_LOCAL_SCOPE:
+
/*
* The new alias has the type ALIAS and points to the original
- * NS node, not the object itself. This is because for these
- * types, the object can change dynamically via a Store.
+ * NS node, not the object itself.
*/
alias_node->type = ACPI_TYPE_LOCAL_ALIAS;
alias_node->object =
case ACPI_TYPE_METHOD:
/*
- * The new alias has the type ALIAS and points to the original
- * NS node, not the object itself. This is because for these
- * types, the object can change dynamically via a Store.
+ * Control method aliases need to be differentiated
*/
alias_node->type = ACPI_TYPE_LOCAL_METHOD_ALIAS;
alias_node->object =
return_ACPI_STATUS(status);
}
- /*
- * Sanity typecheck of the target object:
- *
- * If 1) This is the last segment (num_segments == 0)
- * 2) And we are looking for a specific type
- * (Not checking for TYPE_ANY)
- * 3) Which is not an alias
- * 4) Which is not a local type (TYPE_SCOPE)
- * 5) And the type of target object is known (not TYPE_ANY)
- * 6) And target object does not match what we are looking for
- *
- * Then we have a type mismatch. Just warn and ignore it.
- */
- if ((num_segments == 0) &&
- (type_to_check_for != ACPI_TYPE_ANY) &&
- (type_to_check_for != ACPI_TYPE_LOCAL_ALIAS) &&
- (type_to_check_for != ACPI_TYPE_LOCAL_METHOD_ALIAS) &&
- (type_to_check_for != ACPI_TYPE_LOCAL_SCOPE) &&
- (this_node->type != ACPI_TYPE_ANY) &&
- (this_node->type != type_to_check_for)) {
-
- /* Complain about a type mismatch */
-
- ACPI_WARNING((AE_INFO,
- "NsLookup: Type mismatch on %4.4s (%s), searching for (%s)",
- ACPI_CAST_PTR(char, &simple_name),
- acpi_ut_get_type_name(this_node->type),
- acpi_ut_get_type_name
- (type_to_check_for)));
+ /* More segments to follow? */
+
+ if (num_segments > 0) {
+ /*
+ * If we have an alias to an object that opens a scope (such as a
+ * device or processor), we need to dereference the alias here so that
+ * we can access any children of the original node (via the remaining
+ * segments).
+ */
+ if (this_node->type == ACPI_TYPE_LOCAL_ALIAS) {
+ if (acpi_ns_opens_scope
+ (((struct acpi_namespace_node *)this_node->
+ object)->type)) {
+ this_node =
+ (struct acpi_namespace_node *)
+ this_node->object;
+ }
+ }
}
- /*
- * If this is the last name segment and we are not looking for a
- * specific type, but the type of found object is known, use that type
- * to see if it opens a scope.
- */
- if ((num_segments == 0) && (type == ACPI_TYPE_ANY)) {
- type = this_node->type;
+ /* Special handling for the last segment (num_segments == 0) */
+
+ else {
+ /*
+ * Sanity typecheck of the target object:
+ *
+ * If 1) This is the last segment (num_segments == 0)
+ * 2) And we are looking for a specific type
+ * (Not checking for TYPE_ANY)
+ * 3) Which is not an alias
+ * 4) Which is not a local type (TYPE_SCOPE)
+ * 5) And the type of target object is known (not TYPE_ANY)
+ * 6) And target object does not match what we are looking for
+ *
+ * Then we have a type mismatch. Just warn and ignore it.
+ */
+ if ((type_to_check_for != ACPI_TYPE_ANY) &&
+ (type_to_check_for != ACPI_TYPE_LOCAL_ALIAS) &&
+ (type_to_check_for != ACPI_TYPE_LOCAL_METHOD_ALIAS)
+ && (type_to_check_for != ACPI_TYPE_LOCAL_SCOPE)
+ && (this_node->type != ACPI_TYPE_ANY)
+ && (this_node->type != type_to_check_for)) {
+
+ /* Complain about a type mismatch */
+
+ ACPI_WARNING((AE_INFO,
+ "NsLookup: Type mismatch on %4.4s (%s), searching for (%s)",
+ ACPI_CAST_PTR(char, &simple_name),
+ acpi_ut_get_type_name(this_node->
+ type),
+ acpi_ut_get_type_name
+ (type_to_check_for)));
+ }
+
+ /*
+ * If this is the last name segment and we are not looking for a
+ * specific type, but the type of found object is known, use that type
+ * to (later) see if it opens a scope.
+ */
+ if (type == ACPI_TYPE_ANY) {
+ type = this_node->type;
+ }
}
/* Point to next name segment and make this node current */
u8 *buf;
unsigned int buflen;
- struct scatterlist *sg = scsi_sglist(cmd);
-
- if (sg) {
+ if (cmd->use_sg) {
+ struct scatterlist *sg;
+
+ sg = (struct scatterlist *) cmd->request_buffer;
buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
buflen = sg->length;
} else {
static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf)
{
- struct scatterlist *sg = scsi_sglist(cmd);
- if (sg)
+ if (cmd->use_sg) {
+ struct scatterlist *sg;
+
+ sg = (struct scatterlist *) cmd->request_buffer;
kunmap_atomic(buf - sg->offset, KM_IRQ0);
+ }
}
/**
# include <linux/efi.h>
#endif
-static inline int range_is_allowed(unsigned long from, unsigned long to)
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
- unsigned long cursor;
+ u64 from = ((u64)pfn) << PAGE_SHIFT;
+ u64 to = from + size;
+ u64 cursor = from;
- cursor = from >> PAGE_SHIFT;
- while ((cursor << PAGE_SHIFT) < to) {
- if (!devmem_is_allowed(cursor)) {
- printk ("Program %s tried to read /dev/mem between %lx->%lx.\n",
+ while (cursor < to) {
+ if (!devmem_is_allowed(pfn)) {
+ printk ("Program %s tried to read /dev/mem between %Lx->%Lx.\n",
current->comm, from, to);
return 0;
}
- cursor++;
+ cursor += PAGE_SIZE;
+ pfn++;
}
return 1;
}
*/
ptr = xlate_dev_mem_ptr(p);
- if (!range_is_allowed(p, p+count))
+ if (!range_is_allowed(p >> PAGE_SHIFT, count))
return -EPERM;
if (copy_to_user(buf, ptr, sz))
return -EFAULT;
if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
return -EINVAL;
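+ /* enforce the devmem_is_allowed() policy when /dev/mem is mmap()ed, too */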
+ if (!range_is_allowed(vma->vm_pgoff, size))
+ return -EPERM;
+
vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
size,
vma->vm_page_prot);
int ret;
mutex_unlock(&cpufreq_governor_mutex);
- ret = request_module(name);
+ ret = request_module("%s", name);
mutex_lock(&cpufreq_governor_mutex);
if (ret == 0)
int mtu_shift;
u32 message_count;
u32 packet_count;
+ atomic_t nr_events; /* events seen */
+ wait_queue_head_t wait_completion;
};
#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, token);
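+ /* count the in-flight event so destroy_qp() can wait for handlers to finish */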
+ if (qp)
+ atomic_inc(&qp->nr_events);
read_unlock(&ehca_qp_idr_lock);
if (!qp)
if (fatal && qp->ext_type == EQPT_SRQBASE)
dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
+ if (atomic_dec_and_test(&qp->nr_events))
+ wake_up(&qp->wait_completion);
return;
}
return ERR_PTR(-ENOMEM);
}
+ atomic_set(&my_qp->nr_events, 0);
+ init_waitqueue_head(&my_qp->wait_completion);
spin_lock_init(&my_qp->spinlock_s);
spin_lock_init(&my_qp->spinlock_r);
my_qp->qp_type = qp_type;
idr_remove(&ehca_qp_idr, my_qp->token);
write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ /* now wait until all pending events have completed */
+ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
+
h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
if (h_ret != H_SUCCESS) {
ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
return 0;
}
-static void msi_cache_ctor(void *p, kmem_cache_t *cache, unsigned long flags)
-{
- memset(p, 0, sizeof(struct msi_desc));
-}
-
static int msi_cache_init(void)
{
- msi_cachep = kmem_cache_create("msi_cache",
- sizeof(struct msi_desc),
- 0, SLAB_HWCACHE_ALIGN, msi_cache_ctor, NULL);
+ msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
if (!msi_cachep)
return -ENOMEM;
{
struct msi_desc *entry;
- entry = kmem_cache_alloc(msi_cachep, SLAB_KERNEL);
+ entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL);
if (!entry)
return NULL;
- memset(entry, 0, sizeof(struct msi_desc));
entry->link.tail = entry->link.head = 0; /* single message */
entry->dev = NULL;
return 0;
}
+/**
+ * pci_msi_supported - check whether MSI may be enabled on device
+ * @dev: pointer to the pci_dev data structure of MSI device function
+ *
+ * MSI must be globally enabled and supported by the device and its root
+ * bus. But, the root bus is not easy to find since some architectures
+ * have virtual busses on top of the PCI hierarchy (for instance the
+ * hypertransport bus), while the actual bus where MSI must be supported
+ * is below. So we test the MSI flag on all parent busses and assume
+ * that no quirk will ever set the NO_MSI flag on a non-root bus.
+ **/
+static
+int pci_msi_supported(struct pci_dev * dev)
+{
+ struct pci_bus *bus;
+
+ if (!pci_msi_enable || !dev || dev->no_msi)
+ return -EINVAL;
+
+ /* check MSI flags of all parent busses */
+ for (bus = dev->bus; bus; bus = bus->parent)
+ if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* pci_enable_msi - configure device's MSI capability structure
* @dev: pointer to the pci_dev data structure of MSI device function
**/
int pci_enable_msi(struct pci_dev* dev)
{
- struct pci_bus *bus;
- int pos, temp, status = -EINVAL;
- u16 control;
-
- if (!pci_msi_enable || !dev)
- return status;
-
- if (dev->no_msi)
- return status;
+ int pos, temp, status;
- for (bus = dev->bus; bus; bus = bus->parent)
- if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
- return -EINVAL;
+ if (pci_msi_supported(dev) < 0)
+ return -EINVAL;
temp = dev->irq;
if (!pos)
return -EINVAL;
- if (!msi_lookup_vector(dev, PCI_CAP_ID_MSI)) {
- /* Lookup Sucess */
- unsigned long flags;
+ WARN_ON(!msi_lookup_vector(dev, PCI_CAP_ID_MSI));
- pci_read_config_word(dev, msi_control_reg(pos), &control);
- if (control & PCI_MSI_FLAGS_ENABLE)
- return 0; /* Already in MSI mode */
- spin_lock_irqsave(&msi_lock, flags);
- if (!vector_irq[dev->irq]) {
- msi_desc[dev->irq]->msi_attrib.state = 0;
- vector_irq[dev->irq] = -1;
- nr_released_vectors--;
- spin_unlock_irqrestore(&msi_lock, flags);
- status = msi_register_init(dev, msi_desc[dev->irq]);
- if (status == 0)
- enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
- return status;
- }
- spin_unlock_irqrestore(&msi_lock, flags);
- dev->irq = temp;
- }
/* Check whether driver already requested for MSI-X vectors */
pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
if (pos > 0 && !msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
if (!(control & PCI_MSI_FLAGS_ENABLE))
return;
+ disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
+
spin_lock_irqsave(&msi_lock, flags);
entry = msi_desc[dev->irq];
if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
pci_name(dev), dev->irq);
BUG_ON(entry->msi_attrib.state > 0);
} else {
- vector_irq[dev->irq] = 0; /* free it */
- nr_released_vectors++;
default_vector = entry->msi_attrib.default_vector;
spin_unlock_irqrestore(&msi_lock, flags);
+ msi_free_vector(dev, dev->irq, 0);
+
/* Restore dev->irq to its default pin-assertion vector */
dev->irq = default_vector;
- disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI),
- PCI_CAP_ID_MSI);
}
}
return 0;
}
-static int reroute_msix_table(int head, struct msix_entry *entries, int *nvec)
-{
- int vector = head, tail = 0;
- int i, j = 0, nr_entries = 0;
- void __iomem *base;
- unsigned long flags;
-
- spin_lock_irqsave(&msi_lock, flags);
- while (head != tail) {
- nr_entries++;
- tail = msi_desc[vector]->link.tail;
- if (entries[0].entry == msi_desc[vector]->msi_attrib.entry_nr)
- j = vector;
- vector = tail;
- }
- if (*nvec > nr_entries) {
- spin_unlock_irqrestore(&msi_lock, flags);
- *nvec = nr_entries;
- return -EINVAL;
- }
- vector = ((j > 0) ? j : head);
- for (i = 0; i < *nvec; i++) {
- j = msi_desc[vector]->msi_attrib.entry_nr;
- msi_desc[vector]->msi_attrib.state = 0; /* Mark it not active */
- vector_irq[vector] = -1; /* Mark it busy */
- nr_released_vectors--;
- entries[i].vector = vector;
- if (j != (entries + i)->entry) {
- base = msi_desc[vector]->mask_base;
- msi_desc[vector]->msi_attrib.entry_nr =
- (entries + i)->entry;
- writel( readl(base + j * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET), base +
- (entries + i)->entry * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
- writel( readl(base + j * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET), base +
- (entries + i)->entry * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
- writel( (readl(base + j * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_DATA_OFFSET) & 0xff00) | vector,
- base + (entries+i)->entry*PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_DATA_OFFSET);
- }
- vector = msi_desc[vector]->link.tail;
- }
- spin_unlock_irqrestore(&msi_lock, flags);
-
- return 0;
-}
-
/**
* pci_enable_msix - configure device's MSI-X capability structure
* @dev: pointer to the pci_dev data structure of MSI-X device function
**/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
- struct pci_bus *bus;
int status, pos, nr_entries, free_vectors;
int i, j, temp;
u16 control;
unsigned long flags;
- if (!pci_msi_enable || !dev || !entries)
+ if (!entries || pci_msi_supported(dev) < 0)
return -EINVAL;
- if (dev->no_msi)
- return -EINVAL;
-
- for (bus = dev->bus; bus; bus = bus->parent)
- if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
- return -EINVAL;
-
status = msi_init();
if (status < 0)
return status;
return -EINVAL;
pci_read_config_word(dev, msi_control_reg(pos), &control);
- if (control & PCI_MSIX_FLAGS_ENABLE)
- return -EINVAL; /* Already in MSI-X mode */
-
nr_entries = multi_msix_capable(control);
if (nvec > nr_entries)
return -EINVAL;
}
}
temp = dev->irq;
- if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
- /* Lookup Sucess */
- nr_entries = nvec;
- /* Reroute MSI-X table */
- if (reroute_msix_table(dev->irq, entries, &nr_entries)) {
- /* #requested > #previous-assigned */
- dev->irq = temp;
- return nr_entries;
- }
- dev->irq = temp;
- enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
- return 0;
- }
+ WARN_ON(!msi_lookup_vector(dev, PCI_CAP_ID_MSIX));
+
/* Check whether driver already requested for MSI vector */
if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
!msi_lookup_vector(dev, PCI_CAP_ID_MSI)) {
if (!(control & PCI_MSIX_FLAGS_ENABLE))
return;
+ disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
+
temp = dev->irq;
if (!msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
int state, vector, head, tail = 0, warning = 0;
unsigned long flags;
vector = head = dev->irq;
- spin_lock_irqsave(&msi_lock, flags);
+ dev->irq = temp; /* Restore pin IRQ */
while (head != tail) {
+ spin_lock_irqsave(&msi_lock, flags);
state = msi_desc[vector]->msi_attrib.state;
+ tail = msi_desc[vector]->link.tail;
+ spin_unlock_irqrestore(&msi_lock, flags);
if (state)
warning = 1;
- else {
- vector_irq[vector] = 0; /* free it */
- nr_released_vectors++;
- }
- tail = msi_desc[vector]->link.tail;
+ else if (vector != head) /* Release MSI-X vector */
+ msi_free_vector(dev, vector, 0);
vector = tail;
}
- spin_unlock_irqrestore(&msi_lock, flags);
+ msi_free_vector(dev, vector, 0);
if (warning) {
- dev->irq = temp;
printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without "
"free_irq() on all MSI-X vectors\n",
pci_name(dev));
BUG_ON(warning > 0);
- } else {
- dev->irq = temp;
- disable_msi_mode(dev,
- pci_find_capability(dev, PCI_CAP_ID_MSIX),
- PCI_CAP_ID_MSIX);
-
}
}
}
(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
#define multi_msi_enable(control, num) \
control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
-#define is_64bit_address(control) (control & PCI_MSI_FLAGS_64BIT)
-#define is_mask_bit_support(control) (control & PCI_MSI_FLAGS_MASKBIT)
+#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
+#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
#define msi_enable(control, num) multi_msi_enable(control, num); \
control |= PCI_MSI_FLAGS_ENABLE
{ aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* PERC 3/Di (Boxster/PERC3DiB) */
{ aac_rx_init, "aacraid", "ADAPTEC ", "catapult ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* catapult */
{ aac_rx_init, "aacraid", "ADAPTEC ", "tomcat ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* tomcat */
- { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2120S ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2120S (Crusader) */
- { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2200S (Vulcan) */
+ { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2120S ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec 2120S (Crusader) */
+ { aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec 2200S (Vulcan) */
{ aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2200S (Vulcan-2m) */
{ aac_rx_init, "aacraid", "Legend ", "Legend S220 ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Legend S220 (Legend Crusader) */
{ aac_rx_init, "aacraid", "Legend ", "Legend S230 ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Legend S230 (Legend Vulcan) */
resid = resid_len;
/* Use F/W calculated residual length. */
if (IS_FWI2_CAPABLE(ha)) {
- if (scsi_status & SS_RESIDUAL_UNDER &&
- resid != fw_resid_len) {
+ if (!(scsi_status & SS_RESIDUAL_UNDER)) {
+ lscsi_status = 0;
+ } else if (resid != fw_resid_len) {
scsi_status &= ~SS_RESIDUAL_UNDER;
lscsi_status = 0;
}
+ mutex_init(&u->ring_cons_mutex);
+
filp->private_data = u;
u->bind_cpu = -1;
/* Miscellaneous private stuff. */
struct list_head list; /* scheduling list */
atomic_t refcnt;
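+ /* backend xenbus device; used to signal XenbusStateConnected on carrier up */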
+ struct xenbus_device *xendev;
struct net_device *dev;
struct net_device_stats stats;
} while (0)
void netif_xenbus_init(void);
+void netif_interfaces_init(void);
#define netif_schedulable(netif) \
(netif_running((netif)->dev) && netback_carrier_ok(netif))
#include "common.h"
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
+#include <xen/xenbus.h>
/*
* Module parameter 'queue_length':
free_netdev(netif->dev);
}
+
+
+static int
+netdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+
+ /* Carrier up event and is it one of our devices? */
+ if (event == NETDEV_CHANGE && netif_carrier_ok(dev) &&
+ dev->open == net_open) {
+ netif_t *netif = netdev_priv(dev);
+
+ xenbus_switch_state(netif->xendev, XenbusStateConnected);
+ }
+
+ return NOTIFY_DONE;
+}
+
+
+static struct notifier_block notifier_netdev = {
+ .notifier_call = netdev_notify,
+};
+
+
+void netif_interfaces_init(void)
+{
+ (void)register_netdevice_notifier(&notifier_netdev);
+}
netif_accel_init();
+ netif_interfaces_init();
netif_xenbus_init();
#ifdef NETBE_DEBUG_INTERRUPT
return;
}
+ be->netif->xendev = dev;
kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
}
int err;
struct xenbus_device *dev = be->dev;
- err = connect_rings(be);
- if (err)
- return;
-
err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
if (err) {
xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
&be->netif->credit_usec);
be->netif->remaining_credit = be->netif->credit_bytes;
- xenbus_switch_state(dev, XenbusStateConnected);
+ err = connect_rings(be);
+ if (err)
+ return;
netif_wake_queue(be->netif->dev);
}
return -ENOEXEC;
}
- bprm->sh_bang++; /* Well, the bang-shell is implicit... */
+ bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */
allow_write_access(bprm->file);
fput(bprm->file);
bprm->file = NULL;
#define MISC_FMT_OPEN_BINARY (1<<30)
#define MISC_FMT_CREDENTIALS (1<<29)
+/* Marker for breaking misc - > script -> misc loop */
+#define MISC_BANG (1<<1)
+
typedef struct {
struct list_head list;
unsigned long flags; /* type, status, etc. */
if (!enabled)
goto _ret;
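+ /* already passed through binfmt_misc once; refuse to loop misc -> script -> misc */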
+ retval = -ENOEXEC;
+ if (bprm->sh_bang & MISC_BANG)
+ goto _ret;
+
/* to keep locking time low, we copy the interpreter string */
read_lock(&entries_lock);
fmt = check_file(bprm);
if (retval < 0)
goto _error;
+ bprm->sh_bang |= MISC_BANG;
+
retval = search_binary_handler (bprm, regs);
if (retval < 0)
goto _error;
* Sorta complicated, but hopefully it will work. -TYT
*/
- bprm->sh_bang++;
+ bprm->sh_bang = 1;
allow_write_access(bprm->file);
fput(bprm->file);
bprm->file = NULL;
return -EBADF;
open_file = (struct cifsFileInfo *) file->private_data;
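+ /* generic_write_checks() handles O_APPEND by moving *poffset to end of file */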
+ rc = generic_write_checks(file, poffset, &write_size, 0);
+ if (rc)
+ return rc;
+
xid = GetXid();
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
static void compat_input(struct dlm_write_request *kb,
struct dlm_write_request32 *kb32,
- int max_namelen)
+ size_t count)
{
kb->version[0] = kb32->version[0];
kb->version[1] = kb32->version[1];
kb->cmd == DLM_USER_REMOVE_LOCKSPACE) {
kb->i.lspace.flags = kb32->i.lspace.flags;
kb->i.lspace.minor = kb32->i.lspace.minor;
- strcpy(kb->i.lspace.name, kb32->i.lspace.name);
+ memcpy(kb->i.lspace.name, kb32->i.lspace.name, count -
+ offsetof(struct dlm_write_request32, i.lspace.name));
} else if (kb->cmd == DLM_USER_PURGE) {
kb->i.purge.nodeid = kb32->i.purge.nodeid;
kb->i.purge.pid = kb32->i.purge.pid;
kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr;
kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb;
memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
- if (kb->i.lock.namelen <= max_namelen)
- memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen);
- else
- kb->i.lock.namelen = max_namelen;
+ memcpy(kb->i.lock.name, kb32->i.lock.name, count -
+ offsetof(struct dlm_write_request32, i.lock.name));
}
}
#endif
return -EINVAL;
- kbuf = kmalloc(count, GFP_KERNEL);
+ kbuf = kzalloc(count + 1, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
if (!kbuf->is64bit) {
struct dlm_write_request32 *k32buf;
k32buf = (struct dlm_write_request32 *)kbuf;
- kbuf = kmalloc(count + (sizeof(struct dlm_write_request) -
+ kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) -
sizeof(struct dlm_write_request32)), GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
if (proc)
set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
- compat_input(kbuf, k32buf, count - sizeof(struct dlm_write_request32));
+ compat_input(kbuf, k32buf, count + 1);
kfree(k32buf);
}
#endif
newattrs.ia_valid |= ATTR_FILE;
}
+ /* Remove suid/sgid on truncate too */
+ newattrs.ia_valid |= should_remove_suid(dentry);
+
mutex_lock(&dentry->d_inode->i_mutex);
err = notify_change(dentry, &newattrs);
mutex_unlock(&dentry->d_inode->i_mutex);
ret = add_to_page_cache_lru(page, mapping, index,
gfp_mask);
if (unlikely(ret))
- goto out;
+ goto out_release;
}
/*
goto find_page;
}
out:
- page_cache_release(page);
unlock_page(page);
+out_release:
+ page_cache_release(page);
out_ret:
return ret;
}
loff_t *ppos, size_t len, unsigned int flags)
{
struct address_space *mapping = out->f_mapping;
+ struct inode *inode = mapping->host;
ssize_t ret;
+ int err;
+
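+ /* strip setuid/setgid bits before splicing data into the file, as the write path does */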
+ err = should_remove_suid(out->f_dentry);
+ if (unlikely(err)) {
+ mutex_lock(&inode->i_mutex);
+ err = __remove_suid(out->f_dentry, err);
+ mutex_unlock(&inode->i_mutex);
+ if (err)
+ return err;
+ }
ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
if (ret > 0) {
- struct inode *inode = mapping->host;
-
*ppos += ret;
/*
* sync it.
*/
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
mutex_lock(&inode->i_mutex);
err = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
#define __EXTRA_CLOBBER \
,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
+/* Save and restore flags so a leaked NT bit is cleared before the next task runs */
#define switch_to(prev,next,last) \
asm volatile(SAVE_CONTEXT \
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
extern int pmtimer_mark_offset(void);
extern void pmtimer_resume(void);
extern void pmtimer_wait(unsigned);
-extern unsigned int do_gettimeoffset_pm(void);
+extern long do_gettimeoffset_pm(void);
#ifdef CONFIG_X86_PM_TIMER
extern u32 pmtmr_ioport;
#else
extern void __show_regs(struct pt_regs * regs);
extern void show_regs(struct pt_regs * regs);
-extern char *syscall32_page;
extern void syscall32_cpu_init(void);
extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410)
#else
#define VSYSCALL32_BASE 0xffffe000UL
-#define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE)
#define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE)
#define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400)
extern void clear_inode(struct inode *);
extern void destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *);
+extern int __remove_suid(struct dentry *, int);
+extern int should_remove_suid(struct dentry *);
extern int remove_suid(struct dentry *);
extern void remove_dquot_ref(struct super_block *, int, struct list_head *);
extern int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
- unsigned long vm_flags, pgprot_t pgprot,
- struct page **pages);
+ unsigned long vm_flags, struct page **pages);
extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
* if suid or (sgid and xgrp)
* remove privs
*/
-int remove_suid(struct dentry *dentry)
+int should_remove_suid(struct dentry *dentry)
{
mode_t mode = dentry->d_inode->i_mode;
int kill = 0;
- int result = 0;
/* suid always must be killed */
if (unlikely(mode & S_ISUID))
if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
kill |= ATTR_KILL_SGID;
- if (unlikely(kill && !capable(CAP_FSETID))) {
- struct iattr newattrs;
+ if (unlikely(kill && !capable(CAP_FSETID)))
+ return kill;
- newattrs.ia_valid = ATTR_FORCE | kill;
- result = notify_change(dentry, &newattrs);
- }
- return result;
+ return 0;
+}
+
+int __remove_suid(struct dentry *dentry, int kill)
+{
+ struct iattr newattrs;
+
+ newattrs.ia_valid = ATTR_FORCE | kill;
+ return notify_change(dentry, &newattrs);
+}
+
+int remove_suid(struct dentry *dentry)
+{
+ int kill = should_remove_suid(dentry);
+
+ if (unlikely(kill))
+ return __remove_suid(dentry, kill);
+
+ return 0;
}
EXPORT_SYMBOL(remove_suid);
}
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
- if (!ptep)
- goto out;
pte = *ptep;
if (!pte_present(pte))
- goto unlock;
+ goto no_page;
if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock;
page = vm_normal_page(vma, address, pte);
if (unlikely(!page))
- goto unlock;
+ goto bad_page;
if (flags & FOLL_GET)
get_page(page);
out:
return page;
+bad_page:
+ pte_unmap_unlock(ptep, ptl);
+ return ERR_PTR(-EFAULT);
+
+no_page:
+ pte_unmap_unlock(ptep, ptl);
+ if (!pte_none(pte))
+ return page;
+ /* Fall through to ZERO_PAGE handling */
no_page_table:
/*
* When core dumping an enormous anonymous area that nobody
return page;
}
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+ /*
+ * We don't want to optimize FOLL_ANON for make_pages_present()
+ * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+ * we want to get the page from the page tables to make sure
+ * that we serialize and update with any other user of that
+ * mapping.
+ */
+ if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+ return 0;
+ /*
+ * And if we have a fault or a nopfn routine, it's not an
+ * anonymous region.
+ */
+ return !vma->vm_ops ||
+ (!vma->vm_ops->nopage && !vma->vm_ops->nopfn);
+}
+
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int len, int write, int force,
struct page **pages, struct vm_area_struct **vmas)
foll_flags = FOLL_TOUCH;
if (pages)
foll_flags |= FOLL_GET;
- if (!write && !(vma->vm_flags & VM_LOCKED) &&
- (!vma->vm_ops || !vma->vm_ops->nopage))
+ if (!write && use_zero_page(vma))
foll_flags |= FOLL_ANON;
do {
BUG();
}
}
+ if (IS_ERR(page))
+ return i ? i : PTR_ERR(page);
if (pages) {
pages[i] = page;
goto set_status;
page = follow_page(vma, pp->addr, FOLL_GET);
+
+ err = PTR_ERR(page);
+ if (IS_ERR(page))
+ goto set_status;
+
err = -ENOENT;
if (!page)
goto set_status;
goto set_status;
page = follow_page(vma, pm->addr, 0);
+
+ err = PTR_ERR(page);
+ if (IS_ERR(page))
+ goto set_status;
+
err = -ENOENT;
/* Use PageReserved to check for zero page */
if (!page || PageReserved(page))
return NOPAGE_SIGBUS;
}
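+/* the empty ->close keeps the special mapping VMA from being merged with its neighbours */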
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
static struct vm_operations_struct special_mapping_vmops = {
+ .close = special_mapping_close,
.nopage = special_mapping_nopage,
};
-unsigned int vdso_populate = 1;
+unsigned int vdso_populate = 0;
/*
* Insert a new vma covering the given region, with the given flags and
*/
int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
- unsigned long vm_flags, pgprot_t pgprot,
- struct page **pages)
+ unsigned long vm_flags, struct page **pages)
{
struct vm_area_struct *vma;
int err;
vma->vm_start = addr;
vma->vm_end = addr + len;
- vma->vm_flags = vm_flags | VM_DONTEXPAND;
- vma->vm_page_prot = pgprot;
+ vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
vma->vm_ops = &special_mapping_vmops;
vma->vm_private_data = pages;
- insert_vm_struct(mm, vma);
+ if (unlikely(insert_vm_struct(mm, vma))) {
+ kmem_cache_free(vm_area_cachep, vma);
+ return -ENOMEM;
+ }
mm->total_vm += len >> PAGE_SHIFT;
if (!vdso_populate)
return (state->s1 ^ state->s2 ^ state->s3);
}
-static void __net_srandom(struct nrnd_state *state, unsigned long s)
+/*
+ * Handle minimum values for seeds
+ */
+static inline u32 __seed(u32 x, u32 m)
{
- if (s == 0)
- s = 1; /* default seed is 1 */
-
-#define LCG(n) (69069 * n)
- state->s1 = LCG(s);
- state->s2 = LCG(state->s1);
- state->s3 = LCG(state->s2);
-
- /* "warm it up" */
- __net_random(state);
- __net_random(state);
- __net_random(state);
- __net_random(state);
- __net_random(state);
- __net_random(state);
+ return (x < m) ? x + m : x;
}
void net_srandom(unsigned long entropy)
{
- struct nrnd_state *state = &get_cpu_var(net_rand_state);
- __net_srandom(state, state->s1^entropy);
- put_cpu_var(state);
+ int i;
+ /*
+ * No locking on the CPUs, but then somewhat random results are, well,
+ * expected.
+ */
+ for_each_possible_cpu (i) {
+ struct nrnd_state *state = &per_cpu(net_rand_state, i);
+ state->s1 = __seed(state->s1 ^ entropy, 1);
+ }
}
void __init net_random_init(void)
for_each_possible_cpu(i) {
struct nrnd_state *state = &per_cpu(net_rand_state,i);
- __net_srandom(state, i+jiffies);
+
+#define LCG(x) ((x) * 69069) /* super-duper LCG */
+ state->s1 = __seed(LCG(i + jiffies), 1);
+ state->s2 = __seed(LCG(state->s1), 7);
+ state->s3 = __seed(LCG(state->s2), 15);
+
+ /* "warm it up" */
+ __net_random(state);
+ __net_random(state);
+ __net_random(state);
+ __net_random(state);
+ __net_random(state);
+ __net_random(state);
}
}
static int net_random_reseed(void)
{
int i;
- unsigned long seed;
for_each_possible_cpu(i) {
struct nrnd_state *state = &per_cpu(net_rand_state,i);
+ u32 seeds[3];
+
+ get_random_bytes(&seeds, sizeof(seeds));
+ state->s1 = __seed(seeds[0], 1);
+ state->s2 = __seed(seeds[1], 7);
+ state->s3 = __seed(seeds[2], 15);
- get_random_bytes(&seed, sizeof(seed));
- __net_srandom(state, seed);
+ /* mix it in */
+ __net_random(state);
}
return 0;
}
if (copy_from_user(&opt, optval, sizeof(opt)))
return -EFAULT;
+ /*
+ * rfc4340: 6.1. Change Options
+ */
+ if (opt.dccpsf_len < 1)
+ return -EINVAL;
val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
if (!val)