From dfff3605f8c98a72ef33cb54fe6b1841dddc885c Mon Sep 17 00:00:00 2001 From: t_jeang Date: Tue, 6 Jan 2009 12:05:55 +0000 Subject: [PATCH] * Thu Sep 04 2008 Jiri Pirko [2.6.18-92.1.13.el5] - [md] fix crashes in iterate_rdev (Doug Ledford ) [460128 455471] - [sound] snd_seq_oss_synth_make_info info leak (Eugene Teo ) [458000 458001] {CVE-2008-3272} - [ipmi] control BMC device ordering (peterm@redhat.com ) [459071 430157] - [ia64] fix to check module_free parameter (Masami Hiramatsu ) [460639 457961] - [misc] NULL pointer dereference in kobject_get_path (Jiri Pirko ) [459776 455460] - [xen] ia64: SMP-unsafe with XENMEM_add_to_physmap on HVM (Tetsu Yamamoto ) [459780 457137] - [net] bridge: eliminate delay on carrier up (Herbert Xu ) [458783 453526] - [fs] dio: lock refcount operations (Jeff Moyer ) [459082 455750] - [misc] serial: fix break handling for i82571 over LAN (Aristeu Rozanski ) [460509 440018] - [fs] dio: use kzalloc to zero out struct dio (Jeff Moyer ) [461091 439918] - [fs] lockd: nlmsvc_lookup_host called with f_sema held (Jeff Layton ) [459083 453094] - [net] bnx2x: chip reset and port type fixes (Andy Gospodarek ) [441259 442026] * Wed Aug 27 2008 Jiri Pirko [2.6.18-92.1.12.el5] - [mm] tmpfs: restore missing clear_highpage (Eugene Teo ) [426082 426083]{CVE-2007-6417} - [fs] vfs: fix lookup on deleted directory (Eugene Teo ) [457865 457866]{CVE-2008-3275} - [net] ixgbe: remove device ID for unsupported device (Andy Gospodarek ) [457484 454910] - [ppc] Event Queue overflow on eHCA adapters (Brad Peters ) [458779 446713] * Fri Aug 01 2008 Jiri Pirko [2.6.18-92.1.11.el5] - [mm] xpmem: inhibit page swapping under heavy mem use (George Beshers ) [456946 456574] - [xen] HV: memory corruption with large number of cpus (Chris Lalancette ) [455768 449945] - [fs] missing check before setting mount propagation (Eugene Teo ) [454392 454393] - [openib] small ipoib packet can cause an oops (Doug Ledford ) [447913 445731] - [misc] fix race in switch_uid and user 
signal accounting (Vince Worthington ) [456235 441762 440830] --- Documentation/IPMI.txt | 14 ++++ Documentation/kernel-parameters.txt | 3 + Makefile | 2 +- arch/i386/kernel/setup-xen.c | 6 ++ arch/i386/kernel/setup.c | 6 ++ arch/ia64/kernel/module.c | 3 +- arch/x86_64/kernel/setup-xen.c | 6 ++ arch/x86_64/kernel/setup.c | 6 ++ buildconfigs/Rules.mk | 2 +- configs/kernel-2.6.18-i686-PAE.config | 2 +- configs/kernel-2.6.18-i686-debug.config | 2 +- configs/kernel-2.6.18-i686-xen.config | 2 +- configs/kernel-2.6.18-i686.config | 2 +- drivers/firmware/dmi_scan.c | 17 ++++- drivers/infiniband/hw/ehca/ehca_classes.h | 5 ++ drivers/infiniband/hw/ehca/ehca_cq.c | 11 +++ drivers/infiniband/hw/ehca/ehca_main.c | 36 ++++++++- drivers/infiniband/hw/ehca/ehca_qp.c | 22 +++++- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 +- drivers/md/bitmap.c | 5 +- drivers/md/md.c | 61 +++++++++++---- drivers/net/bnx2x.c | 49 +++--------- drivers/net/bnx2x.h | 3 +- drivers/net/bnx2x_init.h | 3 +- drivers/net/ixgbe/ixgbe_main.c | 2 - drivers/serial/8250.c | 17 ++++- fs/direct-io.c | 93 +++++++++++------------ fs/lockd/svc4proc.c | 4 +- fs/lockd/svclock.c | 24 +++--- fs/lockd/svcproc.c | 6 +- fs/namei.c | 18 ++++- fs/namespace.c | 3 + include/linux/lockd/lockd.h | 7 +- include/linux/page-flags.h | 9 +++ include/linux/raid/md_k.h | 3 + kernel/signal.c | 15 +++- kernel/user.c | 11 +++ lib/kref.c | 7 +- mm/rmap.c | 3 + mm/shmem.c | 5 +- net/bridge/br_if.c | 28 ++----- net/bridge/br_notify.c | 19 ++--- net/bridge/br_private.h | 3 +- net/bridge/br_stp.c | 27 +++++-- sound/core/seq/oss/seq_oss_synth.c | 3 + 45 files changed, 380 insertions(+), 203 deletions(-) diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt index 0d0ebefe..2b705dff 100644 --- a/Documentation/IPMI.txt +++ b/Documentation/IPMI.txt @@ -658,3 +658,17 @@ registered. Note that if you have ACPI enabled, the system will prefer using ACPI to power off. 
+ + +BMC Device Ordering +------------------- + +To control the order in which device nodes are created there is a kernel +parameter named "ipmi_dev_order". By default dmi_scan.c creates IPMI +device nodes in LIFO order. Users can specify "ipmi_dev_order=2" on +the kernel command line to direct the system to create the device nodes +in FIFO order. + +Note that this option is only available on IA-32 and X86_64. The option +is only important to users on multinode systems. + diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index bd4e2a98..1d605e19 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -707,6 +707,9 @@ running once the system is up. ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards See comment before ip2_setup() in drivers/char/ip2.c. + ipmi_dev_order= [IA-32,X86_64] + See Documentation/IPMI.txt. + ips= [HW,SCSI] Adaptec / IBM ServeRAID controller See header of drivers/scsi/ips.c. diff --git a/Makefile b/Makefile index 8f4a08cc..8da9f748 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 18 -EXTRAVERSION = -92.1.10.el5 +EXTRAVERSION = -92.1.13.el5 RHEL_MAJOR = 5 RHEL_MINOR = 2 NAME=Avast! A bilge rat! 
diff --git a/arch/i386/kernel/setup-xen.c b/arch/i386/kernel/setup-xen.c index 1170e104..7a3792eb 100644 --- a/arch/i386/kernel/setup-xen.c +++ b/arch/i386/kernel/setup-xen.c @@ -105,6 +105,9 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; +unsigned int ipmi_dev_order=1; +EXPORT_SYMBOL_GPL(ipmi_dev_order); + #ifdef CONFIG_ACPI int acpi_disabled = 0; #else @@ -1007,6 +1010,9 @@ static void __init parse_cmdline_early (char ** cmdline_p) else if (!memcmp(from, "vmalloc=", 8)) __VMALLOC_RESERVE = memparse(from+8, &from); + else if (!memcmp(from, "ipmi_dev_order=", 15)) + ipmi_dev_order = simple_strtoul(from + 15, NULL, 0); + next_char: c = *(from++); if (!c) diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 5188aa7e..424b7be0 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -90,6 +90,9 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; +unsigned int ipmi_dev_order=1; +EXPORT_SYMBOL_GPL(ipmi_dev_order); + #ifdef CONFIG_ACPI int acpi_disabled = 0; #else @@ -920,6 +923,9 @@ static void __init parse_cmdline_early (char ** cmdline_p) else if (!memcmp(from, "vmalloc=", 8)) __VMALLOC_RESERVE = memparse(from+8, &from); + else if (!memcmp(from, "ipmi_dev_order=", 15)) + ipmi_dev_order = simple_strtoul(from + 15, NULL, 0); + next_char: c = *(from++); if (!c) diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 158e3c51..32e5e0a1 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -321,7 +321,8 @@ module_alloc (unsigned long size) void module_free (struct module *mod, void *module_region) { - if (mod->arch.init_unw_table && module_region == mod->module_init) { + if (mod && mod->arch.init_unw_table && + module_region == mod->module_init) { unw_remove_unwind_table(mod->arch.init_unw_table); mod->arch.init_unw_table = NULL; } diff --git a/arch/x86_64/kernel/setup-xen.c b/arch/x86_64/kernel/setup-xen.c index 75e493c9..879924be 100644 --- 
a/arch/x86_64/kernel/setup-xen.c +++ b/arch/x86_64/kernel/setup-xen.c @@ -118,6 +118,9 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; +unsigned int ipmi_dev_order=1; +EXPORT_SYMBOL_GPL(ipmi_dev_order); + int acpi_disabled; EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_ACPI @@ -493,6 +496,9 @@ static __init void parse_cmdline_early (char ** cmdline_p) setup_additional_cpus(from+16); #endif + else if (!memcmp(from, "ipmi_dev_order=", 15)) + ipmi_dev_order = simple_strtoul(from + 15, NULL, 0); + next_char: c = *(from++); if (!c) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 7012ca8a..9bd9bf56 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -75,6 +75,9 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; +unsigned int ipmi_dev_order=1; +EXPORT_SYMBOL_GPL(ipmi_dev_order); + int acpi_disabled; EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_ACPI @@ -441,6 +444,9 @@ static __init void parse_cmdline_early (char ** cmdline_p) setup_additional_cpus(from+16); #endif + else if (!memcmp(from, "ipmi_dev_order=", 15)) + ipmi_dev_order = simple_strtoul(from + 15, NULL, 0); + next_char: c = *(from++); if (!c) diff --git a/buildconfigs/Rules.mk b/buildconfigs/Rules.mk index 0ce24ae5..0b93dcae 100644 --- a/buildconfigs/Rules.mk +++ b/buildconfigs/Rules.mk @@ -2,7 +2,7 @@ XEN_TARGET_ARCH = x86_32 XEN_TARGET_X86_PAE ?= y LINUX_SERIES = 2.6 -LINUX_VER = 2.6.18-92.1.10.el5 +LINUX_VER = 2.6.18-92.1.13.el5 EXTRAVERSION ?= xen diff --git a/configs/kernel-2.6.18-i686-PAE.config b/configs/kernel-2.6.18-i686-PAE.config index f2e5c7ad..50586060 100644 --- a/configs/kernel-2.6.18-i686-PAE.config +++ b/configs/kernel-2.6.18-i686-PAE.config @@ -2,7 +2,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Thu Aug 7 09:42:27 2008 +# Mon Sep 29 11:42:18 2008 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y diff --git a/configs/kernel-2.6.18-i686-debug.config 
b/configs/kernel-2.6.18-i686-debug.config index d8307fac..84bd50ee 100644 --- a/configs/kernel-2.6.18-i686-debug.config +++ b/configs/kernel-2.6.18-i686-debug.config @@ -2,7 +2,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Thu Aug 7 09:42:27 2008 +# Mon Sep 29 11:42:18 2008 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y diff --git a/configs/kernel-2.6.18-i686-xen.config b/configs/kernel-2.6.18-i686-xen.config index 6b43869c..afd893b9 100644 --- a/configs/kernel-2.6.18-i686-xen.config +++ b/configs/kernel-2.6.18-i686-xen.config @@ -2,7 +2,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Thu Aug 7 09:42:27 2008 +# Mon Sep 29 11:42:18 2008 # CONFIG_X86_32=y CONFIG_LOCKDEP_SUPPORT=y diff --git a/configs/kernel-2.6.18-i686.config b/configs/kernel-2.6.18-i686.config index 6d342520..b7e684a1 100644 --- a/configs/kernel-2.6.18-i686.config +++ b/configs/kernel-2.6.18-i686.config @@ -2,7 +2,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Thu Aug 7 09:42:27 2008 +# Mon Sep 29 11:42:18 2008 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index bfa03a61..8ca0ff08 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -153,6 +153,9 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm) { struct dmi_device *dev; void * data; +#ifdef CONFIG_X86 + extern unsigned int ipmi_dev_order; +#endif data = dmi_alloc(dm->length); if (data == NULL) { @@ -172,7 +175,19 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm) dev->name = "IPMI controller"; dev->device_data = data; - list_add(&dev->list, &dmi_devices); +#ifdef CONFIG_X86 + switch(ipmi_dev_order) { + case 2: /* FIFO */ + list_add_tail(&dev->list, &dmi_devices); + break; + default: /* LIFO */ +#endif + list_add(&dev->list, &dmi_devices); +#ifdef CONFIG_X86 + break; + } +#endif + } /* diff 
--git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 2cdae8bc..7e725ba4 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -66,6 +66,7 @@ struct ehca_av; #include "ehca_irq.h" #define EHCA_EQE_CACHE_SIZE 20 +#define EHCA_MAX_NUM_QUEUES 0xffff struct ehca_eqe_cache_entry { struct ehca_eqe *eqe; @@ -127,6 +128,8 @@ struct ehca_shca { /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ u32 hca_cap_mr_pgsize; int max_mtu; + atomic_t num_cqs; + atomic_t num_qps; }; struct ehca_pd { @@ -345,6 +348,8 @@ extern int ehca_use_hp_mr; extern int ehca_scaling_code; extern int ehca_lock_hcalls; extern int ehca_nr_ports; +extern int ehca_max_cq; +extern int ehca_max_qp; struct ipzu_queue_resp { u32 qe_size; /* queue entry size */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 0467c158..776e2d0f 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -134,10 +134,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); + if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) { + ehca_err(device, "Unable to create CQ, max number of %i " + "CQs reached.", ehca_max_cq); + ehca_err(device, "To increase the maximum number of CQs " + "use the number_of_cqs module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); if (!my_cq) { ehca_err(device, "Out of memory for ehca_cq struct device=%p", device); + atomic_dec(&shca->num_cqs); return ERR_PTR(-ENOMEM); } @@ -308,6 +317,7 @@ create_cq_exit2: create_cq_exit1: kmem_cache_free(cq_cache, my_cq); + atomic_dec(&shca->num_cqs); return cq; } @@ -369,6 +379,7 @@ int ehca_destroy_cq(struct ib_cq *cq) ipz_queue_dtor(NULL, &my_cq->ipz_queue); kmem_cache_free(cq_cache, my_cq); + atomic_dec(&shca->num_cqs); 
return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 3f5954b8..b9f2b9f3 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -68,6 +68,8 @@ int ehca_static_rate = -1; int ehca_scaling_code = 0; int ehca_mr_largepage = 1; int ehca_lock_hcalls = -1; +int ehca_max_cq = -1; +int ehca_max_qp = -1; module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO); module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); @@ -80,6 +82,8 @@ module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); module_param_named(scaling_code, ehca_scaling_code, int, S_IRUGO); module_param_named(mr_largepage, ehca_mr_largepage, int, S_IRUGO); module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); +module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); +module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); MODULE_PARM_DESC(open_aqp1, "AQP1 on startup (0: no (default), 1: yes)"); @@ -109,6 +113,12 @@ MODULE_PARM_DESC(mr_largepage, MODULE_PARM_DESC(lock_hcalls, "serialize all hCalls made by the driver " "(default: autodetect)"); +MODULE_PARM_DESC(number_of_cqs, + "Max number of CQs which can be allocated " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_qps, + "Max number of QPs which can be allocated " + "(default: autodetect)"); DEFINE_RWLOCK(ehca_qp_idr_lock); DEFINE_RWLOCK(ehca_cq_idr_lock); @@ -355,6 +365,25 @@ static int ehca_sense_attributes(struct ehca_shca *shca) shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; } + /* Set maximum number of CQs and QPs to calculate EQ size */ + if (ehca_max_qp == -1) + ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES); + else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) { + ehca_gen_err("Requested number of QPs is out of range (1 - %i) " + "specified by HW", rblock->max_qp); + ret = -EINVAL; + goto sense_attributes1; + } + + if (ehca_max_cq == -1) + ehca_max_cq = min_t(int, 
rblock->max_cq, EHCA_MAX_NUM_QUEUES); + else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) { + ehca_gen_err("Requested number of CQs is out of range (1 - %i) " + "specified by HW", rblock->max_cq); + ret = -EINVAL; + goto sense_attributes1; + } + /* query max MTU from first port -- it's the same for all ports */ port = (struct hipz_query_port *)rblock; h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); @@ -689,7 +718,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, struct ehca_shca *shca; const u64 *handle; struct ib_pd *ibpd; - int ret, i; + int ret, i, eq_size; handle = of_get_property(dev->ofdev.node, "ibm,hca-handle", NULL); if (!handle) { @@ -710,6 +739,8 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, return -ENOMEM; } mutex_init(&shca->modify_mutex); + atomic_set(&shca->num_cqs, 0); + atomic_set(&shca->num_qps, 0); for (i = 0; i < ARRAY_SIZE(shca->sport); i++) spin_lock_init(&shca->sport[i].mod_sqp_lock); @@ -729,8 +760,9 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, goto probe1; } + eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp; /* create event queues */ - ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); if (ret) { ehca_err(&shca->ib_device, "Cannot create EQ."); goto probe1; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 1012f15a..162ce6f6 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -424,6 +424,14 @@ static struct ehca_qp *internal_create_qp( u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; unsigned long flags; + if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) { + ehca_err(pd->device, "Unable to create QP, max number of %i " + "QPs reached.", ehca_max_qp); + ehca_err(pd->device, "To increase the maximum number of QPs " + "use the number_of_qps module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + memset(&parms, 0, sizeof(parms)); qp_type = 
init_attr->qp_type; @@ -431,6 +439,7 @@ static struct ehca_qp *internal_create_qp( init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", init_attr->sq_sig_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -455,6 +464,7 @@ static struct ehca_qp *internal_create_qp( if (is_llqp && has_srq) { ehca_err(pd->device, "LLQPs can't have an SRQ"); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -466,6 +476,7 @@ static struct ehca_qp *internal_create_qp( ehca_err(pd->device, "no more than three SGEs " "supported for SRQ pd=%p max_sge=%x", pd, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } } @@ -477,6 +488,7 @@ static struct ehca_qp *internal_create_qp( qp_type != IB_QPT_SMI && qp_type != IB_QPT_GSI) { ehca_err(pd->device, "wrong QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -490,6 +502,7 @@ static struct ehca_qp *internal_create_qp( "or max_rq_wr=%x for RC LLQP", init_attr->cap.max_send_wr, init_attr->cap.max_recv_wr); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } break; @@ -497,6 +510,7 @@ static struct ehca_qp *internal_create_qp( if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { ehca_err(pd->device, "UD LLQP not supported " "by this adapter"); + atomic_dec(&shca->num_qps); return ERR_PTR(-ENOSYS); } if (!(init_attr->cap.max_send_sge <= 5 @@ -508,20 +522,22 @@ static struct ehca_qp *internal_create_qp( "or max_recv_sge=%x for UD LLQP", init_attr->cap.max_send_sge, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } else if (init_attr->cap.max_send_wr > 255) { ehca_err(pd->device, "Invalid Number of " "max_send_wr=%x for UD QP_TYPE=%x", init_attr->cap.max_send_wr, qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } break; default: ehca_err(pd->device, "unsupported LL QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); - break; 
} } else { int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI @@ -533,6 +549,7 @@ static struct ehca_qp *internal_create_qp( "send_sge=%x recv_sge=%x max_sge=%x", init_attr->cap.max_send_sge, init_attr->cap.max_recv_sge, max_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } } @@ -543,6 +560,7 @@ static struct ehca_qp *internal_create_qp( my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); + atomic_dec(&shca->num_qps); return ERR_PTR(-ENOMEM); } @@ -822,6 +840,7 @@ create_qp_exit1: create_qp_exit0: kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); return ERR_PTR(ret); } @@ -1990,6 +2009,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (HAS_SQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7c34ac03..ce3b4ca1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -612,13 +612,11 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } if (wc->byte_len < SKB_TSHOLD) { - int dlen = wc->byte_len - IPOIB_ENCAP_LEN; + int dlen = wc->byte_len; small_skb = dev_alloc_skb(dlen); if (small_skb) { - small_skb->protocol = ((struct ipoib_header *)skb->data)->proto; - skb_copy_from_linear_data_offset(skb, IPOIB_ENCAP_LEN, - small_skb->data, dlen); + skb_copy_from_linear_data(skb, small_skb->data, dlen); skb_put(small_skb, dlen); skb = small_skb; goto copied; @@ -647,11 +645,11 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); +copied: skb->protocol = ((struct ipoib_header *) skb->data)->proto; skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); -copied: dev->last_rx = jiffies; ++priv->stats.rx_packets; 
priv->stats.rx_bytes += skb->len; diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 689a5f2d..d743f197 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -258,9 +258,9 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wait) { mdk_rdev_t *rdev; - struct list_head *tmp; - ITERATE_RDEV(mddev, rdev, tmp) + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) if (test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags)) md_super_write(mddev, rdev, @@ -268,6 +268,7 @@ static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wai + page->index * (PAGE_SIZE/512), PAGE_SIZE, page); + rcu_read_unlock(); if (wait) md_super_wait(mddev); diff --git a/drivers/md/md.c b/drivers/md/md.c index 1b0449f9..0a8d76c0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1306,13 +1306,17 @@ static mdk_rdev_t * match_dev_unit(mddev_t *mddev, mdk_rdev_t *dev) static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) { - struct list_head *tmp; - mdk_rdev_t *rdev; - - ITERATE_RDEV(mddev1,rdev,tmp) - if (match_dev_unit(mddev2, rdev)) - return 1; - + mdk_rdev_t *rdev, *rdev2; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev1) + rdev_for_each_rcu(rdev2, mddev2) + if (rdev->bdev->bd_contains == + rdev2->bdev->bd_contains) { + rcu_read_unlock(); + return 1; + } + rcu_read_unlock(); return 0; } @@ -1366,7 +1370,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) while ( (s=strchr(rdev->kobj.k_name, '/')) != NULL) *s = '!'; - list_add(&rdev->same_set, &mddev->disks); + list_add_rcu(&rdev->same_set, &mddev->disks); rdev->mddev = mddev; printk(KERN_INFO "md: bind<%s>\n", b); @@ -1390,10 +1394,14 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) return; } bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); - list_del_init(&rdev->same_set); + list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: 
unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); + + /* Delay due to rcu usage. + */ + synchronize_rcu(); kobject_del(&rdev->kobj); } @@ -1445,7 +1453,6 @@ static void export_rdev(mdk_rdev_t * rdev) if (rdev->mddev) MD_BUG(); free_disk_sb(rdev); - list_del_init(&rdev->same_set); #ifndef MODULE md_autodetect_dev(rdev->bdev->bd_dev); #endif @@ -1924,10 +1931,21 @@ rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); + mddev_t *mddev = rdev->mddev; + ssize_t rv; if (!entry->show) return -EIO; - return entry->show(rdev, page); + + rv = mddev ? mddev_lock(mddev) : -EBUSY; + if (!rv) { + if (rdev->mddev == NULL) + rv = -EBUSY; + else + rv = entry->show(rdev, page); + mddev_unlock(mddev); + } + return rv; } static ssize_t @@ -1936,12 +1954,22 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, { struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); + ssize_t rv; + mddev_t *mddev = rdev->mddev; if (!entry->store) return -EIO; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - return entry->store(rdev, page, length); + rv = mddev ? mddev_lock(mddev): -EBUSY; + if (!rv) { + if (rdev->mddev == NULL) + rv = -EBUSY; + else + rv = entry->store(rdev, page, length); + mddev_unlock(mddev); + } + return rv; } static void rdev_free(struct kobject *ko) @@ -3465,8 +3493,10 @@ static void autorun_devices(int part) /* on success, candidates will be empty, on error * it won't... */ - ITERATE_RDEV_GENERIC(candidates,rdev,tmp) + ITERATE_RDEV_GENERIC(candidates,rdev,tmp) { + list_del_init(&rdev->same_set); export_rdev(rdev); + } mddev_put(mddev); } printk(KERN_INFO "md: ... 
autorun DONE.\n"); @@ -5009,12 +5039,12 @@ int unregister_md_personality(struct mdk_personality *p) static int is_mddev_idle(mddev_t *mddev) { mdk_rdev_t * rdev; - struct list_head *tmp; int idle; unsigned long curr_events; idle = 1; - ITERATE_RDEV(mddev,rdev,tmp) { + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; curr_events = disk_stat_read(disk, sectors[0]) + disk_stat_read(disk, sectors[1]) - @@ -5037,6 +5067,7 @@ static int is_mddev_idle(mddev_t *mddev) idle = 0; } } + rcu_read_unlock(); return idle; } diff --git a/drivers/net/bnx2x.c b/drivers/net/bnx2x.c index 192f2293..e0f018fa 100644 --- a/drivers/net/bnx2x.c +++ b/drivers/net/bnx2x.c @@ -6,7 +6,8 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation. * - * Written by: Eliezer Tamir + * Maintained by: Eilon Greenstein + * Written by: Eliezer Tamir * Based on code from Michael Chan's bnx2 driver * UDP CSUM errata workaround by Arik Gendelman * Slowpath rework by Vladislav Zolotarov @@ -63,8 +64,8 @@ #include "bnx2x.h" #include "bnx2x_init.h" -#define DRV_MODULE_VERSION "1.40.22" -#define DRV_MODULE_RELDATE "2007/11/27" +#define DRV_MODULE_VERSION "1.42.4" +#define DRV_MODULE_RELDATE "2008/4/9" #define BNX2X_BC_VER 0x040200 /* Time in jiffies before concluding the transmitter is hung. 
*/ @@ -74,7 +75,7 @@ static char version[] __devinitdata = "Broadcom NetXtreme II 5771X 10Gigabit Ethernet Driver " DRV_MODULE_NAME " " DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; -MODULE_AUTHOR("Eliezer Tamir "); +MODULE_AUTHOR("Eliezer Tamir"); MODULE_DESCRIPTION("Broadcom NetXtreme II BCM57710 Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_MODULE_VERSION); @@ -6156,7 +6157,7 @@ static int bnx2x_function_init(struct bnx2x *bp, int mode) func, mode); REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0xffffffff); - REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, + REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, 0xfffc); bnx2x_init_block(bp, MISC_COMMON_START, MISC_COMMON_END); @@ -8003,38 +8004,6 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->duplex, cmd->port, cmd->phy_address, cmd->transceiver, cmd->autoneg, cmd->maxtxpkt, cmd->maxrxpkt); - switch (cmd->port) { - case PORT_TP: - if (!(bp->supported & SUPPORTED_TP)) { - DP(NETIF_MSG_LINK, "TP not supported\n"); - return -EINVAL; - } - - if (bp->phy_flags & PHY_XGXS_FLAG) { - bnx2x_link_reset(bp); - bnx2x_link_settings_supported(bp, SWITCH_CFG_1G); - bnx2x_phy_deassert(bp); - } - break; - - case PORT_FIBRE: - if (!(bp->supported & SUPPORTED_FIBRE)) { - DP(NETIF_MSG_LINK, "FIBRE not supported\n"); - return -EINVAL; - } - - if (!(bp->phy_flags & PHY_XGXS_FLAG)) { - bnx2x_link_reset(bp); - bnx2x_link_settings_supported(bp, SWITCH_CFG_10G); - bnx2x_phy_deassert(bp); - } - break; - - default: - DP(NETIF_MSG_LINK, "Unknown port type\n"); - return -EINVAL; - } - if (cmd->autoneg == AUTONEG_ENABLE) { if (!(bp->supported & SUPPORTED_Autoneg)) { DP(NETIF_MSG_LINK, "Aotoneg not supported\n"); @@ -9935,10 +9904,10 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, (bnx2x_get_pcie_speed(bp) == 2) ? 
"5GHz (Gen2)" : "2.5GHz", dev->base_addr, bp->pdev->irq); - printk(KERN_INFO "node addr "); + printk("node addr "); for (i = 0; i < 6; i++) - printk(KERN_INFO "%2.2x", dev->dev_addr[i]); - printk(KERN_INFO "\n"); + printk("%2.2x", dev->dev_addr[i]); + printk("\n"); return 0; } diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h index 176f4a2c..bc854894 100644 --- a/drivers/net/bnx2x.h +++ b/drivers/net/bnx2x.h @@ -6,7 +6,8 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation. * - * Written by: Eliezer Tamir + * Maintained by: Eilon Greenstein + * Written by: Eliezer Tamir * Based on code from Michael Chan's bnx2 driver */ diff --git a/drivers/net/bnx2x_init.h b/drivers/net/bnx2x_init.h index dcaecc53..370686ee 100644 --- a/drivers/net/bnx2x_init.h +++ b/drivers/net/bnx2x_init.h @@ -6,7 +6,8 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation. * - * Written by: Eliezer Tamir + * Maintained by: Eilon Greenstein + * Written by: Eliezer Tamir */ #ifndef BNX2X_INIT_H diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index c3533df8..dbc370ae 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -71,8 +71,6 @@ static struct pci_device_id ixgbe_pci_tbl[] = { board_82598 }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT), board_82598 }, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT_DUAL_PORT), - board_82598 }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4), board_82598 }, diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 96eec5cc..ce6dedef 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -1199,7 +1199,17 @@ receive_chars(struct uart_8250_port *up, int *status, struct pt_regs *regs) char flag; do { - ch = serial_inp(up, UART_RX); + if (likely(lsr & UART_LSR_DR)) + ch = serial_inp(up, UART_RX); + else + /* + * Intel 82571 has a Serial Over Lan device that will set BI + * without 
setting UART_LSR_DR. To avoid reading from the + * receive buffer without UART_LSR_DR bit set, we just force + * the read character to be 0 + */ + ch = 0; + flag = TTY_NORMAL; up->port.icount.rx++; @@ -1256,7 +1266,7 @@ receive_chars(struct uart_8250_port *up, int *status, struct pt_regs *regs) ignore_char: lsr = serial_inp(up, UART_LSR); - } while ((lsr & UART_LSR_DR) && (max_count-- > 0)); + } while ((lsr & (UART_LSR_DR | UART_LSR_BI)) && (max_count-- > 0)); spin_unlock(&up->port.lock); tty_flip_buffer_push(tty); spin_lock(&up->port.lock); @@ -1337,7 +1347,7 @@ serial8250_handle_port(struct uart_8250_port *up, struct pt_regs *regs) DEBUG_INTR("status = %x...", status); - if (status & UART_LSR_DR) + if (status & (UART_LSR_DR | UART_LSR_BI)) receive_chars(up, &status, regs); check_modem_status(up); if (status & UART_LSR_THRE) @@ -1829,6 +1839,7 @@ static int serial8250_startup(struct uart_port *port) (void) serial_inp(up, UART_RX); (void) serial_inp(up, UART_IIR); (void) serial_inp(up, UART_MSR); + up->lsr_break_flag = 0; return 0; } diff --git a/fs/direct-io.c b/fs/direct-io.c index aa9754bd..da7a1d65 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -120,8 +120,8 @@ struct dio { int page_errors; /* errno from get_user_pages() */ /* BIO completion state */ - atomic_t refcount; /* direct_io_worker() and bios */ spinlock_t bio_lock; /* protects BIO fields below */ + unsigned long refcount; /* direct_io_worker() and bios */ struct bio *bio_list; /* singly linked via bi_private */ struct task_struct *waiter; /* waiting task (NULL if none) */ @@ -266,8 +266,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio); static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error) { struct dio *dio = bio->bi_private; - int waiter_holds_ref = 0; - int remaining; + unsigned long remaining; + unsigned long flags; if (bio->bi_size) return 1; @@ -275,10 +275,11 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error) /* 
cleanup the bio */ dio_bio_complete(dio, bio); - waiter_holds_ref = !!dio->waiter; - remaining = atomic_sub_return(1, (&dio->refcount)); - if (remaining == 1 && waiter_holds_ref) + spin_lock_irqsave(&dio->bio_lock, flags); + remaining = --dio->refcount; + if (remaining == 1 && dio->waiter) wake_up_process(dio->waiter); + spin_unlock_irqrestore(&dio->bio_lock, flags); if (remaining == 0) { int ret = dio_complete(dio, dio->iocb->ki_pos, 0); @@ -307,7 +308,7 @@ static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error) spin_lock_irqsave(&dio->bio_lock, flags); bio->bi_private = dio->bio_list; dio->bio_list = bio; - if ((atomic_sub_return(1, &dio->refcount) == 1) && dio->waiter) + if (--dio->refcount == 1 && dio->waiter) wake_up_process(dio->waiter); spin_unlock_irqrestore(&dio->bio_lock, flags); return 0; @@ -344,11 +345,17 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, static void dio_bio_submit(struct dio *dio) { struct bio *bio = dio->bio; + unsigned long flags; bio->bi_private = dio; - atomic_inc(&dio->refcount); + + spin_lock_irqsave(&dio->bio_lock, flags); + dio->refcount++; + spin_unlock_irqrestore(&dio->bio_lock, flags); + if (dio->is_async && dio->rw == READ) bio_set_pages_dirty(bio); + submit_bio(dio->rw, bio); dio->bio = NULL; @@ -364,13 +371,6 @@ static void dio_cleanup(struct dio *dio) page_cache_release(dio_get_page(dio)); } -static int wait_for_more_bios(struct dio *dio) -{ - assert_spin_locked(&dio->bio_lock); - - return (atomic_read(&dio->refcount) > 1) && (dio->bio_list == NULL); -} - /* * Wait for the next BIO to complete. Remove it and return it. NULL is * returned once all BIOs have been completed. 
This must only be called once @@ -383,16 +383,20 @@ static struct bio *dio_await_one(struct dio *dio) struct bio *bio = NULL; spin_lock_irqsave(&dio->bio_lock, flags); - while (wait_for_more_bios(dio)) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (wait_for_more_bios(dio)) { - dio->waiter = current; - spin_unlock_irqrestore(&dio->bio_lock, flags); - io_schedule(); - spin_lock_irqsave(&dio->bio_lock, flags); - dio->waiter = NULL; - } - set_current_state(TASK_RUNNING); + /* + * Wait as long as the list is empty and there are bios in flight. bio + * completion drops the count, maybe adds to the list, and wakes while + * holding the bio_lock so we don't need set_current_state()'s barrier + * and can call it after testing our condition. + */ + while (dio->refcount > 1 && dio->bio_list == NULL) { + __set_current_state(TASK_UNINTERRUPTIBLE); + dio->waiter = current; + spin_unlock_irqrestore(&dio->bio_lock, flags); + io_schedule(); + /* wake up sets us TASK_RUNNING */ + spin_lock_irqsave(&dio->bio_lock, flags); + dio->waiter = NULL; } if (dio->bio_list) { bio = dio->bio_list; @@ -943,40 +947,27 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, struct dio *dio) { unsigned long user_addr; + unsigned long flags; int seg; ssize_t ret = 0; ssize_t ret2; size_t bytes; - dio->bio = NULL; dio->inode = inode; dio->rw = rw; dio->blkbits = blkbits; dio->blkfactor = inode->i_blkbits - blkbits; - dio->start_zero_done = 0; - dio->size = 0; dio->block_in_file = offset >> blkbits; - dio->blocks_available = 0; - dio->cur_page = NULL; - - dio->boundary = 0; - dio->reap_counter = 0; dio->get_block = get_block; dio->end_io = end_io; - dio->map_bh.b_private = NULL; dio->final_block_in_bio = -1; dio->next_block_for_io = -1; - dio->page_errors = 0; - dio->io_error = 0; - dio->result = 0; dio->iocb = iocb; dio->i_size = i_size_read(inode); - atomic_set(&dio->refcount, 1); spin_lock_init(&dio->bio_lock); - dio->bio_list = NULL; - dio->waiter = NULL; + dio->refcount = 1; /* * 
In case of non-aligned buffers, we may need 2 more @@ -984,8 +975,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, */ if (unlikely(dio->blkfactor)) dio->pages_in_io = 2; - else - dio->pages_in_io = 0; for (seg = 0; seg < nr_segs; seg++) { user_addr = (unsigned long)iov[seg].iov_base; @@ -1084,12 +1073,20 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, /* * Sync will always be dropping the final ref and completing the - * operation. AIO can if it was a broken operation described above - * or in fact if all the bios race to complete before we get here. - * In that case dio_complete() translates the EIOCBQUEUED into - * the proper return code that the caller will hand to aio_complete(). + * operation. AIO can if it was a broken operation described above or + * in fact if all the bios race to complete before we get here. In + * that case dio_complete() translates the EIOCBQUEUED into the proper + * return code that the caller will hand to aio_complete(). + * + * This is managed by the bio_lock instead of being an atomic_t so that + * completion paths can drop their ref and use the remaining count to + * decide to wake the submission path atomically. 
*/ - if (atomic_dec_and_test(&dio->refcount)) { + spin_lock_irqsave(&dio->bio_lock, flags); + ret2 = --dio->refcount; + spin_unlock_irqrestore(&dio->bio_lock, flags); + + if (ret2 == 0) { ret = dio_complete(dio, offset, ret); kfree(dio); } else @@ -1165,7 +1162,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } } - dio = kmalloc(sizeof(*dio), GFP_KERNEL); + dio = kzalloc(sizeof(*dio), GFP_KERNEL); retval = -ENOMEM; if (!dio) goto out; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 2eb0a613..236b211a 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -103,7 +103,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, rpc_drop_reply :rpc_success; /* Now check for conflicting locks */ - resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, + resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie); if (resp->status == nlm_drop_reply) return rpc_drop_reply; @@ -149,7 +149,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, #endif /* Now try to lock the file */ - resp->status = nlmsvc_lock(rqstp, file, &argp->lock, + resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, argp->block, &argp->cookie); if (resp->status == nlm_drop_reply) return rpc_drop_reply; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 60ca5f14..48acce00 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -171,18 +171,14 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) * logging industries. 
*/ static inline struct nlm_block * -nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, - struct nlm_lock *lock, struct nlm_cookie *cookie, int conf) +nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host, + struct nlm_file *file, struct nlm_lock *lock, + struct nlm_cookie *cookie, int conf) { struct nlm_block *block; - struct nlm_host *host; struct nlm_rqst *call = NULL; - /* Create host handle for callback */ - host = nlmsvc_lookup_host(rqstp); - if (host == NULL) - return NULL; - + nlm_get_host(host); call = nlm_alloc_call(host); if (call == NULL) return NULL; @@ -387,7 +383,8 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block) */ u32 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, - struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) + struct nlm_host *host, struct nlm_lock *lock, + int wait, struct nlm_cookie *cookie) { struct nlm_block *block = NULL; int error; @@ -401,7 +398,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_end, wait); - /* Lock file against concurrent access */ down(&file->f_sema); /* Get existing block (in case client is busy-waiting) @@ -409,7 +405,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, */ block = nlmsvc_lookup_block(file, lock); if (block == NULL) { - block = nlmsvc_create_block(rqstp, file, lock, cookie, 0); + block = nlmsvc_create_block(rqstp, host, file, lock, cookie, 0); ret = nlm_lck_denied_nolocks; if (block == NULL) goto out; @@ -482,8 +478,8 @@ out: */ u32 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, - struct nlm_lock *lock, struct nlm_lock *conflock, - struct nlm_cookie *cookie) + struct nlm_host *host, struct nlm_lock *lock, + struct nlm_lock *conflock, struct nlm_cookie *cookie) { struct nlm_block *block = NULL; int error; @@ -500,7 +496,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, block = nlmsvc_lookup_block(file, lock); if (block == NULL) { - block = 
nlmsvc_create_block(rqstp, file, lock, cookie, 1); + block = nlmsvc_create_block(rqstp, host, file, lock, cookie, 1); if (block == NULL) return nlm_granted; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index dc6e5f78..46cf0600 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -132,8 +132,8 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, rpc_drop_reply :rpc_success; /* Now check for conflicting locks */ - resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, - &resp->lock, &resp->cookie)); + resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, + &argp->lock, &resp->lock, &resp->cookie)); if (resp->status == nlm_drop_reply) return rpc_drop_reply; @@ -179,7 +179,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, #endif /* Now try to lock the file */ - resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock, + resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, argp->block, &argp->cookie)); if (resp->status == nlm_drop_reply) return rpc_drop_reply; diff --git a/fs/namei.c b/fs/namei.c index 6b99c473..bb7693ca 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -485,7 +485,13 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s */ result = d_lookup(parent, name); if (!result) { - struct dentry * dentry = d_alloc(parent, name); + struct dentry *dentry; + + /* Don't create child dentry for a dead directory. 
*/ + result = ERR_PTR(-ENOENT); + if (IS_DEADDIR(dir)) + goto out_unlock; + dentry = d_alloc(parent, name); result = ERR_PTR(-ENOMEM); if (dentry) { result = dir->i_op->lookup(dir, dentry, nd); @@ -494,6 +500,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s else result = dentry; } +out_unlock: mutex_unlock(&dir->i_mutex); return result; } @@ -1275,7 +1282,14 @@ static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, st dentry = cached_lookup(base, name, nd); if (!dentry) { - struct dentry *new = d_alloc(base, name); + struct dentry *new; + + /* Don't create child dentry for a dead directory. */ + dentry = ERR_PTR(-ENOENT); + if (IS_DEADDIR(inode)) + goto out; + + new = d_alloc(base, name); dentry = ERR_PTR(-ENOMEM); if (!new) goto out; diff --git a/fs/namespace.c b/fs/namespace.c index 37872827..6702466d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -910,6 +910,9 @@ static int do_change_type(struct nameidata *nd, int flag) int recurse = flag & MS_REC; int type = flag & ~MS_REC; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (nd->dentry != nd->mnt->mnt_root) return -EINVAL; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0f92d53c..eeaf356a 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -186,11 +186,12 @@ extern struct nlm_host *nlm_find_client(void); * Server-side lock handling */ u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, - struct nlm_lock *, int, struct nlm_cookie *); + struct nlm_host *, struct nlm_lock *, int, + struct nlm_cookie *); u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); u32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, - struct nlm_lock *, struct nlm_lock *, - struct nlm_cookie *); + struct nlm_host *, struct nlm_lock *, + struct nlm_lock *, struct nlm_cookie *); u32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *); unsigned long nlmsvc_retry_blocked(void); void 
nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 9c1e428c..cab8551b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -86,6 +86,7 @@ #define PG_reclaim 17 /* To be reclaimed asap */ #define PG_nosave_free 18 /* Free, should not be written */ #define PG_buddy 19 /* Page is free, on buddy lists */ +#define PG_xpmem 27 /* Testing for xpmem. */ /* PG_owner_priv_1 users should have descriptive aliases */ #define PG_checked PG_owner_priv_1 /* Used by some filesystems */ @@ -287,6 +288,14 @@ #define PageForeignDestructor(_page) \ ((void (*)(struct page *))(_page)->index)(_page) +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) +#define PageXpmem(page) test_bit(PG_xpmem, &(page)->flags) +#define SetPageXpmem(page) set_bit(PG_xpmem, &(page)->flags) +#define ClearPageXpmem(page) clear_bit(PG_xpmem, &(page)->flags) +#else +#define PageXpmem(page) 0 +#endif + struct page; /* forward declaration */ int test_clear_page_dirty(struct page *page); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index d2889029..15afc45d 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -314,6 +314,9 @@ static inline char * mdname (mddev_t * mddev) #define ITERATE_RDEV(mddev,rdev,tmp) \ ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp) +#define rdev_for_each_rcu(rdev, mddev) \ + list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) + /* * Iterates through 'pending RAID disks' */ diff --git a/kernel/signal.c b/kernel/signal.c index 53fb8cff..4c8010b4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -165,18 +165,25 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) { struct sigqueue *q = NULL; + struct user_struct *user; - atomic_inc(&t->user->sigpending); + /* + * In order to avoid problems with "switch_user()", we want to make + * sure that the compiler doesn't re-load "t->user" + */ + user 
= t->user; + barrier(); + atomic_inc(&user->sigpending); if (override_rlimit || - atomic_read(&t->user->sigpending) <= + atomic_read(&user->sigpending) <= t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) q = kmem_cache_alloc(sigqueue_cachep, flags); if (unlikely(q == NULL)) { - atomic_dec(&t->user->sigpending); + atomic_dec(&user->sigpending); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; - q->user = get_uid(t->user); + q->user = get_uid(user); } return(q); } diff --git a/kernel/user.c b/kernel/user.c index 6408c042..220e5861 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -187,6 +187,17 @@ void switch_uid(struct user_struct *new_user) atomic_dec(&old_user->processes); switch_uid_keyring(new_user); current->user = new_user; + + /* + * We need to synchronize with __sigqueue_alloc() + * doing a get_uid(p->user).. If that saw the old + * user value, we need to wait until it has exited + * its critical region before we can free the old + * structure. + */ + smp_mb(); + spin_unlock_wait(&current->sighand->siglock); + free_uid(old_user); suid_keys(current); } diff --git a/lib/kref.c b/lib/kref.c index 4a467faf..0d07cc31 100644 --- a/lib/kref.c +++ b/lib/kref.c @@ -52,12 +52,7 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref)) WARN_ON(release == NULL); WARN_ON(release == (void (*)(struct kref *))kfree); - /* - * if current count is one, we are the last user and can release object - * right now, avoiding an atomic operation on 'refcount' - */ - if ((atomic_read(&kref->refcount) == 1) || - (atomic_dec_and_test(&kref->refcount))) { + if (atomic_dec_and_test(&kref->refcount)) { release(kref); return 1; } diff --git a/mm/rmap.c b/mm/rmap.c index 8b4800a0..b44fbdb4 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -926,6 +926,9 @@ int try_to_unmap(struct page *page, int migration) BUG_ON(!PageLocked(page)); + if (PageXpmem(page)) + return SWAP_FAIL; + if (PageAnon(page)) ret = try_to_unmap_anon(page, migration); else diff --git a/mm/shmem.c b/mm/shmem.c index
fb17a453..2106dd71 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1040,7 +1040,7 @@ shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info, pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); pvma.vm_pgoff = idx; pvma.vm_end = PAGE_SIZE; - page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0); + page = alloc_page_vma(gfp, &pvma, 0); mpol_free(pvma.vm_policy); return page; } @@ -1060,7 +1060,7 @@ shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) static inline struct page * shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx) { - return alloc_page(gfp | __GFP_ZERO); + return alloc_page(gfp); } #endif @@ -1269,6 +1269,7 @@ repeat: info->alloced++; spin_unlock(&info->lock); + clear_highpage(filepage); flush_dcache_page(filepage); SetPageUptodate(filepage); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 9dff48c2..95ea4d62 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -77,22 +77,15 @@ static int port_cost(struct net_device *dev) * Called from work queue to allow for calling functions that * might sleep (such as speed check), and to debounce. 
*/ -static void port_carrier_check(void *arg) +void br_port_carrier_check(struct net_bridge_port *p) { - struct net_device *dev = arg; - struct net_bridge_port *p; - struct net_bridge *br; - - rtnl_lock(); - p = dev->br_port; - if (!p) - goto done; - br = p->br; + struct net_device *dev = p->dev; + struct net_bridge *br = p->br; if (netif_carrier_ok(dev)) p->path_cost = port_cost(dev); - if (br->dev->flags & IFF_UP) { + if (netif_running(br->dev)) { spin_lock_bh(&br->lock); if (netif_carrier_ok(dev)) { if (p->state == BR_STATE_DISABLED) @@ -103,9 +96,6 @@ static void port_carrier_check(void *arg) } spin_unlock_bh(&br->lock); } -done: - dev_put(dev); - rtnl_unlock(); } static void release_nbp(struct kobject *kobj) @@ -158,9 +148,6 @@ static void del_nbp(struct net_bridge_port *p) dev_set_promiscuity(dev, -1); - if (cancel_delayed_work(&p->carrier_check)) - dev_put(dev); - spin_lock_bh(&br->lock); br_stp_disable_port(p); spin_unlock_bh(&br->lock); @@ -278,7 +265,6 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->port_no = index; br_init_port(p); p->state = BR_STATE_DISABLED; - INIT_WORK(&p->carrier_check, port_carrier_check, dev); br_stp_port_timer_init(p); kobject_init(&p->kobj); @@ -422,12 +408,14 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) spin_lock_bh(&br->lock); br_stp_recalculate_bridge_id(br); br_features_recompute(br); - if (schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE)) - dev_hold(dev); + if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) && + (br->dev->flags & IFF_UP)) + br_stp_enable_port(p); spin_unlock_bh(&br->lock); dev_set_mtu(br->dev, br_min_mtu(br)); + kobject_uevent(&p->kobj, KOBJ_ADD); return 0; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index c64742cb..c9a4da70 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -42,28 +42,28 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v br = p->br; - spin_lock_bh(&br->lock); switch (event) { 
case NETDEV_CHANGEMTU: dev_set_mtu(br->dev, br_min_mtu(br)); break; case NETDEV_CHANGEADDR: + spin_lock_bh(&br->lock); br_fdb_changeaddr(p, dev->dev_addr); br_ifinfo_notify(RTM_NEWLINK, p); br_stp_recalculate_bridge_id(br); + spin_unlock_bh(&br->lock); break; case NETDEV_CHANGE: - if (br->dev->flags & IFF_UP) - if (schedule_delayed_work(&p->carrier_check, - BR_PORT_DEBOUNCE)) - dev_hold(dev); + br_port_carrier_check(p); break; case NETDEV_FEAT_CHANGE: + spin_lock_bh(&br->lock); if (br->dev->flags & IFF_UP) br_features_recompute(br); + spin_unlock_bh(&br->lock); /* could do recursive feature change notification * but who would care?? @@ -71,22 +71,23 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; case NETDEV_DOWN: + spin_lock_bh(&br->lock); if (br->dev->flags & IFF_UP) br_stp_disable_port(p); + spin_unlock_bh(&br->lock); break; case NETDEV_UP: + spin_lock_bh(&br->lock); if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) br_stp_enable_port(p); + spin_unlock_bh(&br->lock); break; case NETDEV_UNREGISTER: - spin_unlock_bh(&br->lock); br_del_if(br, dev); - goto done; + break; } - spin_unlock_bh(&br->lock); - done: return NOTIFY_DONE; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c491fb2f..7e47ffb3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -27,8 +27,6 @@ #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<state == BR_STATE_BLOCKING) { - if (p->br->stp_enabled) { - p->state = BR_STATE_LISTENING; - } else { - p->state = BR_STATE_LEARNING; - } - br_log_state(p); - mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); } + struct net_bridge *br = p->br; + + if (p->state != BR_STATE_BLOCKING) + return; + + if (br->forward_delay == 0) { + p->state = BR_STATE_FORWARDING; + br_topology_change_detection(br); + del_timer(&p->forward_delay_timer); + } + else if (p->br->stp_enabled) + p->state = BR_STATE_LISTENING; + else + p->state = BR_STATE_LEARNING; + + br_log_state(p); 
+ + if (br->forward_delay != 0) + mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); } /* called under bridge lock */ diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index ab570a0a..24902594 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -599,6 +599,9 @@ snd_seq_oss_synth_make_info(struct seq_oss_devinfo *dp, int dev, struct synth_in { struct seq_oss_synth *rec; + if (dev < 0 || dev >= dp->max_synthdev) + return -ENXIO; + if (dp->synths[dev].is_midi) { struct midi_info minf; snd_seq_oss_midi_make_info(dp, dp->synths[dev].midi_mapped, &minf); -- 2.39.5