ia64/xen-unstable

changeset 11544:9837ff37e354

[LINUX] Backport PCI MMCONFIG patch from 2.6.17 -- only use MMCONFIG
access method if the memory area is reserved in the E820 map.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Sep 20 12:02:13 2006 +0100 (2006-09-20)
parents 9deecd4f9cf9
children a151311fa9c7
files patches/linux-2.6.16.29/pci-mmconfig-fix-from-2.6.17.patch patches/linux-2.6.16.29/series
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/patches/linux-2.6.16.29/pci-mmconfig-fix-from-2.6.17.patch	Wed Sep 20 12:02:13 2006 +0100
     1.3 @@ -0,0 +1,284 @@
     1.4 +diff -pruN ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c ./arch/i386/pci/mmconfig.c
     1.5 +--- ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c	2006-09-12 19:02:10.000000000 +0100
     1.6 ++++ ./arch/i386/pci/mmconfig.c	2006-09-20 11:55:32.000000000 +0100
     1.7 +@@ -12,14 +12,22 @@
     1.8 + #include <linux/pci.h>
     1.9 + #include <linux/init.h>
    1.10 + #include <linux/acpi.h>
    1.11 ++#include <asm/e820.h>
    1.12 + #include "pci.h"
    1.13 + 
    1.14 ++/* aperture is up to 256MB but BIOS may reserve less */
    1.15 ++#define MMCONFIG_APER_MIN	(2 * 1024*1024)
    1.16 ++#define MMCONFIG_APER_MAX	(256 * 1024*1024)
    1.17 ++
    1.18 ++/* Assume systems with more busses have correct MCFG */
    1.19 ++#define MAX_CHECK_BUS 16
    1.20 ++
    1.21 + #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
    1.22 + 
    1.23 + /* The base address of the last MMCONFIG device accessed */
    1.24 + static u32 mmcfg_last_accessed_device;
    1.25 + 
    1.26 +-static DECLARE_BITMAP(fallback_slots, 32);
    1.27 ++static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32);
    1.28 + 
    1.29 + /*
    1.30 +  * Functions for accessing PCI configuration space with MMCONFIG accesses
    1.31 +@@ -29,8 +37,8 @@ static u32 get_base_addr(unsigned int se
    1.32 + 	int cfg_num = -1;
    1.33 + 	struct acpi_table_mcfg_config *cfg;
    1.34 + 
    1.35 +-	if (seg == 0 && bus == 0 &&
    1.36 +-	    test_bit(PCI_SLOT(devfn), fallback_slots))
    1.37 ++	if (seg == 0 && bus < MAX_CHECK_BUS &&
    1.38 ++	    test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots))
    1.39 + 		return 0;
    1.40 + 
    1.41 + 	while (1) {
    1.42 +@@ -74,8 +82,10 @@ static int pci_mmcfg_read(unsigned int s
    1.43 + 	unsigned long flags;
    1.44 + 	u32 base;
    1.45 + 
    1.46 +-	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
    1.47 ++	if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
    1.48 ++		*value = -1;
    1.49 + 		return -EINVAL;
    1.50 ++	}
    1.51 + 
    1.52 + 	base = get_base_addr(seg, bus, devfn);
    1.53 + 	if (!base)
    1.54 +@@ -146,30 +156,62 @@ static struct pci_raw_ops pci_mmcfg = {
    1.55 +    Normally this can be expressed in the MCFG by not listing them
    1.56 +    and assigning suitable _SEGs, but this isn't implemented in some BIOS.
    1.57 +    Instead try to discover all devices on bus 0 that are unreachable using MM
    1.58 +-   and fallback for them.
    1.59 +-   We only do this for bus 0/seg 0 */
    1.60 ++   and fallback for them. */
    1.61 + static __init void unreachable_devices(void)
    1.62 + {
    1.63 +-	int i;
    1.64 ++	int i, k;
    1.65 + 	unsigned long flags;
    1.66 + 
    1.67 +-	for (i = 0; i < 32; i++) {
    1.68 +-		u32 val1;
    1.69 +-		u32 addr;
    1.70 ++	for (k = 0; k < MAX_CHECK_BUS; k++) {
    1.71 ++		for (i = 0; i < 32; i++) {
    1.72 ++			u32 val1;
    1.73 ++			u32 addr;
    1.74 ++
    1.75 ++			pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1);
    1.76 ++			if (val1 == 0xffffffff)
    1.77 ++				continue;
    1.78 ++
    1.79 ++			/* Locking probably not needed, but safer */
    1.80 ++			spin_lock_irqsave(&pci_config_lock, flags);
    1.81 ++			addr = get_base_addr(0, k, PCI_DEVFN(i, 0));
    1.82 ++			if (addr != 0)
    1.83 ++				pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0));
    1.84 ++			if (addr == 0 ||
    1.85 ++			    readl((u32 __iomem *)mmcfg_virt_addr) != val1) {
    1.86 ++				set_bit(i + 32*k, fallback_slots);
    1.87 ++				printk(KERN_NOTICE
    1.88 ++			"PCI: No mmconfig possible on %x:%x\n", k, i);
    1.89 ++			}
    1.90 ++			spin_unlock_irqrestore(&pci_config_lock, flags);
    1.91 ++		}
    1.92 ++	}
    1.93 ++}
    1.94 + 
    1.95 +-		pci_conf1_read(0, 0, PCI_DEVFN(i, 0), 0, 4, &val1);
    1.96 +-		if (val1 == 0xffffffff)
    1.97 ++/* NB. Ripped from arch/i386/kernel/setup.c for this Xen bugfix patch. */
    1.98 ++static int __init
    1.99 ++e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
   1.100 ++{
   1.101 ++	u64 start = s;
   1.102 ++	u64 end = e;
   1.103 ++	int i;
   1.104 ++	for (i = 0; i < e820.nr_map; i++) {
   1.105 ++		struct e820entry *ei = &e820.map[i];
   1.106 ++		if (type && ei->type != type)
   1.107 + 			continue;
   1.108 +-
   1.109 +-		/* Locking probably not needed, but safer */
   1.110 +-		spin_lock_irqsave(&pci_config_lock, flags);
   1.111 +-		addr = get_base_addr(0, 0, PCI_DEVFN(i, 0));
   1.112 +-		if (addr != 0)
   1.113 +-			pci_exp_set_dev_base(addr, 0, PCI_DEVFN(i, 0));
   1.114 +-		if (addr == 0 || readl((u32 __iomem *)mmcfg_virt_addr) != val1)
   1.115 +-			set_bit(i, fallback_slots);
   1.116 +-		spin_unlock_irqrestore(&pci_config_lock, flags);
   1.117 ++		/* is the region (part) in overlap with the current region ?*/
   1.118 ++		if (ei->addr >= end || ei->addr + ei->size <= start)
   1.119 ++			continue;
   1.120 ++		/* if the region is at the beginning of <start,end> we move
   1.121 ++		 * start to the end of the region since it's ok until there
   1.122 ++		 */
   1.123 ++		if (ei->addr <= start)
   1.124 ++			start = ei->addr + ei->size;
   1.125 ++		/* if start is now at or beyond end, we're done, full
   1.126 ++		 * coverage */
   1.127 ++		if (start >= end)
   1.128 ++			return 1; /* we're done */
   1.129 + 	}
   1.130 ++	return 0;
   1.131 + }
   1.132 + 
   1.133 + static int __init pci_mmcfg_init(void)
   1.134 +@@ -183,6 +225,15 @@ static int __init pci_mmcfg_init(void)
   1.135 + 	    (pci_mmcfg_config[0].base_address == 0))
   1.136 + 		goto out;
   1.137 + 
   1.138 ++	if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
   1.139 ++			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
   1.140 ++			E820_RESERVED)) {
   1.141 ++		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
   1.142 ++				pci_mmcfg_config[0].base_address);
   1.143 ++		printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
   1.144 ++		goto out;
   1.145 ++	}
   1.146 ++
   1.147 + 	printk(KERN_INFO "PCI: Using MMCONFIG\n");
   1.148 + 	raw_pci_ops = &pci_mmcfg;
   1.149 + 	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
   1.150 +diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c ./arch/x86_64/pci/mmconfig.c
   1.151 +--- ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c	2006-09-12 19:02:10.000000000 +0100
   1.152 ++++ ./arch/x86_64/pci/mmconfig.c	2006-09-20 11:55:41.000000000 +0100
   1.153 +@@ -9,11 +9,19 @@
   1.154 + #include <linux/init.h>
   1.155 + #include <linux/acpi.h>
   1.156 + #include <linux/bitmap.h>
   1.157 ++#include <asm/e820.h>
   1.158 ++
   1.159 + #include "pci.h"
   1.160 + 
   1.161 +-#define MMCONFIG_APER_SIZE (256*1024*1024)
   1.162 ++/* aperture is up to 256MB but BIOS may reserve less */
   1.163 ++#define MMCONFIG_APER_MIN	(2 * 1024*1024)
   1.164 ++#define MMCONFIG_APER_MAX	(256 * 1024*1024)
   1.165 ++
   1.166 ++/* Verify the first 16 busses. We assume that systems with more busses
   1.167 ++   get MCFG right. */
   1.168 ++#define MAX_CHECK_BUS 16
   1.169 + 
   1.170 +-static DECLARE_BITMAP(fallback_slots, 32);
   1.171 ++static DECLARE_BITMAP(fallback_slots, 32*MAX_CHECK_BUS);
   1.172 + 
   1.173 + /* Static virtual mapping of the MMCONFIG aperture */
   1.174 + struct mmcfg_virt {
   1.175 +@@ -55,7 +63,8 @@ static char __iomem *get_virt(unsigned i
   1.176 + static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
   1.177 + {
   1.178 + 	char __iomem *addr;
   1.179 +-	if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots))
   1.180 ++	if (seg == 0 && bus < MAX_CHECK_BUS &&
   1.181 ++		test_bit(32*bus + PCI_SLOT(devfn), fallback_slots))
   1.182 + 		return NULL;
   1.183 + 	addr = get_virt(seg, bus);
   1.184 + 	if (!addr)
   1.185 +@@ -69,8 +78,10 @@ static int pci_mmcfg_read(unsigned int s
   1.186 + 	char __iomem *addr;
   1.187 + 
   1.188 + 	/* Why do we have this when nobody checks it. How about a BUG()!? -AK */
   1.189 +-	if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095)))
   1.190 ++	if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
   1.191 ++		*value = -1;
   1.192 + 		return -EINVAL;
   1.193 ++	}
   1.194 + 
   1.195 + 	addr = pci_dev_base(seg, bus, devfn);
   1.196 + 	if (!addr)
   1.197 +@@ -129,23 +140,52 @@ static struct pci_raw_ops pci_mmcfg = {
   1.198 +    Normally this can be expressed in the MCFG by not listing them
   1.199 +    and assigning suitable _SEGs, but this isn't implemented in some BIOS.
   1.200 +    Instead try to discover all devices on bus 0 that are unreachable using MM
   1.201 +-   and fallback for them.
   1.202 +-   We only do this for bus 0/seg 0 */
   1.203 ++   and fallback for them. */
   1.204 + static __init void unreachable_devices(void)
   1.205 + {
   1.206 +-	int i;
   1.207 +-	for (i = 0; i < 32; i++) {
   1.208 +-		u32 val1;
   1.209 +-		char __iomem *addr;
   1.210 ++	int i, k;
   1.211 ++	/* Use the max bus number from ACPI here? */
   1.212 ++	for (k = 0; k < MAX_CHECK_BUS; k++) {
   1.213 ++		for (i = 0; i < 32; i++) {
   1.214 ++			u32 val1;
   1.215 ++			char __iomem *addr;
   1.216 ++
   1.217 ++			pci_conf1_read(0, k, PCI_DEVFN(i,0), 0, 4, &val1);
   1.218 ++			if (val1 == 0xffffffff)
   1.219 ++				continue;
   1.220 ++			addr = pci_dev_base(0, k, PCI_DEVFN(i, 0));
   1.221 ++			if (addr == NULL|| readl(addr) != val1) {
   1.222 ++				set_bit(i + 32*k, fallback_slots);
   1.223 ++				printk(KERN_NOTICE
   1.224 ++				"PCI: No mmconfig possible on device %x:%x\n",
   1.225 ++					k, i);
   1.226 ++			}
   1.227 ++		}
   1.228 ++	}
   1.229 ++}
   1.230 + 
   1.231 +-		pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1);
   1.232 +-		if (val1 == 0xffffffff)
   1.233 ++/* NB. Ripped from arch/x86_64/kernel/e820.c for this Xen bugfix patch. */
   1.234 ++static int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
   1.235 ++{
   1.236 ++	int i;
   1.237 ++	for (i = 0; i < e820.nr_map; i++) {
   1.238 ++		struct e820entry *ei = &e820.map[i];
   1.239 ++		if (type && ei->type != type)
   1.240 + 			continue;
   1.241 +-		addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0));
   1.242 +-		if (addr == NULL|| readl(addr) != val1) {
   1.243 +-			set_bit(i, &fallback_slots);
   1.244 +-		}
   1.245 ++		/* is the region (part) in overlap with the current region ?*/
   1.246 ++		if (ei->addr >= end || ei->addr + ei->size <= start)
   1.247 ++			continue;
   1.248 ++
   1.249 ++		/* if the region is at the beginning of <start,end> we move
   1.250 ++		 * start to the end of the region since it's ok until there
   1.251 ++		 */
   1.252 ++		if (ei->addr <= start)
   1.253 ++			start = ei->addr + ei->size;
   1.254 ++		/* if start is now at or beyond end, we're done, full coverage */
   1.255 ++		if (start >= end)
   1.256 ++			return 1; /* we're done */
   1.257 + 	}
   1.258 ++	return 0;
   1.259 + }
   1.260 + 
   1.261 + static int __init pci_mmcfg_init(void)
   1.262 +@@ -161,6 +201,15 @@ static int __init pci_mmcfg_init(void)
   1.263 + 	    (pci_mmcfg_config[0].base_address == 0))
   1.264 + 		return 0;
   1.265 + 
   1.266 ++	if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
   1.267 ++			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
   1.268 ++			E820_RESERVED)) {
   1.269 ++		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
   1.270 ++				pci_mmcfg_config[0].base_address);
   1.271 ++		printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
   1.272 ++		return 0;
   1.273 ++	}
   1.274 ++
   1.275 + 	/* RED-PEN i386 doesn't do _nocache right now */
   1.276 + 	pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
   1.277 + 	if (pci_mmcfg_virt == NULL) {
   1.278 +@@ -169,7 +218,8 @@ static int __init pci_mmcfg_init(void)
   1.279 + 	}
   1.280 + 	for (i = 0; i < pci_mmcfg_config_num; ++i) {
   1.281 + 		pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i];
   1.282 +-		pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, MMCONFIG_APER_SIZE);
   1.283 ++		pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address,
   1.284 ++							 MMCONFIG_APER_MAX);
   1.285 + 		if (!pci_mmcfg_virt[i].virt) {
   1.286 + 			printk("PCI: Cannot map mmconfig aperture for segment %d\n",
   1.287 + 			       pci_mmcfg_config[i].pci_segment_group_number);
     2.1 --- a/patches/linux-2.6.16.29/series	Wed Sep 20 09:56:50 2006 +0100
     2.2 +++ b/patches/linux-2.6.16.29/series	Wed Sep 20 12:02:13 2006 +0100
     2.3 @@ -10,6 +10,7 @@ net-gso-1-check-dodgy.patch
     2.4  net-gso-2-checksum-fix.patch
     2.5  net-gso-3-fix-errorcheck.patch
     2.6  net-gso-4-kill-warnon.patch
     2.7 +pci-mmconfig-fix-from-2.6.17.patch
     2.8  pmd-shared.patch
     2.9  rcu_needs_cpu.patch
    2.10  rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch