ia64/xen-unstable
changeset 11544:9837ff37e354
[LINUX] Backport PCI MMCONFIG patch from 2.6.17 -- only use MMCONFIG
access method if the memory area is reserved in the E820 map.
Signed-off-by: Keir Fraser <keir@xensource.com>
author | kfraser@localhost.localdomain |
---|---|
date | Wed Sep 20 12:02:13 2006 +0100 (2006-09-20) |
parents | 9deecd4f9cf9 |
children | a151311fa9c7 |
files | patches/linux-2.6.16.29/pci-mmconfig-fix-from-2.6.17.patch patches/linux-2.6.16.29/series |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/patches/linux-2.6.16.29/pci-mmconfig-fix-from-2.6.17.patch Wed Sep 20 12:02:13 2006 +0100 1.3 @@ -0,0 +1,284 @@ 1.4 +diff -pruN ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c ./arch/i386/pci/mmconfig.c 1.5 +--- ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c 2006-09-12 19:02:10.000000000 +0100 1.6 ++++ ./arch/i386/pci/mmconfig.c 2006-09-20 11:55:32.000000000 +0100 1.7 +@@ -12,14 +12,22 @@ 1.8 + #include <linux/pci.h> 1.9 + #include <linux/init.h> 1.10 + #include <linux/acpi.h> 1.11 ++#include <asm/e820.h> 1.12 + #include "pci.h" 1.13 + 1.14 ++/* aperture is up to 256MB but BIOS may reserve less */ 1.15 ++#define MMCONFIG_APER_MIN (2 * 1024*1024) 1.16 ++#define MMCONFIG_APER_MAX (256 * 1024*1024) 1.17 ++ 1.18 ++/* Assume systems with more busses have correct MCFG */ 1.19 ++#define MAX_CHECK_BUS 16 1.20 ++ 1.21 + #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) 1.22 + 1.23 + /* The base address of the last MMCONFIG device accessed */ 1.24 + static u32 mmcfg_last_accessed_device; 1.25 + 1.26 +-static DECLARE_BITMAP(fallback_slots, 32); 1.27 ++static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32); 1.28 + 1.29 + /* 1.30 + * Functions for accessing PCI configuration space with MMCONFIG accesses 1.31 +@@ -29,8 +37,8 @@ static u32 get_base_addr(unsigned int se 1.32 + int cfg_num = -1; 1.33 + struct acpi_table_mcfg_config *cfg; 1.34 + 1.35 +- if (seg == 0 && bus == 0 && 1.36 +- test_bit(PCI_SLOT(devfn), fallback_slots)) 1.37 ++ if (seg == 0 && bus < MAX_CHECK_BUS && 1.38 ++ test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots)) 1.39 + return 0; 1.40 + 1.41 + while (1) { 1.42 +@@ -74,8 +82,10 @@ static int pci_mmcfg_read(unsigned int s 1.43 + unsigned long flags; 1.44 + u32 base; 1.45 + 1.46 +- if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) 1.47 ++ if ((bus > 255) || (devfn > 255) || (reg > 4095)) { 1.48 ++ *value = -1; 1.49 + return -EINVAL; 1.50 ++ } 1.51 + 1.52 + base = 
get_base_addr(seg, bus, devfn); 1.53 + if (!base) 1.54 +@@ -146,30 +156,62 @@ static struct pci_raw_ops pci_mmcfg = { 1.55 + Normally this can be expressed in the MCFG by not listing them 1.56 + and assigning suitable _SEGs, but this isn't implemented in some BIOS. 1.57 + Instead try to discover all devices on bus 0 that are unreachable using MM 1.58 +- and fallback for them. 1.59 +- We only do this for bus 0/seg 0 */ 1.60 ++ and fallback for them. */ 1.61 + static __init void unreachable_devices(void) 1.62 + { 1.63 +- int i; 1.64 ++ int i, k; 1.65 + unsigned long flags; 1.66 + 1.67 +- for (i = 0; i < 32; i++) { 1.68 +- u32 val1; 1.69 +- u32 addr; 1.70 ++ for (k = 0; k < MAX_CHECK_BUS; k++) { 1.71 ++ for (i = 0; i < 32; i++) { 1.72 ++ u32 val1; 1.73 ++ u32 addr; 1.74 ++ 1.75 ++ pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1); 1.76 ++ if (val1 == 0xffffffff) 1.77 ++ continue; 1.78 ++ 1.79 ++ /* Locking probably not needed, but safer */ 1.80 ++ spin_lock_irqsave(&pci_config_lock, flags); 1.81 ++ addr = get_base_addr(0, k, PCI_DEVFN(i, 0)); 1.82 ++ if (addr != 0) 1.83 ++ pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0)); 1.84 ++ if (addr == 0 || 1.85 ++ readl((u32 __iomem *)mmcfg_virt_addr) != val1) { 1.86 ++ set_bit(i, fallback_slots); 1.87 ++ printk(KERN_NOTICE 1.88 ++ "PCI: No mmconfig possible on %x:%x\n", k, i); 1.89 ++ } 1.90 ++ spin_unlock_irqrestore(&pci_config_lock, flags); 1.91 ++ } 1.92 ++ } 1.93 ++} 1.94 + 1.95 +- pci_conf1_read(0, 0, PCI_DEVFN(i, 0), 0, 4, &val1); 1.96 +- if (val1 == 0xffffffff) 1.97 ++/* NB. Ripped from arch/i386/kernel/setup.c for this Xen bugfix patch. 
*/ 1.98 ++static int __init 1.99 ++e820_all_mapped(unsigned long s, unsigned long e, unsigned type) 1.100 ++{ 1.101 ++ u64 start = s; 1.102 ++ u64 end = e; 1.103 ++ int i; 1.104 ++ for (i = 0; i < e820.nr_map; i++) { 1.105 ++ struct e820entry *ei = &e820.map[i]; 1.106 ++ if (type && ei->type != type) 1.107 + continue; 1.108 +- 1.109 +- /* Locking probably not needed, but safer */ 1.110 +- spin_lock_irqsave(&pci_config_lock, flags); 1.111 +- addr = get_base_addr(0, 0, PCI_DEVFN(i, 0)); 1.112 +- if (addr != 0) 1.113 +- pci_exp_set_dev_base(addr, 0, PCI_DEVFN(i, 0)); 1.114 +- if (addr == 0 || readl((u32 __iomem *)mmcfg_virt_addr) != val1) 1.115 +- set_bit(i, fallback_slots); 1.116 +- spin_unlock_irqrestore(&pci_config_lock, flags); 1.117 ++ /* is the region (part) in overlap with the current region ?*/ 1.118 ++ if (ei->addr >= end || ei->addr + ei->size <= start) 1.119 ++ continue; 1.120 ++ /* if the region is at the beginning of <start,end> we move 1.121 ++ * start to the end of the region since it's ok until there 1.122 ++ */ 1.123 ++ if (ei->addr <= start) 1.124 ++ start = ei->addr + ei->size; 1.125 ++ /* if start is now at or beyond end, we're done, full 1.126 ++ * coverage */ 1.127 ++ if (start >= end) 1.128 ++ return 1; /* we're done */ 1.129 + } 1.130 ++ return 0; 1.131 + } 1.132 + 1.133 + static int __init pci_mmcfg_init(void) 1.134 +@@ -183,6 +225,15 @@ static int __init pci_mmcfg_init(void) 1.135 + (pci_mmcfg_config[0].base_address == 0)) 1.136 + goto out; 1.137 + 1.138 ++ if (!e820_all_mapped(pci_mmcfg_config[0].base_address, 1.139 ++ pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, 1.140 ++ E820_RESERVED)) { 1.141 ++ printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", 1.142 ++ pci_mmcfg_config[0].base_address); 1.143 ++ printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); 1.144 ++ goto out; 1.145 ++ } 1.146 ++ 1.147 + printk(KERN_INFO "PCI: Using MMCONFIG\n"); 1.148 + raw_pci_ops = &pci_mmcfg; 1.149 + pci_probe = (pci_probe & 
~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 1.150 +diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c ./arch/x86_64/pci/mmconfig.c 1.151 +--- ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c 2006-09-12 19:02:10.000000000 +0100 1.152 ++++ ./arch/x86_64/pci/mmconfig.c 2006-09-20 11:55:41.000000000 +0100 1.153 +@@ -9,11 +9,19 @@ 1.154 + #include <linux/init.h> 1.155 + #include <linux/acpi.h> 1.156 + #include <linux/bitmap.h> 1.157 ++#include <asm/e820.h> 1.158 ++ 1.159 + #include "pci.h" 1.160 + 1.161 +-#define MMCONFIG_APER_SIZE (256*1024*1024) 1.162 ++/* aperture is up to 256MB but BIOS may reserve less */ 1.163 ++#define MMCONFIG_APER_MIN (2 * 1024*1024) 1.164 ++#define MMCONFIG_APER_MAX (256 * 1024*1024) 1.165 ++ 1.166 ++/* Verify the first 16 busses. We assume that systems with more busses 1.167 ++ get MCFG right. */ 1.168 ++#define MAX_CHECK_BUS 16 1.169 + 1.170 +-static DECLARE_BITMAP(fallback_slots, 32); 1.171 ++static DECLARE_BITMAP(fallback_slots, 32*MAX_CHECK_BUS); 1.172 + 1.173 + /* Static virtual mapping of the MMCONFIG aperture */ 1.174 + struct mmcfg_virt { 1.175 +@@ -55,7 +63,8 @@ static char __iomem *get_virt(unsigned i 1.176 + static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) 1.177 + { 1.178 + char __iomem *addr; 1.179 +- if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots)) 1.180 ++ if (seg == 0 && bus < MAX_CHECK_BUS && 1.181 ++ test_bit(32*bus + PCI_SLOT(devfn), fallback_slots)) 1.182 + return NULL; 1.183 + addr = get_virt(seg, bus); 1.184 + if (!addr) 1.185 +@@ -69,8 +78,10 @@ static int pci_mmcfg_read(unsigned int s 1.186 + char __iomem *addr; 1.187 + 1.188 + /* Why do we have this when nobody checks it. How about a BUG()!? 
-AK */ 1.189 +- if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) 1.190 ++ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) { 1.191 ++ *value = -1; 1.192 + return -EINVAL; 1.193 ++ } 1.194 + 1.195 + addr = pci_dev_base(seg, bus, devfn); 1.196 + if (!addr) 1.197 +@@ -129,23 +140,52 @@ static struct pci_raw_ops pci_mmcfg = { 1.198 + Normally this can be expressed in the MCFG by not listing them 1.199 + and assigning suitable _SEGs, but this isn't implemented in some BIOS. 1.200 + Instead try to discover all devices on bus 0 that are unreachable using MM 1.201 +- and fallback for them. 1.202 +- We only do this for bus 0/seg 0 */ 1.203 ++ and fallback for them. */ 1.204 + static __init void unreachable_devices(void) 1.205 + { 1.206 +- int i; 1.207 +- for (i = 0; i < 32; i++) { 1.208 +- u32 val1; 1.209 +- char __iomem *addr; 1.210 ++ int i, k; 1.211 ++ /* Use the max bus number from ACPI here? */ 1.212 ++ for (k = 0; k < MAX_CHECK_BUS; k++) { 1.213 ++ for (i = 0; i < 32; i++) { 1.214 ++ u32 val1; 1.215 ++ char __iomem *addr; 1.216 ++ 1.217 ++ pci_conf1_read(0, k, PCI_DEVFN(i,0), 0, 4, &val1); 1.218 ++ if (val1 == 0xffffffff) 1.219 ++ continue; 1.220 ++ addr = pci_dev_base(0, k, PCI_DEVFN(i, 0)); 1.221 ++ if (addr == NULL|| readl(addr) != val1) { 1.222 ++ set_bit(i + 32*k, fallback_slots); 1.223 ++ printk(KERN_NOTICE 1.224 ++ "PCI: No mmconfig possible on device %x:%x\n", 1.225 ++ k, i); 1.226 ++ } 1.227 ++ } 1.228 ++ } 1.229 ++} 1.230 + 1.231 +- pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1); 1.232 +- if (val1 == 0xffffffff) 1.233 ++/* NB. Ripped from arch/x86_64/kernel/e820.c for this Xen bugfix patch. 
*/ 1.234 ++static int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) 1.235 ++{ 1.236 ++ int i; 1.237 ++ for (i = 0; i < e820.nr_map; i++) { 1.238 ++ struct e820entry *ei = &e820.map[i]; 1.239 ++ if (type && ei->type != type) 1.240 + continue; 1.241 +- addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0)); 1.242 +- if (addr == NULL|| readl(addr) != val1) { 1.243 +- set_bit(i, &fallback_slots); 1.244 +- } 1.245 ++ /* is the region (part) in overlap with the current region ?*/ 1.246 ++ if (ei->addr >= end || ei->addr + ei->size <= start) 1.247 ++ continue; 1.248 ++ 1.249 ++ /* if the region is at the beginning of <start,end> we move 1.250 ++ * start to the end of the region since it's ok until there 1.251 ++ */ 1.252 ++ if (ei->addr <= start) 1.253 ++ start = ei->addr + ei->size; 1.254 ++ /* if start is now at or beyond end, we're done, full coverage */ 1.255 ++ if (start >= end) 1.256 ++ return 1; /* we're done */ 1.257 + } 1.258 ++ return 0; 1.259 + } 1.260 + 1.261 + static int __init pci_mmcfg_init(void) 1.262 +@@ -161,6 +201,15 @@ static int __init pci_mmcfg_init(void) 1.263 + (pci_mmcfg_config[0].base_address == 0)) 1.264 + return 0; 1.265 + 1.266 ++ if (!e820_all_mapped(pci_mmcfg_config[0].base_address, 1.267 ++ pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, 1.268 ++ E820_RESERVED)) { 1.269 ++ printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", 1.270 ++ pci_mmcfg_config[0].base_address); 1.271 ++ printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); 1.272 ++ return 0; 1.273 ++ } 1.274 ++ 1.275 + /* RED-PEN i386 doesn't do _nocache right now */ 1.276 + pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); 1.277 + if (pci_mmcfg_virt == NULL) { 1.278 +@@ -169,7 +218,8 @@ static int __init pci_mmcfg_init(void) 1.279 + } 1.280 + for (i = 0; i < pci_mmcfg_config_num; ++i) { 1.281 + pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; 1.282 +- pci_mmcfg_virt[i].virt = 
ioremap_nocache(pci_mmcfg_config[i].base_address, MMCONFIG_APER_SIZE); 1.283 ++ pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, 1.284 ++ MMCONFIG_APER_MAX); 1.285 + if (!pci_mmcfg_virt[i].virt) { 1.286 + printk("PCI: Cannot map mmconfig aperture for segment %d\n", 1.287 + pci_mmcfg_config[i].pci_segment_group_number);
2.1 --- a/patches/linux-2.6.16.29/series Wed Sep 20 09:56:50 2006 +0100 2.2 +++ b/patches/linux-2.6.16.29/series Wed Sep 20 12:02:13 2006 +0100 2.3 @@ -10,6 +10,7 @@ net-gso-1-check-dodgy.patch 2.4 net-gso-2-checksum-fix.patch 2.5 net-gso-3-fix-errorcheck.patch 2.6 net-gso-4-kill-warnon.patch 2.7 +pci-mmconfig-fix-from-2.6.17.patch 2.8 pmd-shared.patch 2.9 rcu_needs_cpu.patch 2.10 rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch