ia64/xen-unstable

changeset 16823:f1947dddb5a0

x86: Sync cpu/intel_cacheinfo.c with Linux kernel
Signed-off-by: Xiaowei Yang <xiaowei.yang@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jan 21 13:09:23 2008 +0000 (2008-01-21)
parents 60ee6f97cb19
children 6fed70d658ea
files xen/arch/x86/cpu/intel_cacheinfo.c
line diff
     1.1 --- a/xen/arch/x86/cpu/intel_cacheinfo.c	Mon Jan 21 13:08:44 2008 +0000
     1.2 +++ b/xen/arch/x86/cpu/intel_cacheinfo.c	Mon Jan 21 13:09:23 2008 +0000
     1.3 @@ -1,6 +1,16 @@
     1.4 +/*
     1.5 + *      Routines to identify caches on Intel CPUs.
     1.6 + *
     1.7 + *      Changes:
     1.8 + *      Venkatesh Pallipadi		: Adding cache identification through cpuid(4)
     1.9 + *      Ashok Raj <ashok.raj@intel.com>	: Work with CPU hotplug infrastructure.
    1.10 + *      Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
    1.11 + */
    1.12 +
    1.13  #include <xen/config.h>
    1.14  #include <xen/init.h>
    1.15  #include <xen/lib.h>
    1.16 +#include <xen/errno.h>
    1.17  #include <asm/processor.h>
    1.18  
    1.19  #define LVL_1_INST	1
    1.20 @@ -17,7 +27,7 @@ struct _cache_table
    1.21  };
    1.22  
    1.23  /* all the cache descriptor types we care about (no TLB or trace cache entries) */
    1.24 -static struct _cache_table cache_table[] __devinitdata =
    1.25 +static struct _cache_table cache_table[] __cpuinitdata =
    1.26  {
    1.27  	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
    1.28  	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
    1.29 @@ -30,13 +40,23 @@ static struct _cache_table cache_table[]
    1.30  	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
    1.31  	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
    1.32  	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    1.33 +	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
    1.34  	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
    1.35  	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    1.36 +	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
    1.37 +	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    1.38  	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
    1.39  	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
    1.40  	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
    1.41  	{ 0x44, LVL_2,      1024 },	/* 4-way set assoc, 32 byte line size */
    1.42  	{ 0x45, LVL_2,      2048 },	/* 4-way set assoc, 32 byte line size */
    1.43 +	{ 0x46, LVL_3,      4096 },	/* 4-way set assoc, 64 byte line size */
    1.44 +	{ 0x47, LVL_3,      8192 },	/* 8-way set assoc, 64 byte line size */
    1.45 +	{ 0x49, LVL_3,      4096 },	/* 16-way set assoc, 64 byte line size */
    1.46 +	{ 0x4a, LVL_3,      6144 },	/* 12-way set assoc, 64 byte line size */
    1.47 +	{ 0x4b, LVL_3,      8192 },	/* 16-way set assoc, 64 byte line size */
    1.48 +	{ 0x4c, LVL_3,     12288 },	/* 12-way set assoc, 64 byte line size */
    1.49 +	{ 0x4d, LVL_3,     16384 },	/* 16-way set assoc, 64 byte line size */
    1.50  	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    1.51  	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    1.52  	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    1.53 @@ -44,6 +64,7 @@ static struct _cache_table cache_table[]
    1.54  	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
    1.55  	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
    1.56  	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
    1.57 +	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
    1.58  	{ 0x78, LVL_2,    1024 },	/* 4-way set assoc, 64 byte line size */
    1.59  	{ 0x79, LVL_2,     128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    1.60  	{ 0x7a, LVL_2,     256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    1.61 @@ -60,15 +81,276 @@ static struct _cache_table cache_table[]
    1.62  	{ 0x00, 0, 0}
    1.63  };
    1.64  
    1.65 -unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
    1.66 +
    1.67 +enum _cache_type
    1.68 +{
    1.69 +	CACHE_TYPE_NULL	= 0,
    1.70 +	CACHE_TYPE_DATA = 1,
    1.71 +	CACHE_TYPE_INST = 2,
    1.72 +	CACHE_TYPE_UNIFIED = 3
    1.73 +};
    1.74 +
    1.75 +union _cpuid4_leaf_eax {
    1.76 +	struct {
    1.77 +		enum _cache_type	type:5;
    1.78 +		unsigned int		level:3;
    1.79 +		unsigned int		is_self_initializing:1;
    1.80 +		unsigned int		is_fully_associative:1;
    1.81 +		unsigned int		reserved:4;
    1.82 +		unsigned int		num_threads_sharing:12;
    1.83 +		unsigned int		num_cores_on_die:6;
    1.84 +	} split;
    1.85 +	u32 full;
    1.86 +};
    1.87 +
    1.88 +union _cpuid4_leaf_ebx {
    1.89 +	struct {
    1.90 +		unsigned int		coherency_line_size:12;
    1.91 +		unsigned int		physical_line_partition:10;
    1.92 +		unsigned int		ways_of_associativity:10;
    1.93 +	} split;
    1.94 +	u32 full;
    1.95 +};
    1.96 +
    1.97 +union _cpuid4_leaf_ecx {
    1.98 +	struct {
    1.99 +		unsigned int		number_of_sets:32;
   1.100 +	} split;
   1.101 +	u32 full;
   1.102 +};
   1.103 +
   1.104 +struct _cpuid4_info {
   1.105 +	union _cpuid4_leaf_eax eax;
   1.106 +	union _cpuid4_leaf_ebx ebx;
   1.107 +	union _cpuid4_leaf_ecx ecx;
   1.108 +	unsigned long size;
   1.109 +	cpumask_t shared_cpu_map;
   1.110 +};
   1.111 +
   1.112 +unsigned short			num_cache_leaves;
   1.113 +
   1.114 +/* AMD doesn't have CPUID4. Emulate it here to report the same
   1.115 +   information to the user.  This makes some assumptions about the machine:
    1.116 +   L2 not shared, no SMT, etc., which currently holds on AMD CPUs.
    1.117 +
    1.118 +   In theory the TLBs could be reported as a fake cache type (they are in "dummy").
   1.119 +   Maybe later */
   1.120 +union l1_cache {
   1.121 +	struct {
   1.122 +		unsigned line_size : 8;
   1.123 +		unsigned lines_per_tag : 8;
   1.124 +		unsigned assoc : 8;
   1.125 +		unsigned size_in_kb : 8;
   1.126 +	};
   1.127 +	unsigned val;
   1.128 +};
   1.129 +
   1.130 +union l2_cache {
   1.131 +	struct {
   1.132 +		unsigned line_size : 8;
   1.133 +		unsigned lines_per_tag : 4;
   1.134 +		unsigned assoc : 4;
   1.135 +		unsigned size_in_kb : 16;
   1.136 +	};
   1.137 +	unsigned val;
   1.138 +};
   1.139 +
   1.140 +union l3_cache {
   1.141 +	struct {
   1.142 +		unsigned line_size : 8;
   1.143 +		unsigned lines_per_tag : 4;
   1.144 +		unsigned assoc : 4;
   1.145 +		unsigned res : 2;
   1.146 +		unsigned size_encoded : 14;
   1.147 +	};
   1.148 +	unsigned val;
   1.149 +};
   1.150 +
   1.151 +static const unsigned short assocs[] = {
   1.152 +	[1] = 1, [2] = 2, [4] = 4, [6] = 8,
   1.153 +	[8] = 16, [0xa] = 32, [0xb] = 48,
   1.154 +	[0xc] = 64,
    1.155 +	[0xf] = 0xffff	/* fully associative */
   1.156 +};
   1.157 +
   1.158 +static const unsigned char levels[] = { 1, 1, 2, 3 };
   1.159 +static const unsigned char types[] = { 1, 2, 3, 3 };
   1.160 +
   1.161 +static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
   1.162 +		       union _cpuid4_leaf_ebx *ebx,
   1.163 +		       union _cpuid4_leaf_ecx *ecx)
   1.164 +{
   1.165 +	unsigned dummy;
   1.166 +	unsigned line_size, lines_per_tag, assoc, size_in_kb;
   1.167 +	union l1_cache l1i, l1d;
   1.168 +	union l2_cache l2;
   1.169 +	union l3_cache l3;
   1.170 +	union l1_cache *l1 = &l1d;
   1.171 +
   1.172 +	eax->full = 0;
   1.173 +	ebx->full = 0;
   1.174 +	ecx->full = 0;
   1.175 +
   1.176 +	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
   1.177 +	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
   1.178 +
   1.179 +	switch (leaf) {
   1.180 +	case 1:
    1.181 +		l1 = &l1i;	/* fall through */
   1.182 +	case 0:
   1.183 +		if (!l1->val)
   1.184 +			return;
   1.185 +		assoc = l1->assoc;
   1.186 +		line_size = l1->line_size;
   1.187 +		lines_per_tag = l1->lines_per_tag;
   1.188 +		size_in_kb = l1->size_in_kb;
   1.189 +		break;
   1.190 +	case 2:
   1.191 +		if (!l2.val)
   1.192 +			return;
   1.193 +		assoc = l2.assoc;
   1.194 +		line_size = l2.line_size;
   1.195 +		lines_per_tag = l2.lines_per_tag;
   1.196 +		/* cpu_data has errata corrections for K7 applied */
   1.197 +		size_in_kb = current_cpu_data.x86_cache_size;
   1.198 +		break;
   1.199 +	case 3:
   1.200 +		if (!l3.val)
   1.201 +			return;
   1.202 +		assoc = l3.assoc;
   1.203 +		line_size = l3.line_size;
   1.204 +		lines_per_tag = l3.lines_per_tag;
   1.205 +		size_in_kb = l3.size_encoded * 512;
   1.206 +		break;
   1.207 +	default:
   1.208 +		return;
   1.209 +	}
   1.210 +
   1.211 +	eax->split.is_self_initializing = 1;
   1.212 +	eax->split.type = types[leaf];
   1.213 +	eax->split.level = levels[leaf];
   1.214 +	if (leaf == 3)
   1.215 +		eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
   1.216 +	else
   1.217 +		eax->split.num_threads_sharing = 0;
   1.218 +	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
   1.219 +
   1.220 +
   1.221 +	if (assoc == 0xf)
   1.222 +		eax->split.is_fully_associative = 1;
   1.223 +	ebx->split.coherency_line_size = line_size - 1;
   1.224 +	ebx->split.ways_of_associativity = assocs[assoc] - 1;
   1.225 +	ebx->split.physical_line_partition = lines_per_tag - 1;
   1.226 +	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
   1.227 +		(ebx->split.ways_of_associativity + 1) - 1;
   1.228 +}
   1.229 +
   1.230 +static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
   1.231 +{
   1.232 +	union _cpuid4_leaf_eax 	eax;
   1.233 +	union _cpuid4_leaf_ebx 	ebx;
   1.234 +	union _cpuid4_leaf_ecx 	ecx;
   1.235 +	unsigned		edx;
   1.236 +
   1.237 +	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
   1.238 +		amd_cpuid4(index, &eax, &ebx, &ecx);
   1.239 +	else
   1.240 +		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full,  &edx);
   1.241 +	if (eax.split.type == CACHE_TYPE_NULL)
   1.242 +		return -EIO; /* better error ? */
   1.243 +
   1.244 +	this_leaf->eax = eax;
   1.245 +	this_leaf->ebx = ebx;
   1.246 +	this_leaf->ecx = ecx;
   1.247 +	this_leaf->size = (ecx.split.number_of_sets + 1) *
   1.248 +		(ebx.split.coherency_line_size + 1) *
   1.249 +		(ebx.split.physical_line_partition + 1) *
   1.250 +		(ebx.split.ways_of_associativity + 1);
   1.251 +	return 0;
   1.252 +}
   1.253 +
   1.254 +static int __cpuinit find_num_cache_leaves(void)
   1.255 +{
   1.256 +	unsigned int		eax, ebx, ecx, edx;
   1.257 +	union _cpuid4_leaf_eax	cache_eax;
   1.258 +	int 			i = -1;
   1.259 +
   1.260 +	do {
   1.261 +		++i;
   1.262 +		/* Do cpuid(4) loop to find out num_cache_leaves */
   1.263 +		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
   1.264 +		cache_eax.full = eax;
   1.265 +	} while (cache_eax.split.type != CACHE_TYPE_NULL);
   1.266 +	return i;
   1.267 +}
   1.268 +
   1.269 +unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
   1.270  {
   1.271  	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
   1.272 +	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
   1.273 +	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
   1.274 +	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
   1.275  
   1.276 -	if (c->cpuid_level > 1) {
   1.277 +	if (c->cpuid_level > 3) {
   1.278 +		static int is_initialized;
   1.279 +
   1.280 +		if (is_initialized == 0) {
   1.281 +			/* Init num_cache_leaves from boot CPU */
   1.282 +			num_cache_leaves = find_num_cache_leaves();
   1.283 +			is_initialized++;
   1.284 +		}
   1.285 +
   1.286 +		/*
    1.287 +		 * Whenever possible use cpuid(4), the deterministic cache
    1.288 +		 * parameters leaf, to find the cache details.
   1.289 +		 */
   1.290 +		for (i = 0; i < num_cache_leaves; i++) {
   1.291 +			struct _cpuid4_info this_leaf;
   1.292 +
   1.293 +			int retval;
   1.294 +
   1.295 +			retval = cpuid4_cache_lookup(i, &this_leaf);
   1.296 +			if (retval >= 0) {
   1.297 +				switch(this_leaf.eax.split.level) {
   1.298 +				    case 1:
   1.299 +					if (this_leaf.eax.split.type ==
   1.300 +							CACHE_TYPE_DATA)
   1.301 +						new_l1d = this_leaf.size/1024;
   1.302 +					else if (this_leaf.eax.split.type ==
   1.303 +							CACHE_TYPE_INST)
   1.304 +						new_l1i = this_leaf.size/1024;
   1.305 +					break;
   1.306 +				    case 2:
   1.307 +					new_l2 = this_leaf.size/1024;
   1.308 +					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
   1.309 +					index_msb = get_count_order(num_threads_sharing);
   1.310 +					l2_id = c->apicid >> index_msb;
   1.311 +					break;
   1.312 +				    case 3:
   1.313 +					new_l3 = this_leaf.size/1024;
   1.314 +					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
   1.315 +					index_msb = get_count_order(num_threads_sharing);
   1.316 +					l3_id = c->apicid >> index_msb;
   1.317 +					break;
   1.318 +				    default:
   1.319 +					break;
   1.320 +				}
   1.321 +			}
   1.322 +		}
   1.323 +	}
   1.324 +	/*
    1.325 +	 * Don't use cpuid(2) if cpuid(4) is supported. For P4, we use
    1.326 +	 * cpuid(2) for the trace cache.
   1.327 +	 */
   1.328 +	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
   1.329  		/* supports eax=2  call */
   1.330  		int i, j, n;
   1.331  		int regs[4];
   1.332  		unsigned char *dp = (unsigned char *)regs;
   1.333 +		int only_trace = 0;
   1.334 +
   1.335 +		if (num_cache_leaves != 0 && c->x86 == 15)
   1.336 +			only_trace = 1;
   1.337  
   1.338  		/* Number of times to iterate */
   1.339  		n = cpuid_eax(2) & 0xFF;
   1.340 @@ -90,6 +372,8 @@ unsigned int __devinit init_intel_cachei
   1.341  				while (cache_table[k].descriptor != 0)
   1.342  				{
   1.343  					if (cache_table[k].descriptor == des) {
   1.344 +						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
   1.345 +							break;
   1.346  						switch (cache_table[k].cache_type) {
   1.347  						case LVL_1_INST:
   1.348  							l1i += cache_table[k].size;
   1.349 @@ -115,28 +399,39 @@ unsigned int __devinit init_intel_cachei
   1.350  				}
   1.351  			}
   1.352  		}
   1.353 +	}
   1.354  
   1.355 -		if ( trace )
   1.356 -			printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
   1.357 -		else if ( l1i )
   1.358 -			printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
   1.359 -		if ( l1d )
   1.360 -			printk(", L1 D cache: %dK\n", l1d);
   1.361 -		else
   1.362 -			printk("\n");
   1.363 -		if ( l2 )
   1.364 -			printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
   1.365 -		if ( l3 )
   1.366 -			printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
   1.367 +	if (new_l1d)
   1.368 +		l1d = new_l1d;
   1.369  
   1.370 -		/*
   1.371 -		 * This assumes the L3 cache is shared; it typically lives in
   1.372 -		 * the northbridge.  The L1 caches are included by the L2
   1.373 -		 * cache, and so should not be included for the purpose of
   1.374 -		 * SMP switching weights.
   1.375 -		 */
   1.376 -		c->x86_cache_size = l2 ? l2 : (l1i+l1d);
   1.377 +	if (new_l1i)
   1.378 +		l1i = new_l1i;
   1.379 +
   1.380 +	if (new_l2) {
   1.381 +		l2 = new_l2;
   1.382  	}
   1.383  
   1.384 +	if (new_l3) {
   1.385 +		l3 = new_l3;
   1.386 +	}
   1.387 +
   1.388 +	if (trace)
    1.389 +		printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
    1.390 +	else if (l1i)
    1.391 +		printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
   1.392 +
   1.393 +	if (l1d)
   1.394 +		printk(", L1 D cache: %dK\n", l1d);
   1.395 +	else
   1.396 +		printk("\n");
   1.397 +
   1.398 +	if (l2)
   1.399 +		printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
   1.400 +
   1.401 +	if (l3)
   1.402 +		printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
   1.403 +
   1.404 +	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
   1.405 +
   1.406  	return l2;
   1.407  }
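
For reference, the size computation in cpuid4_cache_lookup() above follows the
CPUID(4) "deterministic cache parameters" encoding: each geometry field is
stored minus one, and the cache size is the product of the four fields. A
minimal user-space sketch of the same decode (GCC on x86, using the <cpuid.h>
__cpuid_count() macro; everything below is illustrative and not part of the
patch):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	/* Assumes an Intel CPU whose CPUID level is at least 4. */
	unsigned int eax, ebx, ecx, edx;

	for (unsigned int i = 0; ; i++) {
		__cpuid_count(4, i, eax, ebx, ecx, edx);
		if ((eax & 0x1f) == 0)	/* CACHE_TYPE_NULL: no more cache leaves */
			break;
		unsigned int level = (eax >> 5) & 0x7;
		unsigned int line  = (ebx & 0xfff) + 1;         /* coherency_line_size + 1 */
		unsigned int parts = ((ebx >> 12) & 0x3ff) + 1; /* physical_line_partition + 1 */
		unsigned int ways  = ((ebx >> 22) & 0x3ff) + 1; /* ways_of_associativity + 1 */
		unsigned int sets  = ecx + 1;                   /* number_of_sets + 1 */
		printf("L%u cache: %u KB\n", level, ways * parts * line * sets / 1024);
	}
	return 0;
}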
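
The l2_id/l3_id values are derived by shifting the APIC ID right by
get_count_order(num_threads_sharing), so that all threads sharing a cache
collapse to the same identifier. A self-contained sketch, where count_order()
is a stand-in for Linux's get_count_order(), i.e. ceil(log2(n)):

/* Number of low APIC-ID bits spanned by n sharing threads. */
static unsigned int count_order(unsigned int n)
{
	unsigned int order = 0;
	while ((1u << order) < n)
		order++;
	return order;
}

/* E.g. an L2 shared by two hyperthreads reports EAX[25:14] == 1, so
 * num_threads_sharing == 2 and both siblings' APIC IDs shift down to
 * the same cache id. */
static unsigned int cache_id(unsigned int apicid, unsigned int leaf4_eax)
{
	unsigned int num_threads_sharing = 1 + ((leaf4_eax >> 14) & 0xfff);
	return apicid >> count_order(num_threads_sharing);
}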
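
On the AMD side, amd_cpuid4() synthesizes the leaf-4 data from the extended
leaves 0x80000005/0x80000006. The 4-bit associativity fields there are codes
rather than way counts, which is what the assocs[] table translates (code 0x6
means 8-way, 0xf fully associative). A sketch of the raw L2 read that union
l2_cache decodes (again GCC's <cpuid.h>; illustrative only):

#include <cpuid.h>

/* CPUID 0x80000006 ECX: [31:16] L2 size in KB, [15:12] assoc code,
 * [11:8] lines per tag, [7:0] line size -- the fields of union l2_cache. */
static unsigned int amd_l2_size_kb(void)
{
	unsigned int eax, ebx, ecx, edx;
	__cpuid(0x80000006, eax, ebx, ecx, edx);
	return ecx >> 16;
}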
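
Finally, the legacy cpuid(2) descriptor walk kept for the P4 trace cache works
byte-wise: the low byte of EAX gives the number of times to repeat the query,
and a register whose bit 31 is set carries no descriptors. A standalone
version of that loop (illustrative; the real code matches each byte against
cache_table[]):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int regs[4];

	__cpuid(2, regs[0], regs[1], regs[2], regs[3]);
	int n = regs[0] & 0xff;			/* number of iterations */

	for (int pass = 0; pass < n; pass++) {
		__cpuid(2, regs[0], regs[1], regs[2], regs[3]);
		regs[0] &= ~0xffu;		/* low EAX byte is the count, not a descriptor */
		for (int r = 0; r < 4; r++) {
			if (regs[r] & 0x80000000)	/* bit 31 set: unknown format */
				continue;
			unsigned char *dp = (unsigned char *)&regs[r];
			for (int b = 0; b < 4; b++)
				if (dp[b])
					printf("descriptor 0x%02x\n", dp[b]);
		}
	}
	return 0;
}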