ia64/linux-2.6.18-xen.hg

changeset 305:a37a8c474d8b

cpufreq: Linux support for the architectural pstate driver

With the third generation Opteron parts, AMD switched to an
architecturally defined interface for PowerNow! that uses
different MSRs than previous versions.

This patch brings the PowerNow! driver up to match the mainline
Linux driver and provides support for all AMD parts that use
or will use the architectural pstate interface.

It also removes a WARN_ON statement in kernel/cpu.c that
highlights a cpu hotplug locking issue in the ondemand cpufreq
governor. It is only a warning message and the scope of
the changes to properly suppress it is a bit large to add to
the Xen 2.6.18.8 kernel. I will backport them if people
think that's a better idea.

Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
author Keir Fraser <keir@xensource.com>
date Mon Nov 05 10:42:26 2007 +0000 (2007-11-05)
parents 98de2b149423
children 5a6837bc5808
files arch/i386/kernel/cpu/cpufreq/powernow-k8.c kernel/cpu.c
line diff
     1.1 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c	Mon Nov 05 10:40:31 2007 +0000
     1.2 +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c	Mon Nov 05 10:42:26 2007 +0000
     1.3 @@ -46,7 +46,7 @@
     1.4  
     1.5  #define PFX "powernow-k8: "
     1.6  #define BFX PFX "BIOS error: "
     1.7 -#define VERSION "version 2.00.00"
     1.8 +#define VERSION "version 2.20.00"
     1.9  #include "powernow-k8.h"
    1.10  
    1.11  /* serialize freq changes  */
    1.12 @@ -66,36 +66,15 @@ static u32 find_freq_from_fid(u32 fid)
    1.13  	return 800 + (fid * 100);
    1.14  }
    1.15  
    1.16 -
    1.17  /* Return a frequency in KHz, given an input fid */
    1.18  static u32 find_khz_freq_from_fid(u32 fid)
    1.19  {
    1.20  	return 1000 * find_freq_from_fid(fid);
    1.21  }
    1.22  
    1.23 -/* Return a frequency in MHz, given an input fid and did */
    1.24 -static u32 find_freq_from_fiddid(u32 fid, u32 did)
    1.25 -{
    1.26 -	return 100 * (fid + 0x10) >> did;
    1.27 -}
    1.28 -
    1.29 -static u32 find_khz_freq_from_fiddid(u32 fid, u32 did)
    1.30 +static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate)
    1.31  {
    1.32 -	return 1000 * find_freq_from_fiddid(fid, did);
    1.33 -}
    1.34 -
    1.35 -static u32 find_fid_from_pstate(u32 pstate)
    1.36 -{
    1.37 -	u32 hi, lo;
    1.38 -	rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
    1.39 -	return lo & HW_PSTATE_FID_MASK;
    1.40 -}
    1.41 -
    1.42 -static u32 find_did_from_pstate(u32 pstate)
    1.43 -{
    1.44 -	u32 hi, lo;
    1.45 -	rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
    1.46 -	return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
    1.47 +	return data[pstate].frequency;
    1.48  }
    1.49  
    1.50  /* Return the vco fid for an input fid
    1.51 @@ -139,9 +118,7 @@ static int query_current_values_with_pen
    1.52  	if (cpu_family == CPU_HW_PSTATE) {
    1.53  		rdmsr(MSR_PSTATE_STATUS, lo, hi);
    1.54  		i = lo & HW_PSTATE_MASK;
    1.55 -		rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi);
    1.56 -		data->currfid = lo & HW_PSTATE_FID_MASK;
    1.57 -		data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
    1.58 +		data->currpstate = i;
    1.59  		return 0;
    1.60  	}
    1.61  	do {
    1.62 @@ -292,7 +269,7 @@ static int decrease_vid_code_by_step(str
    1.63  static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
    1.64  {
    1.65  	wrmsr(MSR_PSTATE_CTRL, pstate, 0);
    1.66 -	data->currfid = find_fid_from_pstate(pstate);
    1.67 +	data->currpstate = pstate;
    1.68  	return 0;
    1.69  }
    1.70  
    1.71 @@ -882,41 +859,23 @@ err_out:
    1.72  static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
    1.73  {
    1.74  	int i;
    1.75 +	u32 hi = 0, lo = 0;
    1.76 +	rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
    1.77 +	data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
    1.78  
    1.79  	for (i = 0; i < data->acpi_data->state_count; i++) {
    1.80  		u32 index;
    1.81 -		u32 hi = 0, lo = 0;
    1.82 -		u32 fid;
    1.83 -		u32 did;
    1.84  
    1.85  		index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
    1.86 -		if (index > MAX_HW_PSTATE) {
    1.87 +		if (index > data->max_hw_pstate) {
    1.88  			printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
    1.89  			printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
    1.90 -		}
    1.91 -		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
    1.92 -		if (!(hi & HW_PSTATE_VALID_MASK)) {
    1.93 -			dprintk("invalid pstate %d, ignoring\n", index);
    1.94 -			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
    1.95  			continue;
    1.96  		}
    1.97  
    1.98 -		fid = lo & HW_PSTATE_FID_MASK;
    1.99 -		did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
   1.100 -
   1.101 -		dprintk("   %d : fid 0x%x, did 0x%x\n", index, fid, did);
   1.102 -
   1.103 -		powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT);
   1.104 +		powernow_table[i].index = index;
   1.105 +		powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000;
   1.106  
   1.107 -		powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did);
   1.108 -
   1.109 -		if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) {
   1.110 -			printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
   1.111 -				powernow_table[i].frequency,
   1.112 -				(unsigned int) (data->acpi_data->states[i].core_frequency * 1000));
   1.113 -			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
   1.114 -			continue;
   1.115 -		}
   1.116  	}
   1.117  	return 0;
   1.118  }
   1.119 @@ -1058,22 +1017,18 @@ static int transition_frequency_fidvid(s
   1.120  /* Take a frequency, and issue the hardware pstate transition command */
   1.121  static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index)
   1.122  {
   1.123 -	u32 fid = 0;
   1.124 -	u32 did = 0;
   1.125  	u32 pstate = 0;
   1.126  	int res, i;
   1.127  	struct cpufreq_freqs freqs;
   1.128  
   1.129  	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
   1.130  
   1.131 -	/* get fid did for hardware pstate transition */
   1.132 +	/* get MSR index for hardware pstate transition */
   1.133  	pstate = index & HW_PSTATE_MASK;
   1.134 -	if (pstate > MAX_HW_PSTATE)
   1.135 +	if (pstate > data->max_hw_pstate)
   1.136  		return 0;
   1.137 -	fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT;
   1.138 -	did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT;
   1.139 -	freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid);
   1.140 -	freqs.new = find_khz_freq_from_fiddid(fid, did);
   1.141 +	freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
   1.142 +	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
   1.143  
   1.144  	for_each_cpu_mask(i, *(data->available_cores)) {
   1.145  		freqs.cpu = i;
   1.146 @@ -1081,9 +1036,7 @@ static int transition_frequency_pstate(s
   1.147  	}
   1.148  
   1.149  	res = transition_pstate(data, pstate);
   1.150 -	data->currfid = find_fid_from_pstate(pstate);
   1.151 -	data->currdid = find_did_from_pstate(pstate);
   1.152 -	freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid);
   1.153 +	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
   1.154  
   1.155  	for_each_cpu_mask(i, *(data->available_cores)) {
   1.156  		freqs.cpu = i;
   1.157 @@ -1128,10 +1081,7 @@ static int powernowk8_target(struct cpuf
   1.158  	if (query_current_values_with_pending_wait(data))
   1.159  		goto err_out;
   1.160  
   1.161 -	if (cpu_family == CPU_HW_PSTATE)
   1.162 -		dprintk("targ: curr fid 0x%x, did 0x%x\n",
   1.163 -			data->currfid, data->currvid);
   1.164 -	else {
   1.165 +	if (cpu_family != CPU_HW_PSTATE) {
   1.166  		dprintk("targ: curr fid 0x%x, vid 0x%x\n",
   1.167  		data->currfid, data->currvid);
   1.168  
   1.169 @@ -1162,7 +1112,7 @@ static int powernowk8_target(struct cpuf
   1.170  	mutex_unlock(&fidvid_mutex);
   1.171  
   1.172  	if (cpu_family == CPU_HW_PSTATE)
   1.173 -		pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
   1.174 +		pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate);
   1.175  	else
   1.176  		pol->cur = find_khz_freq_from_fid(data->currfid);
   1.177  	ret = 0;
   1.178 @@ -1259,7 +1209,7 @@ static int __cpuinit powernowk8_cpu_init
   1.179  	    + (3 * (1 << data->irt) * 10)) * 1000;
   1.180  
   1.181  	if (cpu_family == CPU_HW_PSTATE)
   1.182 -		pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
   1.183 +		pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
   1.184  	else
   1.185  		pol->cur = find_khz_freq_from_fid(data->currfid);
   1.186  	dprintk("policy current frequency %d kHz\n", pol->cur);
   1.187 @@ -1276,8 +1226,7 @@ static int __cpuinit powernowk8_cpu_init
   1.188  	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
   1.189  
   1.190  	if (cpu_family == CPU_HW_PSTATE)
   1.191 -		dprintk("cpu_init done, current fid 0x%x, did 0x%x\n",
   1.192 -			data->currfid, data->currdid);
   1.193 +		dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate);
   1.194  	else
   1.195  		dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
   1.196  			data->currfid, data->currvid);
   1.197 @@ -1332,7 +1281,10 @@ static unsigned int powernowk8_get (unsi
   1.198  	if (query_current_values_with_pending_wait(data))
   1.199  		goto out;
   1.200  
   1.201 -	khz = find_khz_freq_from_fid(data->currfid);
   1.202 +	if (cpu_family == CPU_HW_PSTATE)
   1.203 +		khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
   1.204 +	else
   1.205 +		khz = find_khz_freq_from_fid(data->currfid);
   1.206  
   1.207  out:
   1.208  	set_cpus_allowed(current, oldmask);
     2.1 --- a/kernel/cpu.c	Mon Nov 05 10:40:31 2007 +0000
     2.2 +++ b/kernel/cpu.c	Mon Nov 05 10:42:26 2007 +0000
     2.3 @@ -48,7 +48,10 @@ EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
     2.4  
     2.5  void unlock_cpu_hotplug(void)
     2.6  {
     2.7 +/* cpufreq lock-takers fixed in mainline; shut up until dom0 kernel catches up*/
     2.8 +#ifdef CONFIG_XEN
     2.9  	WARN_ON(recursive != current);
    2.10 +#endif
    2.11  	if (recursive_depth) {
    2.12  		recursive_depth--;
    2.13  		return;