ia64/linux-2.6.18-xen.hg

annotate arch/sparc64/kernel/chmc.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
rev   line source
ian@0 1 /* $Id: chmc.c,v 1.4 2002/01/08 16:00:14 davem Exp $
ian@0 2 * memctrlr.c: Driver for UltraSPARC-III memory controller.
ian@0 3 *
ian@0 4 * Copyright (C) 2001 David S. Miller (davem@redhat.com)
ian@0 5 */
ian@0 6
ian@0 7 #include <linux/module.h>
ian@0 8 #include <linux/kernel.h>
ian@0 9 #include <linux/types.h>
ian@0 10 #include <linux/slab.h>
ian@0 11 #include <linux/list.h>
ian@0 12 #include <linux/string.h>
ian@0 13 #include <linux/sched.h>
ian@0 14 #include <linux/smp.h>
ian@0 15 #include <linux/errno.h>
ian@0 16 #include <linux/init.h>
ian@0 17 #include <asm/spitfire.h>
ian@0 18 #include <asm/chmctrl.h>
ian@0 19 #include <asm/oplib.h>
ian@0 20 #include <asm/prom.h>
ian@0 21 #include <asm/io.h>
ian@0 22
ian@0 23 #define CHMCTRL_NDGRPS 2 /* chmc_getunumber() masks the bank number with NDGRPS - 1 to pick a group; presumably "DIMM groups per controller" -- confirm. */
ian@0 24 #define CHMCTRL_NDIMMS 4 /* Number of consecutive dimm_labels entries chmc_getunumber() treats as one group. */
ian@0 25
ian@0 26 #define DIMMS_PER_MC (CHMCTRL_NDGRPS * CHMCTRL_NDIMMS) /* Row count of obp_mem_layout.dimm_labels. */
ian@0 27
ian@0 28 /* OBP memory-layout property format: per-bit lookup tables consulted by chmc_getunumber(). */
ian@0 29 struct obp_map {
ian@0 30 unsigned char dimm_map[144]; /* Indexed by (bit position >> 2); chmc_getunumber() extracts a sub-field of the byte using the low two position bits. */
ian@0 31 unsigned char pin_map[576]; /* Indexed by bit position within the 576-bit line; value is printed as the "pin" number. */
ian@0 32 };
ian@0 33
ian@0 34 #define DIMM_LABEL_SZ 8 /* Bytes reserved per DIMM label. */
ian@0 35
ian@0 36 struct obp_mem_layout { /* init_one_mctrl() memcpy()s the "memory-layout" property straight into this struct. */
ian@0 37 /* One max 8-byte string label per DIMM. Usually
ian@0 38 * this matches the label on the motherboard where
ian@0 39 * that DIMM resides.
ian@0 40 */
ian@0 41 char dimm_labels[DIMMS_PER_MC][DIMM_LABEL_SZ];
ian@0 42
ian@0 43 /* If symmetric use map[0], else it is
ian@0 44 * asymmetric and map[1] should be used.
ian@0 45 */
ian@0 46 char symmetric; /* Tested for non-zero in chmc_getunumber(). */
ian@0 47
ian@0 48 struct obp_map map[2]; /* [0] = symmetric layout, [1] = asymmetric layout. */
ian@0 49 };
ian@0 50
ian@0 51 #define CHMCTRL_NBANKS 4 /* Logical banks per controller; fetch_decode_regs() reads one decode register for each. */
ian@0 52
ian@0 53 struct bank_info { /* Decoded view of one memory decode register; filled by interpret_one_decode_reg(). */
ian@0 54 struct mctrl_info *mp; /* Controller this bank belongs to. */
ian@0 55 int bank_id; /* CHMCTRL_NBANKS * portid + index of the bank within its controller. */
ian@0 56
ian@0 57 u64 raw_reg; /* Decode register value exactly as read. */
ian@0 58 int valid; /* MEM_DECODE_VALID field; bank_match() rejects the bank when this is 0. */
ian@0 59 int uk; /* MEM_DECODE_UK field: upper address bits bank_match() ignores. */
ian@0 60 int um; /* MEM_DECODE_UM field: value the remaining upper address bits must equal. */
ian@0 61 int lk; /* MEM_DECODE_LK field: lower address bits bank_match() ignores. */
ian@0 62 int lm; /* MEM_DECODE_LM field: value the remaining lower address bits must equal. */
ian@0 63 int interleave; /* 1, 2, 4, 8 or 16, derived from lk. */
ian@0 64 unsigned long base; /* (um & ~uk) << PA_UPPER_BITS_SHIFT. */
ian@0 65 unsigned long size; /* Derived from the low 10 bits of uk, divided by interleave. */
ian@0 66 };
ian@0 67
ian@0 68 struct mctrl_info { /* Per-controller state; allocated and filled by init_one_mctrl(). */
ian@0 69 struct list_head list; /* Link on mctrl_list. */
ian@0 70 int portid; /* Value of the node's "portid" property; read_mcreg() compares it with the current CPU id. */
ian@0 71
ian@0 72 struct obp_mem_layout layout_prop; /* Copy of the "memory-layout" property, when present. */
ian@0 73 int layout_size; /* Byte length of that property; 0 when absent (controller reported as INACTIVE). */
ian@0 74
ian@0 75 void __iomem *regs; /* ioremap()ed register window taken from the "reg" property. */
ian@0 76
ian@0 77 u64 timing_control1; /* The five registers below are read once at init, only when layout_size != 0. */
ian@0 78 u64 timing_control2;
ian@0 79 u64 timing_control3;
ian@0 80 u64 timing_control4;
ian@0 81 u64 memaddr_control;
ian@0 82
ian@0 83 struct bank_info logical_banks[CHMCTRL_NBANKS]; /* Filled by fetch_decode_regs(); stays zeroed when layout_size == 0. */
ian@0 84 };
ian@0 85
ian@0 86 static LIST_HEAD(mctrl_list); /* Every controller added by init_one_mctrl(); walked by find_bank() and emptied by chmc_cleanup(). */
ian@0 87
ian@0 88 /* Does BANK decode PHYS_ADDR? */
ian@0 89 static int bank_match(struct bank_info *bp, unsigned long phys_addr)
ian@0 90 {
ian@0 91 unsigned long upper_bits = (phys_addr & PA_UPPER_BITS) >> PA_UPPER_BITS_SHIFT;
ian@0 92 unsigned long lower_bits = (phys_addr & PA_LOWER_BITS) >> PA_LOWER_BITS_SHIFT;
ian@0 93
ian@0 94 /* Bank must be enabled to match. */
ian@0 95 if (bp->valid == 0)
ian@0 96 return 0;
ian@0 97
ian@0 98 /* Would BANK match upper bits? */
ian@0 99 upper_bits ^= bp->um; /* What bits are different? */
ian@0 100 upper_bits = ~upper_bits; /* Invert. */
ian@0 101 upper_bits |= bp->uk; /* What bits don't matter for matching? */
ian@0 102 upper_bits = ~upper_bits; /* Invert. */
ian@0 103
ian@0 104 if (upper_bits)
ian@0 105 return 0;
ian@0 106
ian@0 107 /* Would BANK match lower bits? */
ian@0 108 lower_bits ^= bp->lm; /* What bits are different? */
ian@0 109 lower_bits = ~lower_bits; /* Invert. */
ian@0 110 lower_bits |= bp->lk; /* What bits don't matter for matching? */
ian@0 111 lower_bits = ~lower_bits; /* Invert. */
ian@0 112
ian@0 113 if (lower_bits)
ian@0 114 return 0;
ian@0 115
ian@0 116 /* I always knew you'd be the one. */
ian@0 117 return 1;
ian@0 118 }
ian@0 119
ian@0 120 /* Given PHYS_ADDR, search memory controller banks for a match. */
ian@0 121 static struct bank_info *find_bank(unsigned long phys_addr)
ian@0 122 {
ian@0 123 struct list_head *mctrl_head = &mctrl_list;
ian@0 124 struct list_head *mctrl_entry = mctrl_head->next;
ian@0 125
ian@0 126 for (;;) {
ian@0 127 struct mctrl_info *mp =
ian@0 128 list_entry(mctrl_entry, struct mctrl_info, list);
ian@0 129 int bank_no;
ian@0 130
ian@0 131 if (mctrl_entry == mctrl_head)
ian@0 132 break;
ian@0 133 mctrl_entry = mctrl_entry->next;
ian@0 134
ian@0 135 for (bank_no = 0; bank_no < CHMCTRL_NBANKS; bank_no++) {
ian@0 136 struct bank_info *bp;
ian@0 137
ian@0 138 bp = &mp->logical_banks[bank_no];
ian@0 139 if (bank_match(bp, phys_addr))
ian@0 140 return bp;
ian@0 141 }
ian@0 142 }
ian@0 143
ian@0 144 return NULL;
ian@0 145 }
ian@0 146
ian@0 147 /* This is the main purpose of this driver. */
ian@0 148 #define SYNDROME_MIN -1
ian@0 149 #define SYNDROME_MAX 144
ian@0 150 int chmc_getunumber(int syndrome_code,
ian@0 151 unsigned long phys_addr,
ian@0 152 char *buf, int buflen)
ian@0 153 {
ian@0 154 struct bank_info *bp;
ian@0 155 struct obp_mem_layout *prop;
ian@0 156 int bank_in_controller, first_dimm;
ian@0 157
ian@0 158 bp = find_bank(phys_addr);
ian@0 159 if (bp == NULL ||
ian@0 160 syndrome_code < SYNDROME_MIN ||
ian@0 161 syndrome_code > SYNDROME_MAX) {
ian@0 162 buf[0] = '?';
ian@0 163 buf[1] = '?';
ian@0 164 buf[2] = '?';
ian@0 165 buf[3] = '\0';
ian@0 166 return 0;
ian@0 167 }
ian@0 168
ian@0 169 prop = &bp->mp->layout_prop;
ian@0 170 bank_in_controller = bp->bank_id & (CHMCTRL_NBANKS - 1);
ian@0 171 first_dimm = (bank_in_controller & (CHMCTRL_NDGRPS - 1));
ian@0 172 first_dimm *= CHMCTRL_NDIMMS;
ian@0 173
ian@0 174 if (syndrome_code != SYNDROME_MIN) {
ian@0 175 struct obp_map *map;
ian@0 176 int qword, where_in_line, where, map_index, map_offset;
ian@0 177 unsigned int map_val;
ian@0 178
ian@0 179 /* Yaay, single bit error so we can figure out
ian@0 180 * the exact dimm.
ian@0 181 */
ian@0 182 if (prop->symmetric)
ian@0 183 map = &prop->map[0];
ian@0 184 else
ian@0 185 map = &prop->map[1];
ian@0 186
ian@0 187 /* Covert syndrome code into the way the bits are
ian@0 188 * positioned on the bus.
ian@0 189 */
ian@0 190 if (syndrome_code < 144 - 16)
ian@0 191 syndrome_code += 16;
ian@0 192 else if (syndrome_code < 144)
ian@0 193 syndrome_code -= (144 - 7);
ian@0 194 else if (syndrome_code < (144 + 3))
ian@0 195 syndrome_code -= (144 + 3 - 4);
ian@0 196 else
ian@0 197 syndrome_code -= 144 + 3;
ian@0 198
ian@0 199 /* All this magic has to do with how a cache line
ian@0 200 * comes over the wire on Safari. A 64-bit line
ian@0 201 * comes over in 4 quadword cycles, each of which
ian@0 202 * transmit ECC/MTAG info as well as the actual
ian@0 203 * data. 144 bits per quadword, 576 total.
ian@0 204 */
ian@0 205 #define LINE_SIZE 64
ian@0 206 #define LINE_ADDR_MSK (LINE_SIZE - 1)
ian@0 207 #define QW_PER_LINE 4
ian@0 208 #define QW_BYTES (LINE_SIZE / QW_PER_LINE)
ian@0 209 #define QW_BITS 144
ian@0 210 #define LAST_BIT (576 - 1)
ian@0 211
ian@0 212 qword = (phys_addr & LINE_ADDR_MSK) / QW_BYTES;
ian@0 213 where_in_line = ((3 - qword) * QW_BITS) + syndrome_code;
ian@0 214 where = (LAST_BIT - where_in_line);
ian@0 215 map_index = where >> 2;
ian@0 216 map_offset = where & 0x3;
ian@0 217 map_val = map->dimm_map[map_index];
ian@0 218 map_val = ((map_val >> ((3 - map_offset) << 1)) & (2 - 1));
ian@0 219
ian@0 220 sprintf(buf, "%s, pin %3d",
ian@0 221 prop->dimm_labels[first_dimm + map_val],
ian@0 222 map->pin_map[where_in_line]);
ian@0 223 } else {
ian@0 224 int dimm;
ian@0 225
ian@0 226 /* Multi-bit error, we just dump out all the
ian@0 227 * dimm labels associated with this bank.
ian@0 228 */
ian@0 229 for (dimm = 0; dimm < CHMCTRL_NDIMMS; dimm++) {
ian@0 230 sprintf(buf, "%s ",
ian@0 231 prop->dimm_labels[first_dimm + dimm]);
ian@0 232 buf += strlen(buf);
ian@0 233 }
ian@0 234 }
ian@0 235 return 0;
ian@0 236 }
ian@0 237
ian@0 238 /* Accessing the registers is slightly complicated. If you want
ian@0 239 * to get at the memory controller which is on the same processor
ian@0 240 * the code is executing, you must use special ASI load/store else
ian@0 241 * you go through the global mapping.
ian@0 242 */
ian@0 243 static u64 read_mcreg(struct mctrl_info *mp, unsigned long offset)
ian@0 244 {
ian@0 245 unsigned long ret;
ian@0 246 int this_cpu = get_cpu();
ian@0 247
ian@0 248 if (mp->portid == this_cpu) {
ian@0 249 __asm__ __volatile__("ldxa [%1] %2, %0"
ian@0 250 : "=r" (ret)
ian@0 251 : "r" (offset), "i" (ASI_MCU_CTRL_REG));
ian@0 252 } else {
ian@0 253 __asm__ __volatile__("ldxa [%1] %2, %0"
ian@0 254 : "=r" (ret)
ian@0 255 : "r" (mp->regs + offset),
ian@0 256 "i" (ASI_PHYS_BYPASS_EC_E));
ian@0 257 }
ian@0 258 put_cpu();
ian@0 259
ian@0 260 return ret;
ian@0 261 }
ian@0 262
#if 0 /* currently unused */
/* Store VAL into the memory controller register at OFFSET for MP.
 *
 * Mirrors read_mcreg(): the controller on the executing CPU is
 * written through ASI_MCU_CTRL_REG at the bare offset, any other one
 * through its mapped register window with ASI_PHYS_BYPASS_EC_E.  Both
 * paths must issue a store (stxa), and the CPU id is held with
 * get_cpu()/put_cpu() across the access as read_mcreg() does.
 */
static void write_mcreg(struct mctrl_info *mp, unsigned long offset, u64 val)
{
	int this_cpu = get_cpu();

	if (mp->portid == this_cpu) {
		__asm__ __volatile__("stxa %0, [%1] %2"
				     : : "r" (val),
					 "r" (offset), "i" (ASI_MCU_CTRL_REG));
	} else {
		__asm__ __volatile__("stxa %0, [%1] %2"
				     : : "r" (val),
					 "r" (mp->regs + offset),
					 "i" (ASI_PHYS_BYPASS_EC_E));
	}
	put_cpu();
}
#endif
ian@0 278
ian@0 279 static void interpret_one_decode_reg(struct mctrl_info *mp, int which_bank, u64 val)
ian@0 280 {
ian@0 281 struct bank_info *p = &mp->logical_banks[which_bank];
ian@0 282
ian@0 283 p->mp = mp;
ian@0 284 p->bank_id = (CHMCTRL_NBANKS * mp->portid) + which_bank;
ian@0 285 p->raw_reg = val;
ian@0 286 p->valid = (val & MEM_DECODE_VALID) >> MEM_DECODE_VALID_SHIFT;
ian@0 287 p->uk = (val & MEM_DECODE_UK) >> MEM_DECODE_UK_SHIFT;
ian@0 288 p->um = (val & MEM_DECODE_UM) >> MEM_DECODE_UM_SHIFT;
ian@0 289 p->lk = (val & MEM_DECODE_LK) >> MEM_DECODE_LK_SHIFT;
ian@0 290 p->lm = (val & MEM_DECODE_LM) >> MEM_DECODE_LM_SHIFT;
ian@0 291
ian@0 292 p->base = (p->um);
ian@0 293 p->base &= ~(p->uk);
ian@0 294 p->base <<= PA_UPPER_BITS_SHIFT;
ian@0 295
ian@0 296 switch(p->lk) {
ian@0 297 case 0xf:
ian@0 298 default:
ian@0 299 p->interleave = 1;
ian@0 300 break;
ian@0 301
ian@0 302 case 0xe:
ian@0 303 p->interleave = 2;
ian@0 304 break;
ian@0 305
ian@0 306 case 0xc:
ian@0 307 p->interleave = 4;
ian@0 308 break;
ian@0 309
ian@0 310 case 0x8:
ian@0 311 p->interleave = 8;
ian@0 312 break;
ian@0 313
ian@0 314 case 0x0:
ian@0 315 p->interleave = 16;
ian@0 316 break;
ian@0 317 };
ian@0 318
ian@0 319 /* UK[10] is reserved, and UK[11] is not set for the SDRAM
ian@0 320 * bank size definition.
ian@0 321 */
ian@0 322 p->size = (((unsigned long)p->uk &
ian@0 323 ((1UL << 10UL) - 1UL)) + 1UL) << PA_UPPER_BITS_SHIFT;
ian@0 324 p->size /= p->interleave;
ian@0 325 }
ian@0 326
ian@0 327 static void fetch_decode_regs(struct mctrl_info *mp)
ian@0 328 {
ian@0 329 if (mp->layout_size == 0)
ian@0 330 return;
ian@0 331
ian@0 332 interpret_one_decode_reg(mp, 0,
ian@0 333 read_mcreg(mp, CHMCTRL_DECODE1));
ian@0 334 interpret_one_decode_reg(mp, 1,
ian@0 335 read_mcreg(mp, CHMCTRL_DECODE2));
ian@0 336 interpret_one_decode_reg(mp, 2,
ian@0 337 read_mcreg(mp, CHMCTRL_DECODE3));
ian@0 338 interpret_one_decode_reg(mp, 3,
ian@0 339 read_mcreg(mp, CHMCTRL_DECODE4));
ian@0 340 }
ian@0 341
ian@0 342 static int init_one_mctrl(struct device_node *dp)
ian@0 343 {
ian@0 344 struct mctrl_info *mp = kmalloc(sizeof(*mp), GFP_KERNEL);
ian@0 345 int portid = of_getintprop_default(dp, "portid", -1);
ian@0 346 struct linux_prom64_registers *regs;
ian@0 347 void *pval;
ian@0 348 int len;
ian@0 349
ian@0 350 if (!mp)
ian@0 351 return -1;
ian@0 352 memset(mp, 0, sizeof(*mp));
ian@0 353 if (portid == -1)
ian@0 354 goto fail;
ian@0 355
ian@0 356 mp->portid = portid;
ian@0 357 pval = of_get_property(dp, "memory-layout", &len);
ian@0 358 mp->layout_size = len;
ian@0 359 if (!pval)
ian@0 360 mp->layout_size = 0;
ian@0 361 else {
ian@0 362 if (mp->layout_size > sizeof(mp->layout_prop))
ian@0 363 goto fail;
ian@0 364 memcpy(&mp->layout_prop, pval, len);
ian@0 365 }
ian@0 366
ian@0 367 regs = of_get_property(dp, "reg", NULL);
ian@0 368 if (!regs || regs->reg_size != 0x48)
ian@0 369 goto fail;
ian@0 370
ian@0 371 mp->regs = ioremap(regs->phys_addr, regs->reg_size);
ian@0 372 if (mp->regs == NULL)
ian@0 373 goto fail;
ian@0 374
ian@0 375 if (mp->layout_size != 0UL) {
ian@0 376 mp->timing_control1 = read_mcreg(mp, CHMCTRL_TCTRL1);
ian@0 377 mp->timing_control2 = read_mcreg(mp, CHMCTRL_TCTRL2);
ian@0 378 mp->timing_control3 = read_mcreg(mp, CHMCTRL_TCTRL3);
ian@0 379 mp->timing_control4 = read_mcreg(mp, CHMCTRL_TCTRL4);
ian@0 380 mp->memaddr_control = read_mcreg(mp, CHMCTRL_MACTRL);
ian@0 381 }
ian@0 382
ian@0 383 fetch_decode_regs(mp);
ian@0 384
ian@0 385 list_add(&mp->list, &mctrl_list);
ian@0 386
ian@0 387 /* Report the device. */
ian@0 388 printk(KERN_INFO "%s: US3 memory controller at %p [%s]\n",
ian@0 389 dp->full_name,
ian@0 390 mp->regs, (mp->layout_size ? "ACTIVE" : "INACTIVE"));
ian@0 391
ian@0 392 return 0;
ian@0 393
ian@0 394 fail:
ian@0 395 if (mp) {
ian@0 396 if (mp->regs != NULL)
ian@0 397 iounmap(mp->regs);
ian@0 398 kfree(mp);
ian@0 399 }
ian@0 400 return -1;
ian@0 401 }
ian@0 402
ian@0 403 static int __init chmc_init(void)
ian@0 404 {
ian@0 405 struct device_node *dp;
ian@0 406
ian@0 407 /* This driver is only for cheetah platforms. */
ian@0 408 if (tlb_type != cheetah && tlb_type != cheetah_plus)
ian@0 409 return -ENODEV;
ian@0 410
ian@0 411 for_each_node_by_name(dp, "memory-controller")
ian@0 412 init_one_mctrl(dp);
ian@0 413
ian@0 414 for_each_node_by_name(dp, "mc-us3")
ian@0 415 init_one_mctrl(dp);
ian@0 416
ian@0 417 return 0;
ian@0 418 }
ian@0 419
ian@0 420 static void __exit chmc_cleanup(void)
ian@0 421 {
ian@0 422 struct list_head *head = &mctrl_list;
ian@0 423 struct list_head *tmp = head->next;
ian@0 424
ian@0 425 for (;;) {
ian@0 426 struct mctrl_info *p =
ian@0 427 list_entry(tmp, struct mctrl_info, list);
ian@0 428 if (tmp == head)
ian@0 429 break;
ian@0 430 tmp = tmp->next;
ian@0 431
ian@0 432 list_del(&p->list);
ian@0 433 iounmap(p->regs);
ian@0 434 kfree(p);
ian@0 435 }
ian@0 436 }
ian@0 437
ian@0 438 module_init(chmc_init); /* Registers chmc_init() as the module's init routine. */
ian@0 439 module_exit(chmc_cleanup); /* Registers chmc_cleanup() as the module's exit routine. */