ia64/linux-2.6.18-xen.hg

annotate arch/sparc64/kernel/visemul.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
rev   line source
ian@0 1 /* visemul.c: Emulation of VIS instructions.
ian@0 2 *
ian@0 3 * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
ian@0 4 */
ian@0 5 #include <linux/kernel.h>
ian@0 6 #include <linux/errno.h>
ian@0 7 #include <linux/thread_info.h>
ian@0 8
ian@0 9 #include <asm/ptrace.h>
ian@0 10 #include <asm/pstate.h>
ian@0 11 #include <asm/system.h>
ian@0 12 #include <asm/fpumacro.h>
ian@0 13 #include <asm/uaccess.h>
ian@0 14
/* OPF field of various VIS instructions. */

/* 000111011 - four 16-bit packs */
#define FPACK16_OPF	0x03b

/* 000111010 - two 32-bit packs */
#define FPACK32_OPF	0x03a

/* 000111101 - two 32-bit to 16-bit packs */
#define FPACKFIX_OPF	0x03d

/* 001001101 - four 16-bit expands */
#define FEXPAND_OPF	0x04d

/* 001001011 - two 32-bit merges */
#define FPMERGE_OPF	0x04b

/* 000110001 - 8-by-16-bit partitioned product */
#define FMUL8x16_OPF	0x031

/* 000110011 - 8-by-16-bit upper alpha partitioned product */
#define FMUL8x16AU_OPF	0x033

/* 000110101 - 8-by-16-bit lower alpha partitioned product */
#define FMUL8x16AL_OPF	0x035

/* 000110110 - upper 8-by-16-bit partitioned product */
#define FMUL8SUx16_OPF	0x036

/* 000110111 - lower 8-by-16-bit partitioned product */
#define FMUL8ULx16_OPF	0x037

/* 000111000 - upper 8-by-16-bit partitioned product */
#define FMULD8SUx16_OPF	0x038

/* 000111001 - lower unsigned 8-by-16-bit partitioned product */
#define FMULD8ULx16_OPF	0x039

/* 000101000 - four 16-bit compare; set rd if src1 > src2 */
#define FCMPGT16_OPF	0x028

/* 000101100 - two 32-bit compare; set rd if src1 > src2 */
#define FCMPGT32_OPF	0x02c

/* 000100000 - four 16-bit compare; set rd if src1 <= src2 */
#define FCMPLE16_OPF	0x020

/* 000100100 - two 32-bit compare; set rd if src1 <= src2 */
#define FCMPLE32_OPF	0x024

/* 000100010 - four 16-bit compare; set rd if src1 != src2 */
#define FCMPNE16_OPF	0x022

/* 000100110 - two 32-bit compare; set rd if src1 != src2 */
#define FCMPNE32_OPF	0x026

/* 000101010 - four 16-bit compare; set rd if src1 == src2 */
#define FCMPEQ16_OPF	0x02a

/* 000101110 - two 32-bit compare; set rd if src1 == src2 */
#define FCMPEQ32_OPF	0x02e

/* 000000000 - Eight 8-bit edge boundary processing */
#define EDGE8_OPF	0x000

/* 000000001 - Eight 8-bit edge boundary processing, no CC */
#define EDGE8N_OPF	0x001

/* 000000010 - Eight 8-bit edge boundary processing, little-endian */
#define EDGE8L_OPF	0x002

/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */
#define EDGE8LN_OPF	0x003

/* 000000100 - Four 16-bit edge boundary processing */
#define EDGE16_OPF	0x004

/* 000000101 - Four 16-bit edge boundary processing, no CC */
#define EDGE16N_OPF	0x005

/* 000000110 - Four 16-bit edge boundary processing, little-endian */
#define EDGE16L_OPF	0x006

/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */
#define EDGE16LN_OPF	0x007

/* 000001000 - Two 32-bit edge boundary processing */
#define EDGE32_OPF	0x008

/* 000001001 - Two 32-bit edge boundary processing, no CC */
#define EDGE32N_OPF	0x009

/* 000001010 - Two 32-bit edge boundary processing, little-endian */
#define EDGE32L_OPF	0x00a

/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */
#define EDGE32LN_OPF	0x00b

/* 000111110 - distance between 8 8-bit components */
#define PDIST_OPF	0x03e

/* 000010000 - convert 8-bit 3-D address to blocked byte address */
#define ARRAY8_OPF	0x010

/* 000010010 - convert 16-bit 3-D address to blocked byte address */
#define ARRAY16_OPF	0x012

/* 000010100 - convert 32-bit 3-D address to blocked byte address */
#define ARRAY32_OPF	0x014

/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */
#define BMASK_OPF	0x019

/* 001001100 - Permute bytes as specified by GSR.MASK */
#define BSHUFFLE_OPF	0x04c

/*
 * Instruction word layout used by the decode macros below:
 * op in bits 31:30, rd in 29:25, op3 in 24:19, rs1 in 18:14,
 * opf in 13:5 and rs2 in 4:0.
 *
 * The opcode mask/value are built from unsigned constants so that
 * shifting into bit 31 does not overflow a signed int.
 */
#define VIS_OPCODE_MASK	((0x3U << 30) | (0x3fU << 19))
#define VIS_OPCODE_VAL	((0x2U << 30) | (0x36U << 19))

#define VIS_OPF_SHIFT	5
#define VIS_OPF_MASK	(0x1ff << VIS_OPF_SHIFT)

/* rs1 sits directly above the 9-bit opf field, i.e. at bit 14. */
#define RS1(INSN)	(((INSN) >> 14) & 0x1f)
#define RS2(INSN)	(((INSN) >>  0) & 0x1f)
#define RD(INSN)	(((INSN) >> 25) & 0x1f)
ian@0 140
/*
 * Flush the register windows if any operand register number is 16 or
 * higher.  Nothing is done when all three numbers are below 16.
 *
 * @from_kernel: non-zero issues a raw "flushw"; zero calls flushw_user().
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	if (rs1 < 16 && rs2 < 16 && rd < 16)
		return;

	if (from_kernel)
		__asm__ __volatile__("flushw");
	else
		flushw_user();
}
ian@0 151
ian@0 152 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
ian@0 153 {
ian@0 154 unsigned long value;
ian@0 155
ian@0 156 if (reg < 16)
ian@0 157 return (!reg ? 0 : regs->u_regs[reg]);
ian@0 158 if (regs->tstate & TSTATE_PRIV) {
ian@0 159 struct reg_window *win;
ian@0 160 win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
ian@0 161 value = win->locals[reg - 16];
ian@0 162 } else if (test_thread_flag(TIF_32BIT)) {
ian@0 163 struct reg_window32 __user *win32;
ian@0 164 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
ian@0 165 get_user(value, &win32->locals[reg - 16]);
ian@0 166 } else {
ian@0 167 struct reg_window __user *win;
ian@0 168 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
ian@0 169 get_user(value, &win->locals[reg - 16]);
ian@0 170 }
ian@0 171 return value;
ian@0 172 }
ian@0 173
ian@0 174 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
ian@0 175 struct pt_regs *regs)
ian@0 176 {
ian@0 177 BUG_ON(reg < 16);
ian@0 178 BUG_ON(regs->tstate & TSTATE_PRIV);
ian@0 179
ian@0 180 if (test_thread_flag(TIF_32BIT)) {
ian@0 181 struct reg_window32 __user *win32;
ian@0 182 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
ian@0 183 return (unsigned long __user *)&win32->locals[reg - 16];
ian@0 184 } else {
ian@0 185 struct reg_window __user *win;
ian@0 186 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
ian@0 187 return &win->locals[reg - 16];
ian@0 188 }
ian@0 189 }
ian@0 190
ian@0 191 static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
ian@0 192 struct pt_regs *regs)
ian@0 193 {
ian@0 194 BUG_ON(reg >= 16);
ian@0 195 BUG_ON(regs->tstate & TSTATE_PRIV);
ian@0 196
ian@0 197 return &regs->u_regs[reg];
ian@0 198 }
ian@0 199
ian@0 200 static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
ian@0 201 {
ian@0 202 if (rd < 16) {
ian@0 203 unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
ian@0 204
ian@0 205 *rd_kern = val;
ian@0 206 } else {
ian@0 207 unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
ian@0 208
ian@0 209 if (test_thread_flag(TIF_32BIT))
ian@0 210 __put_user((u32)val, (u32 __user *)rd_user);
ian@0 211 else
ian@0 212 __put_user(val, rd_user);
ian@0 213 }
ian@0 214 }
ian@0 215
ian@0 216 static inline unsigned long fpd_regval(struct fpustate *f,
ian@0 217 unsigned int insn_regnum)
ian@0 218 {
ian@0 219 insn_regnum = (((insn_regnum & 1) << 5) |
ian@0 220 (insn_regnum & 0x1e));
ian@0 221
ian@0 222 return *(unsigned long *) &f->regs[insn_regnum];
ian@0 223 }
ian@0 224
ian@0 225 static inline unsigned long *fpd_regaddr(struct fpustate *f,
ian@0 226 unsigned int insn_regnum)
ian@0 227 {
ian@0 228 insn_regnum = (((insn_regnum & 1) << 5) |
ian@0 229 (insn_regnum & 0x1e));
ian@0 230
ian@0 231 return (unsigned long *) &f->regs[insn_regnum];
ian@0 232 }
ian@0 233
ian@0 234 static inline unsigned int fps_regval(struct fpustate *f,
ian@0 235 unsigned int insn_regnum)
ian@0 236 {
ian@0 237 return f->regs[insn_regnum];
ian@0 238 }
ian@0 239
ian@0 240 static inline unsigned int *fps_regaddr(struct fpustate *f,
ian@0 241 unsigned int insn_regnum)
ian@0 242 {
ian@0 243 return &f->regs[insn_regnum];
ian@0 244 }
ian@0 245
ian@0 246 struct edge_tab {
ian@0 247 u16 left, right;
ian@0 248 };
ian@0 249 struct edge_tab edge8_tab[8] = {
ian@0 250 { 0xff, 0x80 },
ian@0 251 { 0x7f, 0xc0 },
ian@0 252 { 0x3f, 0xe0 },
ian@0 253 { 0x1f, 0xf0 },
ian@0 254 { 0x0f, 0xf8 },
ian@0 255 { 0x07, 0xfc },
ian@0 256 { 0x03, 0xfe },
ian@0 257 { 0x01, 0xff },
ian@0 258 };
ian@0 259 struct edge_tab edge8_tab_l[8] = {
ian@0 260 { 0xff, 0x01 },
ian@0 261 { 0xfe, 0x03 },
ian@0 262 { 0xfc, 0x07 },
ian@0 263 { 0xf8, 0x0f },
ian@0 264 { 0xf0, 0x1f },
ian@0 265 { 0xe0, 0x3f },
ian@0 266 { 0xc0, 0x7f },
ian@0 267 { 0x80, 0xff },
ian@0 268 };
ian@0 269 struct edge_tab edge16_tab[4] = {
ian@0 270 { 0xf, 0x8 },
ian@0 271 { 0x7, 0xc },
ian@0 272 { 0x3, 0xe },
ian@0 273 { 0x1, 0xf },
ian@0 274 };
ian@0 275 struct edge_tab edge16_tab_l[4] = {
ian@0 276 { 0xf, 0x1 },
ian@0 277 { 0xe, 0x3 },
ian@0 278 { 0xc, 0x7 },
ian@0 279 { 0x8, 0xf },
ian@0 280 };
ian@0 281 struct edge_tab edge32_tab[2] = {
ian@0 282 { 0x3, 0x2 },
ian@0 283 { 0x1, 0x3 },
ian@0 284 };
ian@0 285 struct edge_tab edge32_tab_l[2] = {
ian@0 286 { 0x3, 0x1 },
ian@0 287 { 0x2, 0x3 },
ian@0 288 };
ian@0 289
ian@0 290 static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
ian@0 291 {
ian@0 292 unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
ian@0 293 u16 left, right;
ian@0 294
ian@0 295 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
ian@0 296 orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
ian@0 297 orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
ian@0 298
ian@0 299 if (test_thread_flag(TIF_32BIT)) {
ian@0 300 rs1 = rs1 & 0xffffffff;
ian@0 301 rs2 = rs2 & 0xffffffff;
ian@0 302 }
ian@0 303 switch (opf) {
ian@0 304 default:
ian@0 305 case EDGE8_OPF:
ian@0 306 case EDGE8N_OPF:
ian@0 307 left = edge8_tab[rs1 & 0x7].left;
ian@0 308 right = edge8_tab[rs2 & 0x7].right;
ian@0 309 break;
ian@0 310 case EDGE8L_OPF:
ian@0 311 case EDGE8LN_OPF:
ian@0 312 left = edge8_tab_l[rs1 & 0x7].left;
ian@0 313 right = edge8_tab_l[rs2 & 0x7].right;
ian@0 314 break;
ian@0 315
ian@0 316 case EDGE16_OPF:
ian@0 317 case EDGE16N_OPF:
ian@0 318 left = edge16_tab[(rs1 >> 1) & 0x3].left;
ian@0 319 right = edge16_tab[(rs2 >> 1) & 0x3].right;
ian@0 320 break;
ian@0 321
ian@0 322 case EDGE16L_OPF:
ian@0 323 case EDGE16LN_OPF:
ian@0 324 left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
ian@0 325 right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
ian@0 326 break;
ian@0 327
ian@0 328 case EDGE32_OPF:
ian@0 329 case EDGE32N_OPF:
ian@0 330 left = edge32_tab[(rs1 >> 2) & 0x1].left;
ian@0 331 right = edge32_tab[(rs2 >> 2) & 0x1].right;
ian@0 332 break;
ian@0 333
ian@0 334 case EDGE32L_OPF:
ian@0 335 case EDGE32LN_OPF:
ian@0 336 left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
ian@0 337 right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
ian@0 338 break;
ian@0 339 };
ian@0 340
ian@0 341 if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
ian@0 342 rd_val = right & left;
ian@0 343 else
ian@0 344 rd_val = left;
ian@0 345
ian@0 346 store_reg(regs, rd_val, RD(insn));
ian@0 347
ian@0 348 switch (opf) {
ian@0 349 case EDGE8_OPF:
ian@0 350 case EDGE8L_OPF:
ian@0 351 case EDGE16_OPF:
ian@0 352 case EDGE16L_OPF:
ian@0 353 case EDGE32_OPF:
ian@0 354 case EDGE32L_OPF: {
ian@0 355 unsigned long ccr, tstate;
ian@0 356
ian@0 357 __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
ian@0 358 "rd %%ccr, %0"
ian@0 359 : "=r" (ccr)
ian@0 360 : "r" (orig_rs1), "r" (orig_rs2)
ian@0 361 : "cc");
ian@0 362 tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
ian@0 363 regs->tstate = tstate | (ccr << 32UL);
ian@0 364 }
ian@0 365 };
ian@0 366 }
ian@0 367
ian@0 368 static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
ian@0 369 {
ian@0 370 unsigned long rs1, rs2, rd_val;
ian@0 371 unsigned int bits, bits_mask;
ian@0 372
ian@0 373 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
ian@0 374 rs1 = fetch_reg(RS1(insn), regs);
ian@0 375 rs2 = fetch_reg(RS2(insn), regs);
ian@0 376
ian@0 377 bits = (rs2 > 5 ? 5 : rs2);
ian@0 378 bits_mask = (1UL << bits) - 1UL;
ian@0 379
ian@0 380 rd_val = ((((rs1 >> 11) & 0x3) << 0) |
ian@0 381 (((rs1 >> 33) & 0x3) << 2) |
ian@0 382 (((rs1 >> 55) & 0x1) << 4) |
ian@0 383 (((rs1 >> 13) & 0xf) << 5) |
ian@0 384 (((rs1 >> 35) & 0xf) << 9) |
ian@0 385 (((rs1 >> 56) & 0xf) << 13) |
ian@0 386 (((rs1 >> 17) & bits_mask) << 17) |
ian@0 387 (((rs1 >> 39) & bits_mask) << (17 + bits)) |
ian@0 388 (((rs1 >> 60) & 0xf) << (17 + (2*bits))));
ian@0 389
ian@0 390 switch (opf) {
ian@0 391 case ARRAY16_OPF:
ian@0 392 rd_val <<= 1;
ian@0 393 break;
ian@0 394
ian@0 395 case ARRAY32_OPF:
ian@0 396 rd_val <<= 2;
ian@0 397 };
ian@0 398
ian@0 399 store_reg(regs, rd_val, RD(insn));
ian@0 400 }
ian@0 401
ian@0 402 static void bmask(struct pt_regs *regs, unsigned int insn)
ian@0 403 {
ian@0 404 unsigned long rs1, rs2, rd_val, gsr;
ian@0 405
ian@0 406 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
ian@0 407 rs1 = fetch_reg(RS1(insn), regs);
ian@0 408 rs2 = fetch_reg(RS2(insn), regs);
ian@0 409 rd_val = rs1 + rs2;
ian@0 410
ian@0 411 store_reg(regs, rd_val, RD(insn));
ian@0 412
ian@0 413 gsr = current_thread_info()->gsr[0] & 0xffffffff;
ian@0 414 gsr |= rd_val << 32UL;
ian@0 415 current_thread_info()->gsr[0] = gsr;
ian@0 416 }
ian@0 417
ian@0 418 static void bshuffle(struct pt_regs *regs, unsigned int insn)
ian@0 419 {
ian@0 420 struct fpustate *f = FPUSTATE;
ian@0 421 unsigned long rs1, rs2, rd_val;
ian@0 422 unsigned long bmask, i;
ian@0 423
ian@0 424 bmask = current_thread_info()->gsr[0] >> 32UL;
ian@0 425
ian@0 426 rs1 = fpd_regval(f, RS1(insn));
ian@0 427 rs2 = fpd_regval(f, RS2(insn));
ian@0 428
ian@0 429 rd_val = 0UL;
ian@0 430 for (i = 0; i < 8; i++) {
ian@0 431 unsigned long which = (bmask >> (i * 4)) & 0xf;
ian@0 432 unsigned long byte;
ian@0 433
ian@0 434 if (which < 8)
ian@0 435 byte = (rs1 >> (which * 8)) & 0xff;
ian@0 436 else
ian@0 437 byte = (rs2 >> ((which-8)*8)) & 0xff;
ian@0 438 rd_val |= (byte << (i * 8));
ian@0 439 }
ian@0 440
ian@0 441 *fpd_regaddr(f, RD(insn)) = rd_val;
ian@0 442 }
ian@0 443
ian@0 444 static void pdist(struct pt_regs *regs, unsigned int insn)
ian@0 445 {
ian@0 446 struct fpustate *f = FPUSTATE;
ian@0 447 unsigned long rs1, rs2, *rd, rd_val;
ian@0 448 unsigned long i;
ian@0 449
ian@0 450 rs1 = fpd_regval(f, RS1(insn));
ian@0 451 rs2 = fpd_regval(f, RS1(insn));
ian@0 452 rd = fpd_regaddr(f, RD(insn));
ian@0 453
ian@0 454 rd_val = *rd;
ian@0 455
ian@0 456 for (i = 0; i < 8; i++) {
ian@0 457 s16 s1, s2;
ian@0 458
ian@0 459 s1 = (rs1 >> (56 - (i * 8))) & 0xff;
ian@0 460 s2 = (rs2 >> (56 - (i * 8))) & 0xff;
ian@0 461
ian@0 462 /* Absolute value of difference. */
ian@0 463 s1 -= s2;
ian@0 464 if (s1 < 0)
ian@0 465 s1 = ~s1 + 1;
ian@0 466
ian@0 467 rd_val += s1;
ian@0 468 }
ian@0 469
ian@0 470 *rd = rd_val;
ian@0 471 }
ian@0 472
ian@0 473 static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
ian@0 474 {
ian@0 475 struct fpustate *f = FPUSTATE;
ian@0 476 unsigned long rs1, rs2, gsr, scale, rd_val;
ian@0 477
ian@0 478 gsr = current_thread_info()->gsr[0];
ian@0 479 scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
ian@0 480 switch (opf) {
ian@0 481 case FPACK16_OPF: {
ian@0 482 unsigned long byte;
ian@0 483
ian@0 484 rs2 = fpd_regval(f, RS2(insn));
ian@0 485 rd_val = 0;
ian@0 486 for (byte = 0; byte < 4; byte++) {
ian@0 487 unsigned int val;
ian@0 488 s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
ian@0 489 int scaled = src << scale;
ian@0 490 int from_fixed = scaled >> 7;
ian@0 491
ian@0 492 val = ((from_fixed < 0) ?
ian@0 493 0 :
ian@0 494 (from_fixed > 255) ?
ian@0 495 255 : from_fixed);
ian@0 496
ian@0 497 rd_val |= (val << (8 * byte));
ian@0 498 }
ian@0 499 *fps_regaddr(f, RD(insn)) = rd_val;
ian@0 500 break;
ian@0 501 }
ian@0 502
ian@0 503 case FPACK32_OPF: {
ian@0 504 unsigned long word;
ian@0 505
ian@0 506 rs1 = fpd_regval(f, RS1(insn));
ian@0 507 rs2 = fpd_regval(f, RS2(insn));
ian@0 508 rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
ian@0 509 for (word = 0; word < 2; word++) {
ian@0 510 unsigned long val;
ian@0 511 s32 src = (rs2 >> (word * 32UL));
ian@0 512 s64 scaled = src << scale;
ian@0 513 s64 from_fixed = scaled >> 23;
ian@0 514
ian@0 515 val = ((from_fixed < 0) ?
ian@0 516 0 :
ian@0 517 (from_fixed > 255) ?
ian@0 518 255 : from_fixed);
ian@0 519
ian@0 520 rd_val |= (val << (32 * word));
ian@0 521 }
ian@0 522 *fpd_regaddr(f, RD(insn)) = rd_val;
ian@0 523 break;
ian@0 524 }
ian@0 525
ian@0 526 case FPACKFIX_OPF: {
ian@0 527 unsigned long word;
ian@0 528
ian@0 529 rs2 = fpd_regval(f, RS2(insn));
ian@0 530
ian@0 531 rd_val = 0;
ian@0 532 for (word = 0; word < 2; word++) {
ian@0 533 long val;
ian@0 534 s32 src = (rs2 >> (word * 32UL));
ian@0 535 s64 scaled = src << scale;
ian@0 536 s64 from_fixed = scaled >> 16;
ian@0 537
ian@0 538 val = ((from_fixed < -32768) ?
ian@0 539 -32768 :
ian@0 540 (from_fixed > 32767) ?
ian@0 541 32767 : from_fixed);
ian@0 542
ian@0 543 rd_val |= ((val & 0xffff) << (word * 16));
ian@0 544 }
ian@0 545 *fps_regaddr(f, RD(insn)) = rd_val;
ian@0 546 break;
ian@0 547 }
ian@0 548
ian@0 549 case FEXPAND_OPF: {
ian@0 550 unsigned long byte;
ian@0 551
ian@0 552 rs2 = fps_regval(f, RS2(insn));
ian@0 553
ian@0 554 rd_val = 0;
ian@0 555 for (byte = 0; byte < 4; byte++) {
ian@0 556 unsigned long val;
ian@0 557 u8 src = (rs2 >> (byte * 8)) & 0xff;
ian@0 558
ian@0 559 val = src << 4;
ian@0 560
ian@0 561 rd_val |= (val << (byte * 16));
ian@0 562 }
ian@0 563 *fpd_regaddr(f, RD(insn)) = rd_val;
ian@0 564 break;
ian@0 565 }
ian@0 566
ian@0 567 case FPMERGE_OPF: {
ian@0 568 rs1 = fps_regval(f, RS1(insn));
ian@0 569 rs2 = fps_regval(f, RS2(insn));
ian@0 570
ian@0 571 rd_val = (((rs2 & 0x000000ff) << 0) |
ian@0 572 ((rs1 & 0x000000ff) << 8) |
ian@0 573 ((rs2 & 0x0000ff00) << 8) |
ian@0 574 ((rs1 & 0x0000ff00) << 16) |
ian@0 575 ((rs2 & 0x00ff0000) << 16) |
ian@0 576 ((rs1 & 0x00ff0000) << 24) |
ian@0 577 ((rs2 & 0xff000000) << 24) |
ian@0 578 ((rs1 & 0xff000000) << 32));
ian@0 579 *fpd_regaddr(f, RD(insn)) = rd_val;
ian@0 580 break;
ian@0 581 }
ian@0 582 };
ian@0 583 }
ian@0 584
ian@0 585 static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
ian@0 586 {
ian@0 587 struct fpustate *f = FPUSTATE;
ian@0 588 unsigned long rs1, rs2, rd_val;
ian@0 589
ian@0 590 switch (opf) {
ian@0 591 case FMUL8x16_OPF: {
ian@0 592 unsigned long byte;
ian@0 593
ian@0 594 rs1 = fps_regval(f, RS1(insn));
ian@0 595 rs2 = fpd_regval(f, RS2(insn));
ian@0 596
ian@0 597 rd_val = 0;
ian@0 598 for (byte = 0; byte < 4; byte++) {
ian@0 599 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
ian@0 600 s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
ian@0 601 u32 prod = src1 * src2;
ian@0 602 u16 scaled = ((prod & 0x00ffff00) >> 8);
ian@0 603
ian@0 604 /* Round up. */
ian@0 605 if (prod & 0x80)
ian@0 606 scaled++;
ian@0 607 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
ian@0 608 }
ian@0 609
ian@0 610 *fpd_regaddr(f, RD(insn)) = rd_val;
ian@0 611 break;
ian@0 612 }
ian@0 613
ian@0 614 case FMUL8x16AU_OPF:
ian@0 615 case FMUL8x16AL_OPF: {
ian@0 616 unsigned long byte;
ian@0 617 s16 src2;
ian@0 618
ian@0 619 rs1 = fps_regval(f, RS1(insn));
ian@0 620 rs2 = fps_regval(f, RS2(insn));
ian@0 621
ian@0 622 rd_val = 0;
ian@0 623 src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0);
ian@0 624 for (byte = 0; byte < 4; byte++) {
ian@0 625 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
ian@0 626 u32 prod = src1 * src2;
ian@0 627 u16 scaled = ((prod & 0x00ffff00) >> 8);
ian@0 628
ian@0 629 /* Round up. */
ian@0 630 if (prod & 0x80)
ian@0 631 scaled++;
ian@0 632 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
ian@0 633 }
ian@0 634
ian@0 635 *fpd_regaddr(f, RD(insn)) = rd_val;
ian@0 636 break;
ian@0 637 }
ian@0 638
ian@0 639 case FMUL8SUx16_OPF:
ian@0 640 case FMUL8ULx16_OPF: {
ian@0 641 unsigned long byte, ushift;
ian@0 642
ian@0 643 rs1 = fpd_regval(f, RS1(insn));
ian@0 644 rs2 = fpd_regval(f, RS2(insn));
ian@0 645
ian@0 646 rd_val = 0;
ian@0 647 ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
ian@0 648 for (byte = 0; byte < 4; byte++) {
ian@0 649 u16 src1;
ian@0 650 s16 src2;
ian@0 651 u32 prod;
ian@0 652 u16 scaled;
ian@0 653
ian@0 654 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
ian@0 655 src2 = ((rs2 >> (16 * byte)) & 0xffff);
ian@0 656 prod = src1 * src2;
ian@0 657 scaled = ((prod & 0x00ffff00) >> 8);
ian@0 658
ian@0 659 /* Round up. */
ian@0 660 if (prod & 0x80)
ian@0 661 scaled++;
ian@0 662 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
ian@0 663 }
ian@0 664
ian@0 665 *fpd_regaddr(f, RD(insn)) = rd_val;
ian@0 666 break;
ian@0 667 }
ian@0 668
ian@0 669 case FMULD8SUx16_OPF:
ian@0 670 case FMULD8ULx16_OPF: {
ian@0 671 unsigned long byte, ushift;
ian@0 672
ian@0 673 rs1 = fps_regval(f, RS1(insn));
ian@0 674 rs2 = fps_regval(f, RS2(insn));
ian@0 675
ian@0 676 rd_val = 0;
ian@0 677 ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
ian@0 678 for (byte = 0; byte < 2; byte++) {
ian@0 679 u16 src1;
ian@0 680 s16 src2;
ian@0 681 u32 prod;
ian@0 682 u16 scaled;
ian@0 683
ian@0 684 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
ian@0 685 src2 = ((rs2 >> (16 * byte)) & 0xffff);
ian@0 686 prod = src1 * src2;
ian@0 687 scaled = ((prod & 0x00ffff00) >> 8);
ian@0 688
ian@0 689 /* Round up. */
ian@0 690 if (prod & 0x80)
ian@0 691 scaled++;
ian@0 692 rd_val |= ((scaled & 0xffffUL) <<
ian@0 693 ((byte * 32UL) + 7UL));
ian@0 694 }
ian@0 695 *fpd_regaddr(f, RD(insn)) = rd_val;
ian@0 696 break;
ian@0 697 }
ian@0 698 };
ian@0 699 }
ian@0 700
ian@0 701 static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
ian@0 702 {
ian@0 703 struct fpustate *f = FPUSTATE;
ian@0 704 unsigned long rs1, rs2, rd_val, i;
ian@0 705
ian@0 706 rs1 = fpd_regval(f, RS1(insn));
ian@0 707 rs2 = fpd_regval(f, RS2(insn));
ian@0 708
ian@0 709 rd_val = 0;
ian@0 710
ian@0 711 switch (opf) {
ian@0 712 case FCMPGT16_OPF:
ian@0 713 for (i = 0; i < 4; i++) {
ian@0 714 s16 a = (rs1 >> (i * 16)) & 0xffff;
ian@0 715 s16 b = (rs2 >> (i * 16)) & 0xffff;
ian@0 716
ian@0 717 if (a > b)
ian@0 718 rd_val |= 1 << i;
ian@0 719 }
ian@0 720 break;
ian@0 721
ian@0 722 case FCMPGT32_OPF:
ian@0 723 for (i = 0; i < 2; i++) {
ian@0 724 s32 a = (rs1 >> (i * 32)) & 0xffff;
ian@0 725 s32 b = (rs2 >> (i * 32)) & 0xffff;
ian@0 726
ian@0 727 if (a > b)
ian@0 728 rd_val |= 1 << i;
ian@0 729 }
ian@0 730 break;
ian@0 731
ian@0 732 case FCMPLE16_OPF:
ian@0 733 for (i = 0; i < 4; i++) {
ian@0 734 s16 a = (rs1 >> (i * 16)) & 0xffff;
ian@0 735 s16 b = (rs2 >> (i * 16)) & 0xffff;
ian@0 736
ian@0 737 if (a <= b)
ian@0 738 rd_val |= 1 << i;
ian@0 739 }
ian@0 740 break;
ian@0 741
ian@0 742 case FCMPLE32_OPF:
ian@0 743 for (i = 0; i < 2; i++) {
ian@0 744 s32 a = (rs1 >> (i * 32)) & 0xffff;
ian@0 745 s32 b = (rs2 >> (i * 32)) & 0xffff;
ian@0 746
ian@0 747 if (a <= b)
ian@0 748 rd_val |= 1 << i;
ian@0 749 }
ian@0 750 break;
ian@0 751
ian@0 752 case FCMPNE16_OPF:
ian@0 753 for (i = 0; i < 4; i++) {
ian@0 754 s16 a = (rs1 >> (i * 16)) & 0xffff;
ian@0 755 s16 b = (rs2 >> (i * 16)) & 0xffff;
ian@0 756
ian@0 757 if (a != b)
ian@0 758 rd_val |= 1 << i;
ian@0 759 }
ian@0 760 break;
ian@0 761
ian@0 762 case FCMPNE32_OPF:
ian@0 763 for (i = 0; i < 2; i++) {
ian@0 764 s32 a = (rs1 >> (i * 32)) & 0xffff;
ian@0 765 s32 b = (rs2 >> (i * 32)) & 0xffff;
ian@0 766
ian@0 767 if (a != b)
ian@0 768 rd_val |= 1 << i;
ian@0 769 }
ian@0 770 break;
ian@0 771
ian@0 772 case FCMPEQ16_OPF:
ian@0 773 for (i = 0; i < 4; i++) {
ian@0 774 s16 a = (rs1 >> (i * 16)) & 0xffff;
ian@0 775 s16 b = (rs2 >> (i * 16)) & 0xffff;
ian@0 776
ian@0 777 if (a == b)
ian@0 778 rd_val |= 1 << i;
ian@0 779 }
ian@0 780 break;
ian@0 781
ian@0 782 case FCMPEQ32_OPF:
ian@0 783 for (i = 0; i < 2; i++) {
ian@0 784 s32 a = (rs1 >> (i * 32)) & 0xffff;
ian@0 785 s32 b = (rs2 >> (i * 32)) & 0xffff;
ian@0 786
ian@0 787 if (a == b)
ian@0 788 rd_val |= 1 << i;
ian@0 789 }
ian@0 790 break;
ian@0 791 };
ian@0 792
ian@0 793 maybe_flush_windows(0, 0, RD(insn), 0);
ian@0 794 store_reg(regs, rd_val, RD(insn));
ian@0 795 }
ian@0 796
ian@0 797 /* Emulate the VIS instructions which are not implemented in
ian@0 798 * hardware on Niagara.
ian@0 799 */
ian@0 800 int vis_emul(struct pt_regs *regs, unsigned int insn)
ian@0 801 {
ian@0 802 unsigned long pc = regs->tpc;
ian@0 803 unsigned int opf;
ian@0 804
ian@0 805 BUG_ON(regs->tstate & TSTATE_PRIV);
ian@0 806
ian@0 807 if (test_thread_flag(TIF_32BIT))
ian@0 808 pc = (u32)pc;
ian@0 809
ian@0 810 if (get_user(insn, (u32 __user *) pc))
ian@0 811 return -EFAULT;
ian@0 812
ian@0 813 if ((insn & VIS_OPCODE_MASK) != VIS_OPCODE_VAL)
ian@0 814 return -EINVAL;
ian@0 815
ian@0 816 opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
ian@0 817 switch (opf) {
ian@0 818 default:
ian@0 819 return -EINVAL;
ian@0 820
ian@0 821 /* Pixel Formatting Instructions. */
ian@0 822 case FPACK16_OPF:
ian@0 823 case FPACK32_OPF:
ian@0 824 case FPACKFIX_OPF:
ian@0 825 case FEXPAND_OPF:
ian@0 826 case FPMERGE_OPF:
ian@0 827 pformat(regs, insn, opf);
ian@0 828 break;
ian@0 829
ian@0 830 /* Partitioned Multiply Instructions */
ian@0 831 case FMUL8x16_OPF:
ian@0 832 case FMUL8x16AU_OPF:
ian@0 833 case FMUL8x16AL_OPF:
ian@0 834 case FMUL8SUx16_OPF:
ian@0 835 case FMUL8ULx16_OPF:
ian@0 836 case FMULD8SUx16_OPF:
ian@0 837 case FMULD8ULx16_OPF:
ian@0 838 pmul(regs, insn, opf);
ian@0 839 break;
ian@0 840
ian@0 841 /* Pixel Compare Instructions */
ian@0 842 case FCMPGT16_OPF:
ian@0 843 case FCMPGT32_OPF:
ian@0 844 case FCMPLE16_OPF:
ian@0 845 case FCMPLE32_OPF:
ian@0 846 case FCMPNE16_OPF:
ian@0 847 case FCMPNE32_OPF:
ian@0 848 case FCMPEQ16_OPF:
ian@0 849 case FCMPEQ32_OPF:
ian@0 850 pcmp(regs, insn, opf);
ian@0 851 break;
ian@0 852
ian@0 853 /* Edge Handling Instructions */
ian@0 854 case EDGE8_OPF:
ian@0 855 case EDGE8N_OPF:
ian@0 856 case EDGE8L_OPF:
ian@0 857 case EDGE8LN_OPF:
ian@0 858 case EDGE16_OPF:
ian@0 859 case EDGE16N_OPF:
ian@0 860 case EDGE16L_OPF:
ian@0 861 case EDGE16LN_OPF:
ian@0 862 case EDGE32_OPF:
ian@0 863 case EDGE32N_OPF:
ian@0 864 case EDGE32L_OPF:
ian@0 865 case EDGE32LN_OPF:
ian@0 866 edge(regs, insn, opf);
ian@0 867 break;
ian@0 868
ian@0 869 /* Pixel Component Distance */
ian@0 870 case PDIST_OPF:
ian@0 871 pdist(regs, insn);
ian@0 872 break;
ian@0 873
ian@0 874 /* Three-Dimensional Array Addressing Instructions */
ian@0 875 case ARRAY8_OPF:
ian@0 876 case ARRAY16_OPF:
ian@0 877 case ARRAY32_OPF:
ian@0 878 array(regs, insn, opf);
ian@0 879 break;
ian@0 880
ian@0 881 /* Byte Mask and Shuffle Instructions */
ian@0 882 case BMASK_OPF:
ian@0 883 bmask(regs, insn);
ian@0 884 break;
ian@0 885
ian@0 886 case BSHUFFLE_OPF:
ian@0 887 bshuffle(regs, insn);
ian@0 888 break;
ian@0 889 };
ian@0 890
ian@0 891 regs->tpc = regs->tnpc;
ian@0 892 regs->tnpc += 4;
ian@0 893 return 0;
ian@0 894 }