ia64/linux-2.6.18-xen.hg

view arch/sparc64/kernel/visemul.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen, as sketched below.
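
For illustration, a minimal standalone C sketch of this retry-and-keep
behaviour, under stated assumptions: increase_reservation() below is a
hypothetical stand-in for the real hypervisor call, the page counts are
made up, and sleep() stands in for the driver's retry timer. This is not
the driver code itself.

    #include <stdio.h>
    #include <unistd.h>

    static unsigned long current_pages = 1000;  /* pages the guest holds now */
    static unsigned long target_pages  = 1200;  /* balloon target */

    /* Hypothetical stand-in for the hypervisor call: returns how many of
     * the requested pages were actually granted (possibly fewer, or none,
     * while the host is under memory pressure). */
    static unsigned long increase_reservation(unsigned long nr_pages)
    {
            static int attempt;

            attempt++;
            if (attempt == 1)
                    return 0;               /* host under pressure */
            if (attempt == 2)
                    return nr_pages / 2;    /* partial success */
            return nr_pages;                /* pressure has eased */
    }

    int main(void)
    {
            while (current_pages < target_pages) {
                    unsigned long want = target_pages - current_pages;
                    unsigned long got = increase_reservation(want);

                    /* Keep whatever was granted, even on partial success,
                     * rather than returning it to Xen. */
                    current_pages += got;

                    if (current_pages < target_pages) {
                            /* Not at the target yet: retry later instead of
                             * recording a "hard limit" and giving up. */
                            fprintf(stderr, "got %lu/%lu pages, retrying\n",
                                    got, want);
                            sleep(1);       /* stands in for the retry timer */
                    }
            }
            printf("reached target of %lu pages\n", target_pages);
            return 0;
    }

In the real driver the retry would be rescheduled on the existing balloon
worker/timer rather than blocking in a loop.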

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
line source
1 /* visemul.c: Emulation of VIS instructions.
2 *
3 * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
4 */
5 #include <linux/kernel.h>
6 #include <linux/errno.h>
7 #include <linux/thread_info.h>
9 #include <asm/ptrace.h>
10 #include <asm/pstate.h>
11 #include <asm/system.h>
12 #include <asm/fpumacro.h>
13 #include <asm/uaccess.h>
15 /* OPF field of various VIS instructions. */
17 /* 000111011 - four 16-bit packs */
18 #define FPACK16_OPF 0x03b
20 /* 000111010 - two 32-bit packs */
21 #define FPACK32_OPF 0x03a
23 /* 000111101 - four 16-bit packs */
24 #define FPACKFIX_OPF 0x03d
26 /* 001001101 - four 16-bit expands */
27 #define FEXPAND_OPF 0x04d
29 /* 001001011 - two 32-bit merges */
30 #define FPMERGE_OPF 0x04b
32 /* 000110001 - 8-by-16-bit partitioned product */
33 #define FMUL8x16_OPF 0x031
35 /* 000110011 - 8-by-16-bit upper alpha partitioned product */
36 #define FMUL8x16AU_OPF 0x033
38 /* 000110101 - 8-by-16-bit lower alpha partitioned product */
39 #define FMUL8x16AL_OPF 0x035
41 /* 000110110 - upper 8-by-16-bit partitioned product */
42 #define FMUL8SUx16_OPF 0x036
44 /* 000110111 - lower 8-by-16-bit partitioned product */
45 #define FMUL8ULx16_OPF 0x037
47 /* 000111000 - upper 8-by-16-bit partitioned product */
48 #define FMULD8SUx16_OPF 0x038
50 /* 000111001 - lower unsigned 8-by-16-bit partitioned product */
51 #define FMULD8ULx16_OPF 0x039
53 /* 000101000 - four 16-bit compare; set rd if src1 > src2 */
54 #define FCMPGT16_OPF 0x028
56 /* 000101100 - two 32-bit compare; set rd if src1 > src2 */
57 #define FCMPGT32_OPF 0x02c
59 /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */
60 #define FCMPLE16_OPF 0x020
62 /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */
63 #define FCMPLE32_OPF 0x024
65 /* 000100010 - four 16-bit compare; set rd if src1 != src2 */
66 #define FCMPNE16_OPF 0x022
68 /* 000100110 - two 32-bit compare; set rd if src1 != src2 */
69 #define FCMPNE32_OPF 0x026
71 /* 000101010 - four 16-bit compare; set rd if src1 == src2 */
72 #define FCMPEQ16_OPF 0x02a
74 /* 000101110 - two 32-bit compare; set rd if src1 == src2 */
75 #define FCMPEQ32_OPF 0x02e
77 /* 000000000 - Eight 8-bit edge boundary processing */
78 #define EDGE8_OPF 0x000
80 /* 000000001 - Eight 8-bit edge boundary processing, no CC */
81 #define EDGE8N_OPF 0x001
83 /* 000000010 - Eight 8-bit edge boundary processing, little-endian */
84 #define EDGE8L_OPF 0x002
86 /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */
87 #define EDGE8LN_OPF 0x003
89 /* 000000100 - Four 16-bit edge boundary processing */
90 #define EDGE16_OPF 0x004
92 /* 000000101 - Four 16-bit edge boundary processing, no CC */
93 #define EDGE16N_OPF 0x005
95 /* 000000110 - Four 16-bit edge boundary processing, little-endian */
96 #define EDGE16L_OPF 0x006
98 /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */
99 #define EDGE16LN_OPF 0x007
101 /* 000001000 - Two 32-bit edge boundary processing */
102 #define EDGE32_OPF 0x008
104 /* 000001001 - Two 32-bit edge boundary processing, no CC */
105 #define EDGE32N_OPF 0x009
107 /* 000001010 - Two 32-bit edge boundary processing, little-endian */
108 #define EDGE32L_OPF 0x00a
110 /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */
111 #define EDGE32LN_OPF 0x00b
113 /* 000111110 - distance between 8 8-bit components */
114 #define PDIST_OPF 0x03e
116 /* 000010000 - convert 8-bit 3-D address to blocked byte address */
117 #define ARRAY8_OPF 0x010
119 /* 000010010 - convert 16-bit 3-D address to blocked byte address */
120 #define ARRAY16_OPF 0x012
122 /* 000010100 - convert 32-bit 3-D address to blocked byte address */
123 #define ARRAY32_OPF 0x014
125 /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */
126 #define BMASK_OPF 0x019
128 /* 001001100 - Permute bytes as specified by GSR.MASK */
129 #define BSHUFFLE_OPF 0x04c
131 #define VIS_OPCODE_MASK ((0x3 << 30) | (0x3f << 19))
132 #define VIS_OPCODE_VAL ((0x2 << 30) | (0x36 << 19))
134 #define VIS_OPF_SHIFT 5
135 #define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT)
137 #define RS1(INSN) (((INSN) >> 24) & 0x1f)
138 #define RS2(INSN) (((INSN) >> 0) & 0x1f)
139 #define RD(INSN) (((INSN) >> 25) & 0x1f)
141 static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
142 unsigned int rd, int from_kernel)
143 {
144 if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
145 if (from_kernel != 0)
146 __asm__ __volatile__("flushw");
147 else
148 flushw_user();
149 }
150 }
152 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
153 {
154 unsigned long value;
156 if (reg < 16)
157 return (!reg ? 0 : regs->u_regs[reg]);
158 if (regs->tstate & TSTATE_PRIV) {
159 struct reg_window *win;
160 win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
161 value = win->locals[reg - 16];
162 } else if (test_thread_flag(TIF_32BIT)) {
163 struct reg_window32 __user *win32;
164 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
165 get_user(value, &win32->locals[reg - 16]);
166 } else {
167 struct reg_window __user *win;
168 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
169 get_user(value, &win->locals[reg - 16]);
170 }
171 return value;
172 }
174 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
175 struct pt_regs *regs)
176 {
177 BUG_ON(reg < 16);
178 BUG_ON(regs->tstate & TSTATE_PRIV);
180 if (test_thread_flag(TIF_32BIT)) {
181 struct reg_window32 __user *win32;
182 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
183 return (unsigned long __user *)&win32->locals[reg - 16];
184 } else {
185 struct reg_window __user *win;
186 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
187 return &win->locals[reg - 16];
188 }
189 }
191 static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
192 struct pt_regs *regs)
193 {
194 BUG_ON(reg >= 16);
195 BUG_ON(regs->tstate & TSTATE_PRIV);
197 return &regs->u_regs[reg];
198 }
200 static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
201 {
202 if (rd < 16) {
203 unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
205 *rd_kern = val;
206 } else {
207 unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
209 if (test_thread_flag(TIF_32BIT))
210 __put_user((u32)val, (u32 __user *)rd_user);
211 else
212 __put_user(val, rd_user);
213 }
214 }
216 static inline unsigned long fpd_regval(struct fpustate *f,
217 unsigned int insn_regnum)
218 {
219 insn_regnum = (((insn_regnum & 1) << 5) |
220 (insn_regnum & 0x1e));
222 return *(unsigned long *) &f->regs[insn_regnum];
223 }
225 static inline unsigned long *fpd_regaddr(struct fpustate *f,
226 unsigned int insn_regnum)
227 {
228 insn_regnum = (((insn_regnum & 1) << 5) |
229 (insn_regnum & 0x1e));
231 return (unsigned long *) &f->regs[insn_regnum];
232 }
234 static inline unsigned int fps_regval(struct fpustate *f,
235 unsigned int insn_regnum)
236 {
237 return f->regs[insn_regnum];
238 }
240 static inline unsigned int *fps_regaddr(struct fpustate *f,
241 unsigned int insn_regnum)
242 {
243 return &f->regs[insn_regnum];
244 }
246 struct edge_tab {
247 u16 left, right;
248 };
249 struct edge_tab edge8_tab[8] = {
250 { 0xff, 0x80 },
251 { 0x7f, 0xc0 },
252 { 0x3f, 0xe0 },
253 { 0x1f, 0xf0 },
254 { 0x0f, 0xf8 },
255 { 0x07, 0xfc },
256 { 0x03, 0xfe },
257 { 0x01, 0xff },
258 };
259 struct edge_tab edge8_tab_l[8] = {
260 { 0xff, 0x01 },
261 { 0xfe, 0x03 },
262 { 0xfc, 0x07 },
263 { 0xf8, 0x0f },
264 { 0xf0, 0x1f },
265 { 0xe0, 0x3f },
266 { 0xc0, 0x7f },
267 { 0x80, 0xff },
268 };
269 struct edge_tab edge16_tab[4] = {
270 { 0xf, 0x8 },
271 { 0x7, 0xc },
272 { 0x3, 0xe },
273 { 0x1, 0xf },
274 };
275 struct edge_tab edge16_tab_l[4] = {
276 { 0xf, 0x1 },
277 { 0xe, 0x3 },
278 { 0xc, 0x7 },
279 { 0x8, 0xf },
280 };
281 struct edge_tab edge32_tab[2] = {
282 { 0x3, 0x2 },
283 { 0x1, 0x3 },
284 };
285 struct edge_tab edge32_tab_l[2] = {
286 { 0x3, 0x1 },
287 { 0x2, 0x3 },
288 };
290 static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
291 {
292 unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
293 u16 left, right;
295 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
296 orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
297 orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
299 if (test_thread_flag(TIF_32BIT)) {
300 rs1 = rs1 & 0xffffffff;
301 rs2 = rs2 & 0xffffffff;
302 }
303 switch (opf) {
304 default:
305 case EDGE8_OPF:
306 case EDGE8N_OPF:
307 left = edge8_tab[rs1 & 0x7].left;
308 right = edge8_tab[rs2 & 0x7].right;
309 break;
310 case EDGE8L_OPF:
311 case EDGE8LN_OPF:
312 left = edge8_tab_l[rs1 & 0x7].left;
313 right = edge8_tab_l[rs2 & 0x7].right;
314 break;
316 case EDGE16_OPF:
317 case EDGE16N_OPF:
318 left = edge16_tab[(rs1 >> 1) & 0x3].left;
319 right = edge16_tab[(rs2 >> 1) & 0x3].right;
320 break;
322 case EDGE16L_OPF:
323 case EDGE16LN_OPF:
324 left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
325 right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
326 break;
328 case EDGE32_OPF:
329 case EDGE32N_OPF:
330 left = edge32_tab[(rs1 >> 2) & 0x1].left;
331 right = edge32_tab[(rs2 >> 2) & 0x1].right;
332 break;
334 case EDGE32L_OPF:
335 case EDGE32LN_OPF:
336 left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
337 right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
338 break;
339 };
341 if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
342 rd_val = right & left;
343 else
344 rd_val = left;
346 store_reg(regs, rd_val, RD(insn));
348 switch (opf) {
349 case EDGE8_OPF:
350 case EDGE8L_OPF:
351 case EDGE16_OPF:
352 case EDGE16L_OPF:
353 case EDGE32_OPF:
354 case EDGE32L_OPF: {
355 unsigned long ccr, tstate;
357 __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
358 "rd %%ccr, %0"
359 : "=r" (ccr)
360 : "r" (orig_rs1), "r" (orig_rs2)
361 : "cc");
362 tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
363 regs->tstate = tstate | (ccr << 32UL);
364 }
365 };
366 }
368 static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
369 {
370 unsigned long rs1, rs2, rd_val;
371 unsigned int bits, bits_mask;
373 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
374 rs1 = fetch_reg(RS1(insn), regs);
375 rs2 = fetch_reg(RS2(insn), regs);
377 bits = (rs2 > 5 ? 5 : rs2);
378 bits_mask = (1UL << bits) - 1UL;
380 rd_val = ((((rs1 >> 11) & 0x3) << 0) |
381 (((rs1 >> 33) & 0x3) << 2) |
382 (((rs1 >> 55) & 0x1) << 4) |
383 (((rs1 >> 13) & 0xf) << 5) |
384 (((rs1 >> 35) & 0xf) << 9) |
385 (((rs1 >> 56) & 0xf) << 13) |
386 (((rs1 >> 17) & bits_mask) << 17) |
387 (((rs1 >> 39) & bits_mask) << (17 + bits)) |
388 (((rs1 >> 60) & 0xf) << (17 + (2*bits))));
390 switch (opf) {
391 case ARRAY16_OPF:
392 rd_val <<= 1;
393 break;
395 case ARRAY32_OPF:
396 rd_val <<= 2;
397 };
399 store_reg(regs, rd_val, RD(insn));
400 }
402 static void bmask(struct pt_regs *regs, unsigned int insn)
403 {
404 unsigned long rs1, rs2, rd_val, gsr;
406 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
407 rs1 = fetch_reg(RS1(insn), regs);
408 rs2 = fetch_reg(RS2(insn), regs);
409 rd_val = rs1 + rs2;
411 store_reg(regs, rd_val, RD(insn));
413 gsr = current_thread_info()->gsr[0] & 0xffffffff;
414 gsr |= rd_val << 32UL;
415 current_thread_info()->gsr[0] = gsr;
416 }
418 static void bshuffle(struct pt_regs *regs, unsigned int insn)
419 {
420 struct fpustate *f = FPUSTATE;
421 unsigned long rs1, rs2, rd_val;
422 unsigned long bmask, i;
424 bmask = current_thread_info()->gsr[0] >> 32UL;
426 rs1 = fpd_regval(f, RS1(insn));
427 rs2 = fpd_regval(f, RS2(insn));
429 rd_val = 0UL;
430 for (i = 0; i < 8; i++) {
431 unsigned long which = (bmask >> (i * 4)) & 0xf;
432 unsigned long byte;
434 if (which < 8)
435 byte = (rs1 >> (which * 8)) & 0xff;
436 else
437 byte = (rs2 >> ((which-8)*8)) & 0xff;
438 rd_val |= (byte << (i * 8));
439 }
441 *fpd_regaddr(f, RD(insn)) = rd_val;
442 }
444 static void pdist(struct pt_regs *regs, unsigned int insn)
445 {
446 struct fpustate *f = FPUSTATE;
447 unsigned long rs1, rs2, *rd, rd_val;
448 unsigned long i;
450 rs1 = fpd_regval(f, RS1(insn));
451 rs2 = fpd_regval(f, RS2(insn));
452 rd = fpd_regaddr(f, RD(insn));
454 rd_val = *rd;
456 for (i = 0; i < 8; i++) {
457 s16 s1, s2;
459 s1 = (rs1 >> (56 - (i * 8))) & 0xff;
460 s2 = (rs2 >> (56 - (i * 8))) & 0xff;
462 /* Absolute value of difference. */
463 s1 -= s2;
464 if (s1 < 0)
465 s1 = ~s1 + 1;
467 rd_val += s1;
468 }
470 *rd = rd_val;
471 }
473 static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
474 {
475 struct fpustate *f = FPUSTATE;
476 unsigned long rs1, rs2, gsr, scale, rd_val;
478 gsr = current_thread_info()->gsr[0];
479 scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
480 switch (opf) {
481 case FPACK16_OPF: {
482 unsigned long byte;
484 rs2 = fpd_regval(f, RS2(insn));
485 rd_val = 0;
486 for (byte = 0; byte < 4; byte++) {
487 unsigned int val;
488 s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
489 int scaled = src << scale;
490 int from_fixed = scaled >> 7;
492 val = ((from_fixed < 0) ?
493 0 :
494 (from_fixed > 255) ?
495 255 : from_fixed);
497 rd_val |= (val << (8 * byte));
498 }
499 *fps_regaddr(f, RD(insn)) = rd_val;
500 break;
501 }
503 case FPACK32_OPF: {
504 unsigned long word;
506 rs1 = fpd_regval(f, RS1(insn));
507 rs2 = fpd_regval(f, RS2(insn));
508 rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
509 for (word = 0; word < 2; word++) {
510 unsigned long val;
511 s32 src = (rs2 >> (word * 32UL));
512 s64 scaled = src << scale;
513 s64 from_fixed = scaled >> 23;
515 val = ((from_fixed < 0) ?
516 0 :
517 (from_fixed > 255) ?
518 255 : from_fixed);
520 rd_val |= (val << (32 * word));
521 }
522 *fpd_regaddr(f, RD(insn)) = rd_val;
523 break;
524 }
526 case FPACKFIX_OPF: {
527 unsigned long word;
529 rs2 = fpd_regval(f, RS2(insn));
531 rd_val = 0;
532 for (word = 0; word < 2; word++) {
533 long val;
534 s32 src = (rs2 >> (word * 32UL));
535 s64 scaled = src << scale;
536 s64 from_fixed = scaled >> 16;
538 val = ((from_fixed < -32768) ?
539 -32768 :
540 (from_fixed > 32767) ?
541 32767 : from_fixed);
543 rd_val |= ((val & 0xffff) << (word * 16));
544 }
545 *fps_regaddr(f, RD(insn)) = rd_val;
546 break;
547 }
549 case FEXPAND_OPF: {
550 unsigned long byte;
552 rs2 = fps_regval(f, RS2(insn));
554 rd_val = 0;
555 for (byte = 0; byte < 4; byte++) {
556 unsigned long val;
557 u8 src = (rs2 >> (byte * 8)) & 0xff;
559 val = src << 4;
561 rd_val |= (val << (byte * 16));
562 }
563 *fpd_regaddr(f, RD(insn)) = rd_val;
564 break;
565 }
567 case FPMERGE_OPF: {
568 rs1 = fps_regval(f, RS1(insn));
569 rs2 = fps_regval(f, RS2(insn));
571 rd_val = (((rs2 & 0x000000ff) << 0) |
572 ((rs1 & 0x000000ff) << 8) |
573 ((rs2 & 0x0000ff00) << 8) |
574 ((rs1 & 0x0000ff00) << 16) |
575 ((rs2 & 0x00ff0000) << 16) |
576 ((rs1 & 0x00ff0000) << 24) |
577 ((rs2 & 0xff000000) << 24) |
578 ((rs1 & 0xff000000) << 32));
579 *fpd_regaddr(f, RD(insn)) = rd_val;
580 break;
581 }
582 };
583 }
585 static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
586 {
587 struct fpustate *f = FPUSTATE;
588 unsigned long rs1, rs2, rd_val;
590 switch (opf) {
591 case FMUL8x16_OPF: {
592 unsigned long byte;
594 rs1 = fps_regval(f, RS1(insn));
595 rs2 = fpd_regval(f, RS2(insn));
597 rd_val = 0;
598 for (byte = 0; byte < 4; byte++) {
599 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
600 s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
601 u32 prod = src1 * src2;
602 u16 scaled = ((prod & 0x00ffff00) >> 8);
604 /* Round up. */
605 if (prod & 0x80)
606 scaled++;
607 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
608 }
610 *fpd_regaddr(f, RD(insn)) = rd_val;
611 break;
612 }
614 case FMUL8x16AU_OPF:
615 case FMUL8x16AL_OPF: {
616 unsigned long byte;
617 s16 src2;
619 rs1 = fps_regval(f, RS1(insn));
620 rs2 = fps_regval(f, RS2(insn));
622 rd_val = 0;
623 src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0);
624 for (byte = 0; byte < 4; byte++) {
625 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
626 u32 prod = src1 * src2;
627 u16 scaled = ((prod & 0x00ffff00) >> 8);
629 /* Round up. */
630 if (prod & 0x80)
631 scaled++;
632 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
633 }
635 *fpd_regaddr(f, RD(insn)) = rd_val;
636 break;
637 }
639 case FMUL8SUx16_OPF:
640 case FMUL8ULx16_OPF: {
641 unsigned long byte, ushift;
643 rs1 = fpd_regval(f, RS1(insn));
644 rs2 = fpd_regval(f, RS2(insn));
646 rd_val = 0;
647 ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
648 for (byte = 0; byte < 4; byte++) {
649 u16 src1;
650 s16 src2;
651 u32 prod;
652 u16 scaled;
654 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
655 src2 = ((rs2 >> (16 * byte)) & 0xffff);
656 prod = src1 * src2;
657 scaled = ((prod & 0x00ffff00) >> 8);
659 /* Round up. */
660 if (prod & 0x80)
661 scaled++;
662 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
663 }
665 *fpd_regaddr(f, RD(insn)) = rd_val;
666 break;
667 }
669 case FMULD8SUx16_OPF:
670 case FMULD8ULx16_OPF: {
671 unsigned long byte, ushift;
673 rs1 = fps_regval(f, RS1(insn));
674 rs2 = fps_regval(f, RS2(insn));
676 rd_val = 0;
677 ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
678 for (byte = 0; byte < 2; byte++) {
679 u16 src1;
680 s16 src2;
681 u32 prod;
682 u16 scaled;
684 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
685 src2 = ((rs2 >> (16 * byte)) & 0xffff);
686 prod = src1 * src2;
687 scaled = ((prod & 0x00ffff00) >> 8);
689 /* Round up. */
690 if (prod & 0x80)
691 scaled++;
692 rd_val |= ((scaled & 0xffffUL) <<
693 ((byte * 32UL) + 7UL));
694 }
695 *fpd_regaddr(f, RD(insn)) = rd_val;
696 break;
697 }
698 };
699 }
701 static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
702 {
703 struct fpustate *f = FPUSTATE;
704 unsigned long rs1, rs2, rd_val, i;
706 rs1 = fpd_regval(f, RS1(insn));
707 rs2 = fpd_regval(f, RS2(insn));
709 rd_val = 0;
711 switch (opf) {
712 case FCMPGT16_OPF:
713 for (i = 0; i < 4; i++) {
714 s16 a = (rs1 >> (i * 16)) & 0xffff;
715 s16 b = (rs2 >> (i * 16)) & 0xffff;
717 if (a > b)
718 rd_val |= 1 << i;
719 }
720 break;
722 case FCMPGT32_OPF:
723 for (i = 0; i < 2; i++) {
724 s32 a = (rs1 >> (i * 32)) & 0xffffffff;
725 s32 b = (rs2 >> (i * 32)) & 0xffffffff;
727 if (a > b)
728 rd_val |= 1 << i;
729 }
730 break;
732 case FCMPLE16_OPF:
733 for (i = 0; i < 4; i++) {
734 s16 a = (rs1 >> (i * 16)) & 0xffff;
735 s16 b = (rs2 >> (i * 16)) & 0xffff;
737 if (a <= b)
738 rd_val |= 1 << i;
739 }
740 break;
742 case FCMPLE32_OPF:
743 for (i = 0; i < 2; i++) {
744 s32 a = (rs1 >> (i * 32)) & 0xffffffff;
745 s32 b = (rs2 >> (i * 32)) & 0xffffffff;
747 if (a <= b)
748 rd_val |= 1 << i;
749 }
750 break;
752 case FCMPNE16_OPF:
753 for (i = 0; i < 4; i++) {
754 s16 a = (rs1 >> (i * 16)) & 0xffff;
755 s16 b = (rs2 >> (i * 16)) & 0xffff;
757 if (a != b)
758 rd_val |= 1 << i;
759 }
760 break;
762 case FCMPNE32_OPF:
763 for (i = 0; i < 2; i++) {
764 s32 a = (rs1 >> (i * 32)) & 0xffffffff;
765 s32 b = (rs2 >> (i * 32)) & 0xffffffff;
767 if (a != b)
768 rd_val |= 1 << i;
769 }
770 break;
772 case FCMPEQ16_OPF:
773 for (i = 0; i < 4; i++) {
774 s16 a = (rs1 >> (i * 16)) & 0xffff;
775 s16 b = (rs2 >> (i * 16)) & 0xffff;
777 if (a == b)
778 rd_val |= 1 << i;
779 }
780 break;
782 case FCMPEQ32_OPF:
783 for (i = 0; i < 2; i++) {
784 s32 a = (rs1 >> (i * 32)) & 0xffffffff;
785 s32 b = (rs2 >> (i * 32)) & 0xffffffff;
787 if (a == b)
788 rd_val |= 1 << i;
789 }
790 break;
791 };
793 maybe_flush_windows(0, 0, RD(insn), 0);
794 store_reg(regs, rd_val, RD(insn));
795 }
797 /* Emulate the VIS instructions which are not implemented in
798 * hardware on Niagara.
799 */
800 int vis_emul(struct pt_regs *regs, unsigned int insn)
801 {
802 unsigned long pc = regs->tpc;
803 unsigned int opf;
805 BUG_ON(regs->tstate & TSTATE_PRIV);
807 if (test_thread_flag(TIF_32BIT))
808 pc = (u32)pc;
810 if (get_user(insn, (u32 __user *) pc))
811 return -EFAULT;
813 if ((insn & VIS_OPCODE_MASK) != VIS_OPCODE_VAL)
814 return -EINVAL;
816 opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
817 switch (opf) {
818 default:
819 return -EINVAL;
821 /* Pixel Formatting Instructions. */
822 case FPACK16_OPF:
823 case FPACK32_OPF:
824 case FPACKFIX_OPF:
825 case FEXPAND_OPF:
826 case FPMERGE_OPF:
827 pformat(regs, insn, opf);
828 break;
830 /* Partitioned Multiply Instructions */
831 case FMUL8x16_OPF:
832 case FMUL8x16AU_OPF:
833 case FMUL8x16AL_OPF:
834 case FMUL8SUx16_OPF:
835 case FMUL8ULx16_OPF:
836 case FMULD8SUx16_OPF:
837 case FMULD8ULx16_OPF:
838 pmul(regs, insn, opf);
839 break;
841 /* Pixel Compare Instructions */
842 case FCMPGT16_OPF:
843 case FCMPGT32_OPF:
844 case FCMPLE16_OPF:
845 case FCMPLE32_OPF:
846 case FCMPNE16_OPF:
847 case FCMPNE32_OPF:
848 case FCMPEQ16_OPF:
849 case FCMPEQ32_OPF:
850 pcmp(regs, insn, opf);
851 break;
853 /* Edge Handling Instructions */
854 case EDGE8_OPF:
855 case EDGE8N_OPF:
856 case EDGE8L_OPF:
857 case EDGE8LN_OPF:
858 case EDGE16_OPF:
859 case EDGE16N_OPF:
860 case EDGE16L_OPF:
861 case EDGE16LN_OPF:
862 case EDGE32_OPF:
863 case EDGE32N_OPF:
864 case EDGE32L_OPF:
865 case EDGE32LN_OPF:
866 edge(regs, insn, opf);
867 break;
869 /* Pixel Component Distance */
870 case PDIST_OPF:
871 pdist(regs, insn);
872 break;
874 /* Three-Dimensional Array Addressing Instructions */
875 case ARRAY8_OPF:
876 case ARRAY16_OPF:
877 case ARRAY32_OPF:
878 array(regs, insn, opf);
879 break;
881 /* Byte Mask and Shuffle Instructions */
882 case BMASK_OPF:
883 bmask(regs, insn);
884 break;
886 case BSHUFFLE_OPF:
887 bshuffle(regs, insn);
888 break;
889 };
891 regs->tpc = regs->tnpc;
892 regs->tnpc += 4;
893 return 0;
894 }