Commit | Line | Data |
---|---|---|
0c51ed93 DM |
1 | /* visemul.c: Emulation of VIS instructions. |
2 | * | |
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | |
4 | */ | |
5 | #include <linux/kernel.h> | |
6 | #include <linux/errno.h> | |
7 | #include <linux/thread_info.h> | |
121dd5f2 | 8 | #include <linux/perf_event.h> |
0c51ed93 DM |
9 | |
10 | #include <asm/ptrace.h> | |
11 | #include <asm/pstate.h> | |
12 | #include <asm/system.h> | |
13 | #include <asm/fpumacro.h> | |
14 | #include <asm/uaccess.h> | |
15 | ||
16 | /* OPF field of various VIS instructions. */ | |
17 | ||
18 | /* 000111011 - four 16-bit packs */ | |
19 | #define FPACK16_OPF 0x03b | |
20 | ||
21 | /* 000111010 - two 32-bit packs */ | |
22 | #define FPACK32_OPF 0x03a | |
23 | ||
24 | /* 000111101 - two 32-bit to 16-bit packs */ | |
25 | #define FPACKFIX_OPF 0x03d | |
26 | ||
27 | /* 001001101 - four 16-bit expands */ | |
28 | #define FEXPAND_OPF 0x04d | |
29 | ||
30 | /* 001001011 - two 32-bit merges */ | |
31 | #define FPMERGE_OPF 0x04b | |
32 | ||
33 | /* 000110001 - 8-by-16-bit partitioned product */ | |
34 | #define FMUL8x16_OPF 0x031 | |
35 | ||
36 | /* 000110011 - 8-by-16-bit upper alpha partitioned product */ | |
37 | #define FMUL8x16AU_OPF 0x033 | |
38 | ||
39 | /* 000110101 - 8-by-16-bit lower alpha partitioned product */ | |
40 | #define FMUL8x16AL_OPF 0x035 | |
41 | ||
42 | /* 000110110 - upper 8-by-16-bit partitioned product */ | |
43 | #define FMUL8SUx16_OPF 0x036 | |
44 | ||
45 | /* 000110111 - lower 8-by-16-bit partitioned product */ | |
46 | #define FMUL8ULx16_OPF 0x037 | |
47 | ||
48 | /* 000111000 - upper 8-by-16-bit partitioned product */ | |
49 | #define FMULD8SUx16_OPF 0x038 | |
50 | ||
51 | /* 000111001 - lower unsigned 8-by-16-bit partitioned product */ | |
52 | #define FMULD8ULx16_OPF 0x039 | |
53 | ||
54 | /* 000101000 - four 16-bit compare; set rd if src1 > src2 */ | |
55 | #define FCMPGT16_OPF 0x028 | |
56 | ||
57 | /* 000101100 - two 32-bit compare; set rd if src1 > src2 */ | |
58 | #define FCMPGT32_OPF 0x02c | |
59 | ||
60 | /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */ | |
61 | #define FCMPLE16_OPF 0x020 | |
62 | ||
63 | /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */ | |
64 | #define FCMPLE32_OPF 0x024 | |
65 | ||
66 | /* 000100010 - four 16-bit compare; set rd if src1 != src2 */ | |
67 | #define FCMPNE16_OPF 0x022 | |
68 | ||
69 | /* 000100110 - two 32-bit compare; set rd if src1 != src2 */ | |
70 | #define FCMPNE32_OPF 0x026 | |
71 | ||
72 | /* 000101010 - four 16-bit compare; set rd if src1 == src2 */ | |
73 | #define FCMPEQ16_OPF 0x02a | |
74 | ||
75 | /* 000101110 - two 32-bit compare; set rd if src1 == src2 */ | |
76 | #define FCMPEQ32_OPF 0x02e | |
77 | ||
78 | /* 000000000 - Eight 8-bit edge boundary processing */ | |
79 | #define EDGE8_OPF 0x000 | |
80 | ||
81 | /* 000000001 - Eight 8-bit edge boundary processing, no CC */ | |
82 | #define EDGE8N_OPF 0x001 | |
83 | ||
84 | /* 000000010 - Eight 8-bit edge boundary processing, little-endian */ | |
85 | #define EDGE8L_OPF 0x002 | |
86 | ||
87 | /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */ | |
88 | #define EDGE8LN_OPF 0x003 | |
89 | ||
90 | /* 000000100 - Four 16-bit edge boundary processing */ | |
91 | #define EDGE16_OPF 0x004 | |
92 | ||
93 | /* 000000101 - Four 16-bit edge boundary processing, no CC */ | |
94 | #define EDGE16N_OPF 0x005 | |
95 | ||
96 | /* 000000110 - Four 16-bit edge boundary processing, little-endian */ | |
97 | #define EDGE16L_OPF 0x006 | |
98 | ||
99 | /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */ | |
100 | #define EDGE16LN_OPF 0x007 | |
101 | ||
102 | /* 000001000 - Two 32-bit edge boundary processing */ | |
103 | #define EDGE32_OPF 0x008 | |
104 | ||
105 | /* 000001001 - Two 32-bit edge boundary processing, no CC */ | |
106 | #define EDGE32N_OPF 0x009 | |
107 | ||
108 | /* 000001010 - Two 32-bit edge boundary processing, little-endian */ | |
109 | #define EDGE32L_OPF 0x00a | |
110 | ||
111 | /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */ | |
112 | #define EDGE32LN_OPF 0x00b | |
113 | ||
114 | /* 000111110 - distance between 8 8-bit components */ | |
115 | #define PDIST_OPF 0x03e | |
116 | ||
117 | /* 000010000 - convert 8-bit 3-D address to blocked byte address */ | |
118 | #define ARRAY8_OPF 0x010 | |
119 | ||
120 | /* 000010010 - convert 16-bit 3-D address to blocked byte address */ | |
121 | #define ARRAY16_OPF 0x012 | |
122 | ||
123 | /* 000010100 - convert 32-bit 3-D address to blocked byte address */ | |
124 | #define ARRAY32_OPF 0x014 | |
125 | ||
126 | /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */ | |
127 | #define BMASK_OPF 0x019 | |
128 | ||
129 | /* 001001100 - Permute bytes as specified by GSR.MASK */ | |
130 | #define BSHUFFLE_OPF 0x04c | |
131 | ||
0c51ed93 DM |
/* The OPF field is nine bits wide and starts at bit 5 of the instruction. */
#define VIS_OPF_SHIFT	5
#define VIS_OPF_MASK	(0x1ff << VIS_OPF_SHIFT)

/* Five-bit register-number fields of the instruction word. */
#define RS1(INSN)	(((INSN) >> 14) & 0x1f)
#define RS2(INSN)	((INSN) & 0x1f)
#define RD(INSN)	(((INSN) >> 25) & 0x1f)
138 | ||
/*
 * Flush the register windows if any operand register number is 16 or
 * above.  fetch_reg()/store_reg() access such registers through the
 * register-window save area addressed by the frame pointer.
 *
 * @from_kernel: non-zero issues a raw "flushw", zero calls flushw_user().
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	/* All operands below 16: nothing lives in a window save area. */
	if (rs2 < 16 && rs1 < 16 && rd < 16)
		return;

	if (from_kernel)
		__asm__ __volatile__("flushw");
	else
		flushw_user();
}
149 | ||
150 | static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) | |
151 | { | |
152 | unsigned long value; | |
153 | ||
154 | if (reg < 16) | |
155 | return (!reg ? 0 : regs->u_regs[reg]); | |
156 | if (regs->tstate & TSTATE_PRIV) { | |
157 | struct reg_window *win; | |
158 | win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); | |
159 | value = win->locals[reg - 16]; | |
160 | } else if (test_thread_flag(TIF_32BIT)) { | |
161 | struct reg_window32 __user *win32; | |
162 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); | |
163 | get_user(value, &win32->locals[reg - 16]); | |
164 | } else { | |
165 | struct reg_window __user *win; | |
166 | win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); | |
167 | get_user(value, &win->locals[reg - 16]); | |
168 | } | |
169 | return value; | |
170 | } | |
171 | ||
172 | static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, | |
173 | struct pt_regs *regs) | |
174 | { | |
175 | BUG_ON(reg < 16); | |
176 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
177 | ||
178 | if (test_thread_flag(TIF_32BIT)) { | |
179 | struct reg_window32 __user *win32; | |
180 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); | |
181 | return (unsigned long __user *)&win32->locals[reg - 16]; | |
182 | } else { | |
183 | struct reg_window __user *win; | |
184 | win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); | |
185 | return &win->locals[reg - 16]; | |
186 | } | |
187 | } | |
188 | ||
189 | static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, | |
190 | struct pt_regs *regs) | |
191 | { | |
192 | BUG_ON(reg >= 16); | |
193 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
194 | ||
195 | return ®s->u_regs[reg]; | |
196 | } | |
197 | ||
198 | static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) | |
199 | { | |
200 | if (rd < 16) { | |
201 | unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); | |
202 | ||
203 | *rd_kern = val; | |
204 | } else { | |
205 | unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); | |
206 | ||
207 | if (test_thread_flag(TIF_32BIT)) | |
208 | __put_user((u32)val, (u32 __user *)rd_user); | |
209 | else | |
210 | __put_user(val, rd_user); | |
211 | } | |
212 | } | |
213 | ||
214 | static inline unsigned long fpd_regval(struct fpustate *f, | |
215 | unsigned int insn_regnum) | |
216 | { | |
217 | insn_regnum = (((insn_regnum & 1) << 5) | | |
218 | (insn_regnum & 0x1e)); | |
219 | ||
220 | return *(unsigned long *) &f->regs[insn_regnum]; | |
221 | } | |
222 | ||
223 | static inline unsigned long *fpd_regaddr(struct fpustate *f, | |
224 | unsigned int insn_regnum) | |
225 | { | |
226 | insn_regnum = (((insn_regnum & 1) << 5) | | |
227 | (insn_regnum & 0x1e)); | |
228 | ||
229 | return (unsigned long *) &f->regs[insn_regnum]; | |
230 | } | |
231 | ||
232 | static inline unsigned int fps_regval(struct fpustate *f, | |
233 | unsigned int insn_regnum) | |
234 | { | |
235 | return f->regs[insn_regnum]; | |
236 | } | |
237 | ||
238 | static inline unsigned int *fps_regaddr(struct fpustate *f, | |
239 | unsigned int insn_regnum) | |
240 | { | |
241 | return &f->regs[insn_regnum]; | |
242 | } | |
243 | ||
244 | struct edge_tab { | |
245 | u16 left, right; | |
246 | }; | |
7e0b1e61 | 247 | static struct edge_tab edge8_tab[8] = { |
0c51ed93 DM |
248 | { 0xff, 0x80 }, |
249 | { 0x7f, 0xc0 }, | |
250 | { 0x3f, 0xe0 }, | |
251 | { 0x1f, 0xf0 }, | |
252 | { 0x0f, 0xf8 }, | |
253 | { 0x07, 0xfc }, | |
254 | { 0x03, 0xfe }, | |
255 | { 0x01, 0xff }, | |
256 | }; | |
7e0b1e61 | 257 | static struct edge_tab edge8_tab_l[8] = { |
0c51ed93 DM |
258 | { 0xff, 0x01 }, |
259 | { 0xfe, 0x03 }, | |
260 | { 0xfc, 0x07 }, | |
261 | { 0xf8, 0x0f }, | |
262 | { 0xf0, 0x1f }, | |
263 | { 0xe0, 0x3f }, | |
264 | { 0xc0, 0x7f }, | |
265 | { 0x80, 0xff }, | |
266 | }; | |
7e0b1e61 | 267 | static struct edge_tab edge16_tab[4] = { |
0c51ed93 DM |
268 | { 0xf, 0x8 }, |
269 | { 0x7, 0xc }, | |
270 | { 0x3, 0xe }, | |
271 | { 0x1, 0xf }, | |
272 | }; | |
7e0b1e61 | 273 | static struct edge_tab edge16_tab_l[4] = { |
0c51ed93 DM |
274 | { 0xf, 0x1 }, |
275 | { 0xe, 0x3 }, | |
276 | { 0xc, 0x7 }, | |
277 | { 0x8, 0xf }, | |
278 | }; | |
7e0b1e61 | 279 | static struct edge_tab edge32_tab[2] = { |
0c51ed93 DM |
280 | { 0x3, 0x2 }, |
281 | { 0x1, 0x3 }, | |
282 | }; | |
7e0b1e61 | 283 | static struct edge_tab edge32_tab_l[2] = { |
0c51ed93 DM |
284 | { 0x3, 0x1 }, |
285 | { 0x2, 0x3 }, | |
286 | }; | |
287 | ||
288 | static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
289 | { | |
290 | unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; | |
291 | u16 left, right; | |
292 | ||
293 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
294 | orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); | |
295 | orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); | |
296 | ||
297 | if (test_thread_flag(TIF_32BIT)) { | |
298 | rs1 = rs1 & 0xffffffff; | |
299 | rs2 = rs2 & 0xffffffff; | |
300 | } | |
301 | switch (opf) { | |
302 | default: | |
303 | case EDGE8_OPF: | |
304 | case EDGE8N_OPF: | |
305 | left = edge8_tab[rs1 & 0x7].left; | |
306 | right = edge8_tab[rs2 & 0x7].right; | |
307 | break; | |
308 | case EDGE8L_OPF: | |
309 | case EDGE8LN_OPF: | |
310 | left = edge8_tab_l[rs1 & 0x7].left; | |
311 | right = edge8_tab_l[rs2 & 0x7].right; | |
312 | break; | |
313 | ||
314 | case EDGE16_OPF: | |
315 | case EDGE16N_OPF: | |
316 | left = edge16_tab[(rs1 >> 1) & 0x3].left; | |
317 | right = edge16_tab[(rs2 >> 1) & 0x3].right; | |
318 | break; | |
319 | ||
320 | case EDGE16L_OPF: | |
321 | case EDGE16LN_OPF: | |
322 | left = edge16_tab_l[(rs1 >> 1) & 0x3].left; | |
323 | right = edge16_tab_l[(rs2 >> 1) & 0x3].right; | |
324 | break; | |
325 | ||
326 | case EDGE32_OPF: | |
327 | case EDGE32N_OPF: | |
328 | left = edge32_tab[(rs1 >> 2) & 0x1].left; | |
329 | right = edge32_tab[(rs2 >> 2) & 0x1].right; | |
330 | break; | |
331 | ||
332 | case EDGE32L_OPF: | |
333 | case EDGE32LN_OPF: | |
334 | left = edge32_tab_l[(rs1 >> 2) & 0x1].left; | |
335 | right = edge32_tab_l[(rs2 >> 2) & 0x1].right; | |
336 | break; | |
337 | }; | |
338 | ||
339 | if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) | |
340 | rd_val = right & left; | |
341 | else | |
342 | rd_val = left; | |
343 | ||
344 | store_reg(regs, rd_val, RD(insn)); | |
345 | ||
346 | switch (opf) { | |
347 | case EDGE8_OPF: | |
348 | case EDGE8L_OPF: | |
349 | case EDGE16_OPF: | |
350 | case EDGE16L_OPF: | |
351 | case EDGE32_OPF: | |
352 | case EDGE32L_OPF: { | |
353 | unsigned long ccr, tstate; | |
354 | ||
355 | __asm__ __volatile__("subcc %1, %2, %%g0\n\t" | |
356 | "rd %%ccr, %0" | |
357 | : "=r" (ccr) | |
358 | : "r" (orig_rs1), "r" (orig_rs2) | |
359 | : "cc"); | |
360 | tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); | |
361 | regs->tstate = tstate | (ccr << 32UL); | |
362 | } | |
363 | }; | |
364 | } | |
365 | ||
366 | static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
367 | { | |
368 | unsigned long rs1, rs2, rd_val; | |
369 | unsigned int bits, bits_mask; | |
370 | ||
371 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
372 | rs1 = fetch_reg(RS1(insn), regs); | |
373 | rs2 = fetch_reg(RS2(insn), regs); | |
374 | ||
375 | bits = (rs2 > 5 ? 5 : rs2); | |
376 | bits_mask = (1UL << bits) - 1UL; | |
377 | ||
378 | rd_val = ((((rs1 >> 11) & 0x3) << 0) | | |
379 | (((rs1 >> 33) & 0x3) << 2) | | |
380 | (((rs1 >> 55) & 0x1) << 4) | | |
381 | (((rs1 >> 13) & 0xf) << 5) | | |
382 | (((rs1 >> 35) & 0xf) << 9) | | |
383 | (((rs1 >> 56) & 0xf) << 13) | | |
384 | (((rs1 >> 17) & bits_mask) << 17) | | |
385 | (((rs1 >> 39) & bits_mask) << (17 + bits)) | | |
386 | (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); | |
387 | ||
388 | switch (opf) { | |
389 | case ARRAY16_OPF: | |
390 | rd_val <<= 1; | |
391 | break; | |
392 | ||
393 | case ARRAY32_OPF: | |
394 | rd_val <<= 2; | |
395 | }; | |
396 | ||
397 | store_reg(regs, rd_val, RD(insn)); | |
398 | } | |
399 | ||
400 | static void bmask(struct pt_regs *regs, unsigned int insn) | |
401 | { | |
402 | unsigned long rs1, rs2, rd_val, gsr; | |
403 | ||
404 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
405 | rs1 = fetch_reg(RS1(insn), regs); | |
406 | rs2 = fetch_reg(RS2(insn), regs); | |
407 | rd_val = rs1 + rs2; | |
408 | ||
409 | store_reg(regs, rd_val, RD(insn)); | |
410 | ||
411 | gsr = current_thread_info()->gsr[0] & 0xffffffff; | |
412 | gsr |= rd_val << 32UL; | |
413 | current_thread_info()->gsr[0] = gsr; | |
414 | } | |
415 | ||
416 | static void bshuffle(struct pt_regs *regs, unsigned int insn) | |
417 | { | |
418 | struct fpustate *f = FPUSTATE; | |
419 | unsigned long rs1, rs2, rd_val; | |
420 | unsigned long bmask, i; | |
421 | ||
422 | bmask = current_thread_info()->gsr[0] >> 32UL; | |
423 | ||
424 | rs1 = fpd_regval(f, RS1(insn)); | |
425 | rs2 = fpd_regval(f, RS2(insn)); | |
426 | ||
427 | rd_val = 0UL; | |
428 | for (i = 0; i < 8; i++) { | |
429 | unsigned long which = (bmask >> (i * 4)) & 0xf; | |
430 | unsigned long byte; | |
431 | ||
432 | if (which < 8) | |
433 | byte = (rs1 >> (which * 8)) & 0xff; | |
434 | else | |
435 | byte = (rs2 >> ((which-8)*8)) & 0xff; | |
436 | rd_val |= (byte << (i * 8)); | |
437 | } | |
438 | ||
439 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
440 | } | |
441 | ||
442 | static void pdist(struct pt_regs *regs, unsigned int insn) | |
443 | { | |
444 | struct fpustate *f = FPUSTATE; | |
445 | unsigned long rs1, rs2, *rd, rd_val; | |
446 | unsigned long i; | |
447 | ||
448 | rs1 = fpd_regval(f, RS1(insn)); | |
726c12f5 | 449 | rs2 = fpd_regval(f, RS2(insn)); |
0c51ed93 DM |
450 | rd = fpd_regaddr(f, RD(insn)); |
451 | ||
452 | rd_val = *rd; | |
453 | ||
454 | for (i = 0; i < 8; i++) { | |
455 | s16 s1, s2; | |
456 | ||
457 | s1 = (rs1 >> (56 - (i * 8))) & 0xff; | |
458 | s2 = (rs2 >> (56 - (i * 8))) & 0xff; | |
459 | ||
460 | /* Absolute value of difference. */ | |
461 | s1 -= s2; | |
462 | if (s1 < 0) | |
463 | s1 = ~s1 + 1; | |
464 | ||
465 | rd_val += s1; | |
466 | } | |
467 | ||
468 | *rd = rd_val; | |
469 | } | |
470 | ||
471 | static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
472 | { | |
473 | struct fpustate *f = FPUSTATE; | |
474 | unsigned long rs1, rs2, gsr, scale, rd_val; | |
475 | ||
476 | gsr = current_thread_info()->gsr[0]; | |
477 | scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); | |
478 | switch (opf) { | |
479 | case FPACK16_OPF: { | |
480 | unsigned long byte; | |
481 | ||
482 | rs2 = fpd_regval(f, RS2(insn)); | |
483 | rd_val = 0; | |
484 | for (byte = 0; byte < 4; byte++) { | |
485 | unsigned int val; | |
486 | s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; | |
487 | int scaled = src << scale; | |
488 | int from_fixed = scaled >> 7; | |
489 | ||
490 | val = ((from_fixed < 0) ? | |
491 | 0 : | |
492 | (from_fixed > 255) ? | |
493 | 255 : from_fixed); | |
494 | ||
495 | rd_val |= (val << (8 * byte)); | |
496 | } | |
497 | *fps_regaddr(f, RD(insn)) = rd_val; | |
498 | break; | |
499 | } | |
500 | ||
501 | case FPACK32_OPF: { | |
502 | unsigned long word; | |
503 | ||
504 | rs1 = fpd_regval(f, RS1(insn)); | |
505 | rs2 = fpd_regval(f, RS2(insn)); | |
506 | rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); | |
507 | for (word = 0; word < 2; word++) { | |
508 | unsigned long val; | |
509 | s32 src = (rs2 >> (word * 32UL)); | |
510 | s64 scaled = src << scale; | |
511 | s64 from_fixed = scaled >> 23; | |
512 | ||
513 | val = ((from_fixed < 0) ? | |
514 | 0 : | |
515 | (from_fixed > 255) ? | |
516 | 255 : from_fixed); | |
517 | ||
518 | rd_val |= (val << (32 * word)); | |
519 | } | |
520 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
521 | break; | |
522 | } | |
523 | ||
524 | case FPACKFIX_OPF: { | |
525 | unsigned long word; | |
526 | ||
527 | rs2 = fpd_regval(f, RS2(insn)); | |
528 | ||
529 | rd_val = 0; | |
530 | for (word = 0; word < 2; word++) { | |
531 | long val; | |
532 | s32 src = (rs2 >> (word * 32UL)); | |
533 | s64 scaled = src << scale; | |
534 | s64 from_fixed = scaled >> 16; | |
535 | ||
536 | val = ((from_fixed < -32768) ? | |
537 | -32768 : | |
538 | (from_fixed > 32767) ? | |
539 | 32767 : from_fixed); | |
540 | ||
541 | rd_val |= ((val & 0xffff) << (word * 16)); | |
542 | } | |
543 | *fps_regaddr(f, RD(insn)) = rd_val; | |
544 | break; | |
545 | } | |
546 | ||
547 | case FEXPAND_OPF: { | |
548 | unsigned long byte; | |
549 | ||
550 | rs2 = fps_regval(f, RS2(insn)); | |
551 | ||
552 | rd_val = 0; | |
553 | for (byte = 0; byte < 4; byte++) { | |
554 | unsigned long val; | |
555 | u8 src = (rs2 >> (byte * 8)) & 0xff; | |
556 | ||
557 | val = src << 4; | |
558 | ||
559 | rd_val |= (val << (byte * 16)); | |
560 | } | |
561 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
562 | break; | |
563 | } | |
564 | ||
565 | case FPMERGE_OPF: { | |
566 | rs1 = fps_regval(f, RS1(insn)); | |
567 | rs2 = fps_regval(f, RS2(insn)); | |
568 | ||
569 | rd_val = (((rs2 & 0x000000ff) << 0) | | |
570 | ((rs1 & 0x000000ff) << 8) | | |
571 | ((rs2 & 0x0000ff00) << 8) | | |
572 | ((rs1 & 0x0000ff00) << 16) | | |
573 | ((rs2 & 0x00ff0000) << 16) | | |
574 | ((rs1 & 0x00ff0000) << 24) | | |
575 | ((rs2 & 0xff000000) << 24) | | |
576 | ((rs1 & 0xff000000) << 32)); | |
577 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
578 | break; | |
579 | } | |
580 | }; | |
581 | } | |
582 | ||
583 | static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
584 | { | |
585 | struct fpustate *f = FPUSTATE; | |
586 | unsigned long rs1, rs2, rd_val; | |
587 | ||
588 | switch (opf) { | |
589 | case FMUL8x16_OPF: { | |
590 | unsigned long byte; | |
591 | ||
592 | rs1 = fps_regval(f, RS1(insn)); | |
593 | rs2 = fpd_regval(f, RS2(insn)); | |
594 | ||
595 | rd_val = 0; | |
596 | for (byte = 0; byte < 4; byte++) { | |
597 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | |
598 | s16 src2 = (rs2 >> (byte * 16)) & 0xffff; | |
599 | u32 prod = src1 * src2; | |
600 | u16 scaled = ((prod & 0x00ffff00) >> 8); | |
601 | ||
602 | /* Round up. */ | |
603 | if (prod & 0x80) | |
604 | scaled++; | |
605 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
606 | } | |
607 | ||
608 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
609 | break; | |
610 | } | |
611 | ||
612 | case FMUL8x16AU_OPF: | |
613 | case FMUL8x16AL_OPF: { | |
614 | unsigned long byte; | |
615 | s16 src2; | |
616 | ||
617 | rs1 = fps_regval(f, RS1(insn)); | |
618 | rs2 = fps_regval(f, RS2(insn)); | |
619 | ||
620 | rd_val = 0; | |
88b938e6 | 621 | src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0); |
0c51ed93 DM |
622 | for (byte = 0; byte < 4; byte++) { |
623 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | |
624 | u32 prod = src1 * src2; | |
625 | u16 scaled = ((prod & 0x00ffff00) >> 8); | |
626 | ||
627 | /* Round up. */ | |
628 | if (prod & 0x80) | |
629 | scaled++; | |
630 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
631 | } | |
632 | ||
633 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
634 | break; | |
635 | } | |
636 | ||
637 | case FMUL8SUx16_OPF: | |
638 | case FMUL8ULx16_OPF: { | |
639 | unsigned long byte, ushift; | |
640 | ||
641 | rs1 = fpd_regval(f, RS1(insn)); | |
642 | rs2 = fpd_regval(f, RS2(insn)); | |
643 | ||
644 | rd_val = 0; | |
645 | ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; | |
646 | for (byte = 0; byte < 4; byte++) { | |
647 | u16 src1; | |
648 | s16 src2; | |
649 | u32 prod; | |
650 | u16 scaled; | |
651 | ||
652 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | |
653 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | |
654 | prod = src1 * src2; | |
655 | scaled = ((prod & 0x00ffff00) >> 8); | |
656 | ||
657 | /* Round up. */ | |
658 | if (prod & 0x80) | |
659 | scaled++; | |
660 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
661 | } | |
662 | ||
663 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
664 | break; | |
665 | } | |
666 | ||
667 | case FMULD8SUx16_OPF: | |
668 | case FMULD8ULx16_OPF: { | |
669 | unsigned long byte, ushift; | |
670 | ||
671 | rs1 = fps_regval(f, RS1(insn)); | |
672 | rs2 = fps_regval(f, RS2(insn)); | |
673 | ||
674 | rd_val = 0; | |
675 | ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; | |
676 | for (byte = 0; byte < 2; byte++) { | |
677 | u16 src1; | |
678 | s16 src2; | |
679 | u32 prod; | |
680 | u16 scaled; | |
681 | ||
682 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | |
683 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | |
684 | prod = src1 * src2; | |
685 | scaled = ((prod & 0x00ffff00) >> 8); | |
686 | ||
687 | /* Round up. */ | |
688 | if (prod & 0x80) | |
689 | scaled++; | |
690 | rd_val |= ((scaled & 0xffffUL) << | |
691 | ((byte * 32UL) + 7UL)); | |
692 | } | |
693 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
694 | break; | |
695 | } | |
696 | }; | |
697 | } | |
698 | ||
699 | static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
700 | { | |
701 | struct fpustate *f = FPUSTATE; | |
702 | unsigned long rs1, rs2, rd_val, i; | |
703 | ||
704 | rs1 = fpd_regval(f, RS1(insn)); | |
705 | rs2 = fpd_regval(f, RS2(insn)); | |
706 | ||
707 | rd_val = 0; | |
708 | ||
709 | switch (opf) { | |
710 | case FCMPGT16_OPF: | |
711 | for (i = 0; i < 4; i++) { | |
712 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
713 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
714 | ||
715 | if (a > b) | |
716 | rd_val |= 1 << i; | |
717 | } | |
718 | break; | |
719 | ||
720 | case FCMPGT32_OPF: | |
721 | for (i = 0; i < 2; i++) { | |
722 | s32 a = (rs1 >> (i * 32)) & 0xffff; | |
723 | s32 b = (rs2 >> (i * 32)) & 0xffff; | |
724 | ||
725 | if (a > b) | |
726 | rd_val |= 1 << i; | |
727 | } | |
728 | break; | |
729 | ||
730 | case FCMPLE16_OPF: | |
731 | for (i = 0; i < 4; i++) { | |
732 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
733 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
734 | ||
735 | if (a <= b) | |
736 | rd_val |= 1 << i; | |
737 | } | |
738 | break; | |
739 | ||
740 | case FCMPLE32_OPF: | |
741 | for (i = 0; i < 2; i++) { | |
742 | s32 a = (rs1 >> (i * 32)) & 0xffff; | |
743 | s32 b = (rs2 >> (i * 32)) & 0xffff; | |
744 | ||
745 | if (a <= b) | |
746 | rd_val |= 1 << i; | |
747 | } | |
748 | break; | |
749 | ||
750 | case FCMPNE16_OPF: | |
751 | for (i = 0; i < 4; i++) { | |
752 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
753 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
754 | ||
755 | if (a != b) | |
756 | rd_val |= 1 << i; | |
757 | } | |
758 | break; | |
759 | ||
760 | case FCMPNE32_OPF: | |
761 | for (i = 0; i < 2; i++) { | |
762 | s32 a = (rs1 >> (i * 32)) & 0xffff; | |
763 | s32 b = (rs2 >> (i * 32)) & 0xffff; | |
764 | ||
765 | if (a != b) | |
766 | rd_val |= 1 << i; | |
767 | } | |
768 | break; | |
769 | ||
770 | case FCMPEQ16_OPF: | |
771 | for (i = 0; i < 4; i++) { | |
772 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
773 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
774 | ||
775 | if (a == b) | |
776 | rd_val |= 1 << i; | |
777 | } | |
778 | break; | |
779 | ||
780 | case FCMPEQ32_OPF: | |
781 | for (i = 0; i < 2; i++) { | |
782 | s32 a = (rs1 >> (i * 32)) & 0xffff; | |
783 | s32 b = (rs2 >> (i * 32)) & 0xffff; | |
784 | ||
785 | if (a == b) | |
786 | rd_val |= 1 << i; | |
787 | } | |
788 | break; | |
789 | }; | |
790 | ||
791 | maybe_flush_windows(0, 0, RD(insn), 0); | |
792 | store_reg(regs, rd_val, RD(insn)); | |
793 | } | |
794 | ||
795 | /* Emulate the VIS instructions which are not implemented in | |
796 | * hardware on Niagara. | |
797 | */ | |
798 | int vis_emul(struct pt_regs *regs, unsigned int insn) | |
799 | { | |
800 | unsigned long pc = regs->tpc; | |
801 | unsigned int opf; | |
802 | ||
803 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
804 | ||
121dd5f2 DM |
805 | perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); |
806 | ||
0c51ed93 DM |
807 | if (test_thread_flag(TIF_32BIT)) |
808 | pc = (u32)pc; | |
809 | ||
810 | if (get_user(insn, (u32 __user *) pc)) | |
811 | return -EFAULT; | |
812 | ||
410d2c81 HP |
813 | save_and_clear_fpu(); |
814 | ||
0c51ed93 DM |
815 | opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; |
816 | switch (opf) { | |
817 | default: | |
818 | return -EINVAL; | |
819 | ||
820 | /* Pixel Formatting Instructions. */ | |
821 | case FPACK16_OPF: | |
822 | case FPACK32_OPF: | |
823 | case FPACKFIX_OPF: | |
824 | case FEXPAND_OPF: | |
825 | case FPMERGE_OPF: | |
826 | pformat(regs, insn, opf); | |
827 | break; | |
828 | ||
829 | /* Partitioned Multiply Instructions */ | |
830 | case FMUL8x16_OPF: | |
831 | case FMUL8x16AU_OPF: | |
832 | case FMUL8x16AL_OPF: | |
833 | case FMUL8SUx16_OPF: | |
834 | case FMUL8ULx16_OPF: | |
835 | case FMULD8SUx16_OPF: | |
836 | case FMULD8ULx16_OPF: | |
837 | pmul(regs, insn, opf); | |
838 | break; | |
839 | ||
840 | /* Pixel Compare Instructions */ | |
841 | case FCMPGT16_OPF: | |
842 | case FCMPGT32_OPF: | |
843 | case FCMPLE16_OPF: | |
844 | case FCMPLE32_OPF: | |
845 | case FCMPNE16_OPF: | |
846 | case FCMPNE32_OPF: | |
847 | case FCMPEQ16_OPF: | |
848 | case FCMPEQ32_OPF: | |
849 | pcmp(regs, insn, opf); | |
850 | break; | |
851 | ||
852 | /* Edge Handling Instructions */ | |
853 | case EDGE8_OPF: | |
854 | case EDGE8N_OPF: | |
855 | case EDGE8L_OPF: | |
856 | case EDGE8LN_OPF: | |
857 | case EDGE16_OPF: | |
858 | case EDGE16N_OPF: | |
859 | case EDGE16L_OPF: | |
860 | case EDGE16LN_OPF: | |
861 | case EDGE32_OPF: | |
862 | case EDGE32N_OPF: | |
863 | case EDGE32L_OPF: | |
864 | case EDGE32LN_OPF: | |
865 | edge(regs, insn, opf); | |
866 | break; | |
867 | ||
868 | /* Pixel Component Distance */ | |
869 | case PDIST_OPF: | |
870 | pdist(regs, insn); | |
871 | break; | |
872 | ||
873 | /* Three-Dimensional Array Addressing Instructions */ | |
874 | case ARRAY8_OPF: | |
875 | case ARRAY16_OPF: | |
876 | case ARRAY32_OPF: | |
877 | array(regs, insn, opf); | |
878 | break; | |
879 | ||
880 | /* Byte Mask and Shuffle Instructions */ | |
881 | case BMASK_OPF: | |
882 | bmask(regs, insn); | |
883 | break; | |
884 | ||
885 | case BSHUFFLE_OPF: | |
886 | bshuffle(regs, insn); | |
887 | break; | |
888 | }; | |
889 | ||
890 | regs->tpc = regs->tnpc; | |
891 | regs->tnpc += 4; | |
892 | return 0; | |
893 | } |