Commit | Line | Data |
---|---|---|
0c51ed93 DM |
1 | /* visemul.c: Emulation of VIS instructions. |
2 | * | |
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | |
4 | */ | |
5 | #include <linux/kernel.h> | |
6 | #include <linux/errno.h> | |
7 | #include <linux/thread_info.h> | |
8 | ||
9 | #include <asm/ptrace.h> | |
10 | #include <asm/pstate.h> | |
11 | #include <asm/system.h> | |
12 | #include <asm/fpumacro.h> | |
13 | #include <asm/uaccess.h> | |
14 | ||
/* OPF field of various VIS instructions.
 *
 * Each entry lists the 9-bit OPF encoding in binary followed by a short
 * description of the operation it selects.
 */
enum {
	/* Pixel formatting */
	FPACK16_OPF	= 0x03b,	/* 000111011 - four 16-bit packs */
	FPACK32_OPF	= 0x03a,	/* 000111010 - two 32-bit packs */
	FPACKFIX_OPF	= 0x03d,	/* 000111101 - two 32-bit to 16-bit packs */
	FEXPAND_OPF	= 0x04d,	/* 001001101 - four 16-bit expands */
	FPMERGE_OPF	= 0x04b,	/* 001001011 - two 32-bit merges */

	/* Partitioned multiply */
	FMUL8x16_OPF	= 0x031,	/* 000110001 - 8-by-16-bit partitioned product */
	FMUL8x16AU_OPF	= 0x033,	/* 000110011 - 8-by-16-bit upper alpha partitioned product */
	FMUL8x16AL_OPF	= 0x035,	/* 000110101 - 8-by-16-bit lower alpha partitioned product */
	FMUL8SUx16_OPF	= 0x036,	/* 000110110 - upper 8-by-16-bit partitioned product */
	FMUL8ULx16_OPF	= 0x037,	/* 000110111 - lower 8-by-16-bit partitioned product */
	FMULD8SUx16_OPF	= 0x038,	/* 000111000 - upper 8-by-16-bit partitioned product */
	FMULD8ULx16_OPF	= 0x039,	/* 000111001 - lower unsigned 8-by-16-bit partitioned product */

	/* Pixel compare; the result is a per-lane bit mask in rd */
	FCMPGT16_OPF	= 0x028,	/* 000101000 - four 16-bit compare; set rd if src1 > src2 */
	FCMPGT32_OPF	= 0x02c,	/* 000101100 - two 32-bit compare; set rd if src1 > src2 */
	FCMPLE16_OPF	= 0x020,	/* 000100000 - four 16-bit compare; set rd if src1 <= src2 */
	FCMPLE32_OPF	= 0x024,	/* 000100100 - two 32-bit compare; set rd if src1 <= src2 */
	FCMPNE16_OPF	= 0x022,	/* 000100010 - four 16-bit compare; set rd if src1 != src2 */
	FCMPNE32_OPF	= 0x026,	/* 000100110 - two 32-bit compare; set rd if src1 != src2 */
	FCMPEQ16_OPF	= 0x02a,	/* 000101010 - four 16-bit compare; set rd if src1 == src2 */
	FCMPEQ32_OPF	= 0x02e,	/* 000101110 - two 32-bit compare; set rd if src1 == src2 */

	/* Edge boundary processing */
	EDGE8_OPF	= 0x000,	/* 000000000 - Eight 8-bit edge boundary processing */
	EDGE8N_OPF	= 0x001,	/* 000000001 - Eight 8-bit edge boundary processing, no CC */
	EDGE8L_OPF	= 0x002,	/* 000000010 - Eight 8-bit edge boundary processing, little-endian */
	EDGE8LN_OPF	= 0x003,	/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */
	EDGE16_OPF	= 0x004,	/* 000000100 - Four 16-bit edge boundary processing */
	EDGE16N_OPF	= 0x005,	/* 000000101 - Four 16-bit edge boundary processing, no CC */
	EDGE16L_OPF	= 0x006,	/* 000000110 - Four 16-bit edge boundary processing, little-endian */
	EDGE16LN_OPF	= 0x007,	/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */
	EDGE32_OPF	= 0x008,	/* 000001000 - Two 32-bit edge boundary processing */
	EDGE32N_OPF	= 0x009,	/* 000001001 - Two 32-bit edge boundary processing, no CC */
	EDGE32L_OPF	= 0x00a,	/* 000001010 - Two 32-bit edge boundary processing, little-endian */
	EDGE32LN_OPF	= 0x00b,	/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */

	/* Pixel component distance */
	PDIST_OPF	= 0x03e,	/* 000111110 - distance between 8 8-bit components */

	/* Three-dimensional array addressing */
	ARRAY8_OPF	= 0x010,	/* 000010000 - convert 8-bit 3-D address to blocked byte address */
	ARRAY16_OPF	= 0x012,	/* 000010010 - convert 16-bit 3-D address to blocked byte address */
	ARRAY32_OPF	= 0x014,	/* 000010100 - convert 32-bit 3-D address to blocked byte address */

	/* Byte mask and shuffle */
	BMASK_OPF	= 0x019,	/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */
	BSHUFFLE_OPF	= 0x04c		/* 001001100 - Permute bytes as specified by GSR.MASK */
};
130 | ||
/*
 * Instruction word decoding.
 *
 * VIS_OPCODE_MASK selects bits 31:30 and bits 24:19 of the instruction
 * word; an instruction is handled here only when those fields equal
 * 0x2 and 0x36 respectively.  The constants are unsigned so that setting
 * bit 31 does not overflow a signed int.
 */
#define VIS_OPCODE_MASK	((0x3U << 30) | (0x3fU << 19))
#define VIS_OPCODE_VAL	((0x2U << 30) | (0x36U << 19))

/* The 9-bit OPF sub-opcode occupies bits 13:5. */
#define VIS_OPF_SHIFT	5
#define VIS_OPF_MASK	(0x1ffU << VIS_OPF_SHIFT)

/*
 * Register fields: rd is bits 29:25, rs1 is bits 18:14 and rs2 is
 * bits 4:0.  rs1 must not be taken from bit 24 upwards: those bits
 * belong to rd and to the opcode field matched by VIS_OPCODE_MASK.
 */
#define RS1(INSN)	(((INSN) >> 14) & 0x1f)
#define RS2(INSN)	(((INSN) >>  0) & 0x1f)
#define RD(INSN)	(((INSN) >> 25) & 0x1f)
140 | ||
/*
 * Issue a register-window flush when at least one of the three register
 * numbers is 16 or above.  Registers below 16 never trigger a flush.
 *
 * @from_kernel: non-zero selects the plain "flushw" instruction,
 *               zero selects flushw_user().
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	/* Nothing to do when every operand is below register 16. */
	if (rs1 < 16 && rs2 < 16 && rd < 16)
		return;

	if (from_kernel)
		__asm__ __volatile__("flushw");
	else
		flushw_user();
}
151 | ||
152 | static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) | |
153 | { | |
154 | unsigned long value; | |
155 | ||
156 | if (reg < 16) | |
157 | return (!reg ? 0 : regs->u_regs[reg]); | |
158 | if (regs->tstate & TSTATE_PRIV) { | |
159 | struct reg_window *win; | |
160 | win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); | |
161 | value = win->locals[reg - 16]; | |
162 | } else if (test_thread_flag(TIF_32BIT)) { | |
163 | struct reg_window32 __user *win32; | |
164 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); | |
165 | get_user(value, &win32->locals[reg - 16]); | |
166 | } else { | |
167 | struct reg_window __user *win; | |
168 | win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); | |
169 | get_user(value, &win->locals[reg - 16]); | |
170 | } | |
171 | return value; | |
172 | } | |
173 | ||
174 | static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, | |
175 | struct pt_regs *regs) | |
176 | { | |
177 | BUG_ON(reg < 16); | |
178 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
179 | ||
180 | if (test_thread_flag(TIF_32BIT)) { | |
181 | struct reg_window32 __user *win32; | |
182 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); | |
183 | return (unsigned long __user *)&win32->locals[reg - 16]; | |
184 | } else { | |
185 | struct reg_window __user *win; | |
186 | win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); | |
187 | return &win->locals[reg - 16]; | |
188 | } | |
189 | } | |
190 | ||
191 | static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, | |
192 | struct pt_regs *regs) | |
193 | { | |
194 | BUG_ON(reg >= 16); | |
195 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
196 | ||
197 | return ®s->u_regs[reg]; | |
198 | } | |
199 | ||
200 | static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) | |
201 | { | |
202 | if (rd < 16) { | |
203 | unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); | |
204 | ||
205 | *rd_kern = val; | |
206 | } else { | |
207 | unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); | |
208 | ||
209 | if (test_thread_flag(TIF_32BIT)) | |
210 | __put_user((u32)val, (u32 __user *)rd_user); | |
211 | else | |
212 | __put_user(val, rd_user); | |
213 | } | |
214 | } | |
215 | ||
216 | static inline unsigned long fpd_regval(struct fpustate *f, | |
217 | unsigned int insn_regnum) | |
218 | { | |
219 | insn_regnum = (((insn_regnum & 1) << 5) | | |
220 | (insn_regnum & 0x1e)); | |
221 | ||
222 | return *(unsigned long *) &f->regs[insn_regnum]; | |
223 | } | |
224 | ||
225 | static inline unsigned long *fpd_regaddr(struct fpustate *f, | |
226 | unsigned int insn_regnum) | |
227 | { | |
228 | insn_regnum = (((insn_regnum & 1) << 5) | | |
229 | (insn_regnum & 0x1e)); | |
230 | ||
231 | return (unsigned long *) &f->regs[insn_regnum]; | |
232 | } | |
233 | ||
234 | static inline unsigned int fps_regval(struct fpustate *f, | |
235 | unsigned int insn_regnum) | |
236 | { | |
237 | return f->regs[insn_regnum]; | |
238 | } | |
239 | ||
240 | static inline unsigned int *fps_regaddr(struct fpustate *f, | |
241 | unsigned int insn_regnum) | |
242 | { | |
243 | return &f->regs[insn_regnum]; | |
244 | } | |
245 | ||
246 | struct edge_tab { | |
247 | u16 left, right; | |
248 | }; | |
249 | struct edge_tab edge8_tab[8] = { | |
250 | { 0xff, 0x80 }, | |
251 | { 0x7f, 0xc0 }, | |
252 | { 0x3f, 0xe0 }, | |
253 | { 0x1f, 0xf0 }, | |
254 | { 0x0f, 0xf8 }, | |
255 | { 0x07, 0xfc }, | |
256 | { 0x03, 0xfe }, | |
257 | { 0x01, 0xff }, | |
258 | }; | |
259 | struct edge_tab edge8_tab_l[8] = { | |
260 | { 0xff, 0x01 }, | |
261 | { 0xfe, 0x03 }, | |
262 | { 0xfc, 0x07 }, | |
263 | { 0xf8, 0x0f }, | |
264 | { 0xf0, 0x1f }, | |
265 | { 0xe0, 0x3f }, | |
266 | { 0xc0, 0x7f }, | |
267 | { 0x80, 0xff }, | |
268 | }; | |
269 | struct edge_tab edge16_tab[4] = { | |
270 | { 0xf, 0x8 }, | |
271 | { 0x7, 0xc }, | |
272 | { 0x3, 0xe }, | |
273 | { 0x1, 0xf }, | |
274 | }; | |
275 | struct edge_tab edge16_tab_l[4] = { | |
276 | { 0xf, 0x1 }, | |
277 | { 0xe, 0x3 }, | |
278 | { 0xc, 0x7 }, | |
279 | { 0x8, 0xf }, | |
280 | }; | |
281 | struct edge_tab edge32_tab[2] = { | |
282 | { 0x3, 0x2 }, | |
283 | { 0x1, 0x3 }, | |
284 | }; | |
285 | struct edge_tab edge32_tab_l[2] = { | |
286 | { 0x3, 0x1 }, | |
287 | { 0x2, 0x3 }, | |
288 | }; | |
289 | ||
290 | static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
291 | { | |
292 | unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; | |
293 | u16 left, right; | |
294 | ||
295 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
296 | orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); | |
297 | orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); | |
298 | ||
299 | if (test_thread_flag(TIF_32BIT)) { | |
300 | rs1 = rs1 & 0xffffffff; | |
301 | rs2 = rs2 & 0xffffffff; | |
302 | } | |
303 | switch (opf) { | |
304 | default: | |
305 | case EDGE8_OPF: | |
306 | case EDGE8N_OPF: | |
307 | left = edge8_tab[rs1 & 0x7].left; | |
308 | right = edge8_tab[rs2 & 0x7].right; | |
309 | break; | |
310 | case EDGE8L_OPF: | |
311 | case EDGE8LN_OPF: | |
312 | left = edge8_tab_l[rs1 & 0x7].left; | |
313 | right = edge8_tab_l[rs2 & 0x7].right; | |
314 | break; | |
315 | ||
316 | case EDGE16_OPF: | |
317 | case EDGE16N_OPF: | |
318 | left = edge16_tab[(rs1 >> 1) & 0x3].left; | |
319 | right = edge16_tab[(rs2 >> 1) & 0x3].right; | |
320 | break; | |
321 | ||
322 | case EDGE16L_OPF: | |
323 | case EDGE16LN_OPF: | |
324 | left = edge16_tab_l[(rs1 >> 1) & 0x3].left; | |
325 | right = edge16_tab_l[(rs2 >> 1) & 0x3].right; | |
326 | break; | |
327 | ||
328 | case EDGE32_OPF: | |
329 | case EDGE32N_OPF: | |
330 | left = edge32_tab[(rs1 >> 2) & 0x1].left; | |
331 | right = edge32_tab[(rs2 >> 2) & 0x1].right; | |
332 | break; | |
333 | ||
334 | case EDGE32L_OPF: | |
335 | case EDGE32LN_OPF: | |
336 | left = edge32_tab_l[(rs1 >> 2) & 0x1].left; | |
337 | right = edge32_tab_l[(rs2 >> 2) & 0x1].right; | |
338 | break; | |
339 | }; | |
340 | ||
341 | if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) | |
342 | rd_val = right & left; | |
343 | else | |
344 | rd_val = left; | |
345 | ||
346 | store_reg(regs, rd_val, RD(insn)); | |
347 | ||
348 | switch (opf) { | |
349 | case EDGE8_OPF: | |
350 | case EDGE8L_OPF: | |
351 | case EDGE16_OPF: | |
352 | case EDGE16L_OPF: | |
353 | case EDGE32_OPF: | |
354 | case EDGE32L_OPF: { | |
355 | unsigned long ccr, tstate; | |
356 | ||
357 | __asm__ __volatile__("subcc %1, %2, %%g0\n\t" | |
358 | "rd %%ccr, %0" | |
359 | : "=r" (ccr) | |
360 | : "r" (orig_rs1), "r" (orig_rs2) | |
361 | : "cc"); | |
362 | tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); | |
363 | regs->tstate = tstate | (ccr << 32UL); | |
364 | } | |
365 | }; | |
366 | } | |
367 | ||
368 | static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
369 | { | |
370 | unsigned long rs1, rs2, rd_val; | |
371 | unsigned int bits, bits_mask; | |
372 | ||
373 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
374 | rs1 = fetch_reg(RS1(insn), regs); | |
375 | rs2 = fetch_reg(RS2(insn), regs); | |
376 | ||
377 | bits = (rs2 > 5 ? 5 : rs2); | |
378 | bits_mask = (1UL << bits) - 1UL; | |
379 | ||
380 | rd_val = ((((rs1 >> 11) & 0x3) << 0) | | |
381 | (((rs1 >> 33) & 0x3) << 2) | | |
382 | (((rs1 >> 55) & 0x1) << 4) | | |
383 | (((rs1 >> 13) & 0xf) << 5) | | |
384 | (((rs1 >> 35) & 0xf) << 9) | | |
385 | (((rs1 >> 56) & 0xf) << 13) | | |
386 | (((rs1 >> 17) & bits_mask) << 17) | | |
387 | (((rs1 >> 39) & bits_mask) << (17 + bits)) | | |
388 | (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); | |
389 | ||
390 | switch (opf) { | |
391 | case ARRAY16_OPF: | |
392 | rd_val <<= 1; | |
393 | break; | |
394 | ||
395 | case ARRAY32_OPF: | |
396 | rd_val <<= 2; | |
397 | }; | |
398 | ||
399 | store_reg(regs, rd_val, RD(insn)); | |
400 | } | |
401 | ||
402 | static void bmask(struct pt_regs *regs, unsigned int insn) | |
403 | { | |
404 | unsigned long rs1, rs2, rd_val, gsr; | |
405 | ||
406 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
407 | rs1 = fetch_reg(RS1(insn), regs); | |
408 | rs2 = fetch_reg(RS2(insn), regs); | |
409 | rd_val = rs1 + rs2; | |
410 | ||
411 | store_reg(regs, rd_val, RD(insn)); | |
412 | ||
413 | gsr = current_thread_info()->gsr[0] & 0xffffffff; | |
414 | gsr |= rd_val << 32UL; | |
415 | current_thread_info()->gsr[0] = gsr; | |
416 | } | |
417 | ||
418 | static void bshuffle(struct pt_regs *regs, unsigned int insn) | |
419 | { | |
420 | struct fpustate *f = FPUSTATE; | |
421 | unsigned long rs1, rs2, rd_val; | |
422 | unsigned long bmask, i; | |
423 | ||
424 | bmask = current_thread_info()->gsr[0] >> 32UL; | |
425 | ||
426 | rs1 = fpd_regval(f, RS1(insn)); | |
427 | rs2 = fpd_regval(f, RS2(insn)); | |
428 | ||
429 | rd_val = 0UL; | |
430 | for (i = 0; i < 8; i++) { | |
431 | unsigned long which = (bmask >> (i * 4)) & 0xf; | |
432 | unsigned long byte; | |
433 | ||
434 | if (which < 8) | |
435 | byte = (rs1 >> (which * 8)) & 0xff; | |
436 | else | |
437 | byte = (rs2 >> ((which-8)*8)) & 0xff; | |
438 | rd_val |= (byte << (i * 8)); | |
439 | } | |
440 | ||
441 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
442 | } | |
443 | ||
444 | static void pdist(struct pt_regs *regs, unsigned int insn) | |
445 | { | |
446 | struct fpustate *f = FPUSTATE; | |
447 | unsigned long rs1, rs2, *rd, rd_val; | |
448 | unsigned long i; | |
449 | ||
450 | rs1 = fpd_regval(f, RS1(insn)); | |
451 | rs2 = fpd_regval(f, RS1(insn)); | |
452 | rd = fpd_regaddr(f, RD(insn)); | |
453 | ||
454 | rd_val = *rd; | |
455 | ||
456 | for (i = 0; i < 8; i++) { | |
457 | s16 s1, s2; | |
458 | ||
459 | s1 = (rs1 >> (56 - (i * 8))) & 0xff; | |
460 | s2 = (rs2 >> (56 - (i * 8))) & 0xff; | |
461 | ||
462 | /* Absolute value of difference. */ | |
463 | s1 -= s2; | |
464 | if (s1 < 0) | |
465 | s1 = ~s1 + 1; | |
466 | ||
467 | rd_val += s1; | |
468 | } | |
469 | ||
470 | *rd = rd_val; | |
471 | } | |
472 | ||
473 | static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
474 | { | |
475 | struct fpustate *f = FPUSTATE; | |
476 | unsigned long rs1, rs2, gsr, scale, rd_val; | |
477 | ||
478 | gsr = current_thread_info()->gsr[0]; | |
479 | scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); | |
480 | switch (opf) { | |
481 | case FPACK16_OPF: { | |
482 | unsigned long byte; | |
483 | ||
484 | rs2 = fpd_regval(f, RS2(insn)); | |
485 | rd_val = 0; | |
486 | for (byte = 0; byte < 4; byte++) { | |
487 | unsigned int val; | |
488 | s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; | |
489 | int scaled = src << scale; | |
490 | int from_fixed = scaled >> 7; | |
491 | ||
492 | val = ((from_fixed < 0) ? | |
493 | 0 : | |
494 | (from_fixed > 255) ? | |
495 | 255 : from_fixed); | |
496 | ||
497 | rd_val |= (val << (8 * byte)); | |
498 | } | |
499 | *fps_regaddr(f, RD(insn)) = rd_val; | |
500 | break; | |
501 | } | |
502 | ||
503 | case FPACK32_OPF: { | |
504 | unsigned long word; | |
505 | ||
506 | rs1 = fpd_regval(f, RS1(insn)); | |
507 | rs2 = fpd_regval(f, RS2(insn)); | |
508 | rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); | |
509 | for (word = 0; word < 2; word++) { | |
510 | unsigned long val; | |
511 | s32 src = (rs2 >> (word * 32UL)); | |
512 | s64 scaled = src << scale; | |
513 | s64 from_fixed = scaled >> 23; | |
514 | ||
515 | val = ((from_fixed < 0) ? | |
516 | 0 : | |
517 | (from_fixed > 255) ? | |
518 | 255 : from_fixed); | |
519 | ||
520 | rd_val |= (val << (32 * word)); | |
521 | } | |
522 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
523 | break; | |
524 | } | |
525 | ||
526 | case FPACKFIX_OPF: { | |
527 | unsigned long word; | |
528 | ||
529 | rs2 = fpd_regval(f, RS2(insn)); | |
530 | ||
531 | rd_val = 0; | |
532 | for (word = 0; word < 2; word++) { | |
533 | long val; | |
534 | s32 src = (rs2 >> (word * 32UL)); | |
535 | s64 scaled = src << scale; | |
536 | s64 from_fixed = scaled >> 16; | |
537 | ||
538 | val = ((from_fixed < -32768) ? | |
539 | -32768 : | |
540 | (from_fixed > 32767) ? | |
541 | 32767 : from_fixed); | |
542 | ||
543 | rd_val |= ((val & 0xffff) << (word * 16)); | |
544 | } | |
545 | *fps_regaddr(f, RD(insn)) = rd_val; | |
546 | break; | |
547 | } | |
548 | ||
549 | case FEXPAND_OPF: { | |
550 | unsigned long byte; | |
551 | ||
552 | rs2 = fps_regval(f, RS2(insn)); | |
553 | ||
554 | rd_val = 0; | |
555 | for (byte = 0; byte < 4; byte++) { | |
556 | unsigned long val; | |
557 | u8 src = (rs2 >> (byte * 8)) & 0xff; | |
558 | ||
559 | val = src << 4; | |
560 | ||
561 | rd_val |= (val << (byte * 16)); | |
562 | } | |
563 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
564 | break; | |
565 | } | |
566 | ||
567 | case FPMERGE_OPF: { | |
568 | rs1 = fps_regval(f, RS1(insn)); | |
569 | rs2 = fps_regval(f, RS2(insn)); | |
570 | ||
571 | rd_val = (((rs2 & 0x000000ff) << 0) | | |
572 | ((rs1 & 0x000000ff) << 8) | | |
573 | ((rs2 & 0x0000ff00) << 8) | | |
574 | ((rs1 & 0x0000ff00) << 16) | | |
575 | ((rs2 & 0x00ff0000) << 16) | | |
576 | ((rs1 & 0x00ff0000) << 24) | | |
577 | ((rs2 & 0xff000000) << 24) | | |
578 | ((rs1 & 0xff000000) << 32)); | |
579 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
580 | break; | |
581 | } | |
582 | }; | |
583 | } | |
584 | ||
585 | static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
586 | { | |
587 | struct fpustate *f = FPUSTATE; | |
588 | unsigned long rs1, rs2, rd_val; | |
589 | ||
590 | switch (opf) { | |
591 | case FMUL8x16_OPF: { | |
592 | unsigned long byte; | |
593 | ||
594 | rs1 = fps_regval(f, RS1(insn)); | |
595 | rs2 = fpd_regval(f, RS2(insn)); | |
596 | ||
597 | rd_val = 0; | |
598 | for (byte = 0; byte < 4; byte++) { | |
599 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | |
600 | s16 src2 = (rs2 >> (byte * 16)) & 0xffff; | |
601 | u32 prod = src1 * src2; | |
602 | u16 scaled = ((prod & 0x00ffff00) >> 8); | |
603 | ||
604 | /* Round up. */ | |
605 | if (prod & 0x80) | |
606 | scaled++; | |
607 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
608 | } | |
609 | ||
610 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
611 | break; | |
612 | } | |
613 | ||
614 | case FMUL8x16AU_OPF: | |
615 | case FMUL8x16AL_OPF: { | |
616 | unsigned long byte; | |
617 | s16 src2; | |
618 | ||
619 | rs1 = fps_regval(f, RS1(insn)); | |
620 | rs2 = fps_regval(f, RS2(insn)); | |
621 | ||
622 | rd_val = 0; | |
623 | src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0); | |
624 | for (byte = 0; byte < 4; byte++) { | |
625 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | |
626 | u32 prod = src1 * src2; | |
627 | u16 scaled = ((prod & 0x00ffff00) >> 8); | |
628 | ||
629 | /* Round up. */ | |
630 | if (prod & 0x80) | |
631 | scaled++; | |
632 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
633 | } | |
634 | ||
635 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
636 | break; | |
637 | } | |
638 | ||
639 | case FMUL8SUx16_OPF: | |
640 | case FMUL8ULx16_OPF: { | |
641 | unsigned long byte, ushift; | |
642 | ||
643 | rs1 = fpd_regval(f, RS1(insn)); | |
644 | rs2 = fpd_regval(f, RS2(insn)); | |
645 | ||
646 | rd_val = 0; | |
647 | ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; | |
648 | for (byte = 0; byte < 4; byte++) { | |
649 | u16 src1; | |
650 | s16 src2; | |
651 | u32 prod; | |
652 | u16 scaled; | |
653 | ||
654 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | |
655 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | |
656 | prod = src1 * src2; | |
657 | scaled = ((prod & 0x00ffff00) >> 8); | |
658 | ||
659 | /* Round up. */ | |
660 | if (prod & 0x80) | |
661 | scaled++; | |
662 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
663 | } | |
664 | ||
665 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
666 | break; | |
667 | } | |
668 | ||
669 | case FMULD8SUx16_OPF: | |
670 | case FMULD8ULx16_OPF: { | |
671 | unsigned long byte, ushift; | |
672 | ||
673 | rs1 = fps_regval(f, RS1(insn)); | |
674 | rs2 = fps_regval(f, RS2(insn)); | |
675 | ||
676 | rd_val = 0; | |
677 | ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; | |
678 | for (byte = 0; byte < 2; byte++) { | |
679 | u16 src1; | |
680 | s16 src2; | |
681 | u32 prod; | |
682 | u16 scaled; | |
683 | ||
684 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | |
685 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | |
686 | prod = src1 * src2; | |
687 | scaled = ((prod & 0x00ffff00) >> 8); | |
688 | ||
689 | /* Round up. */ | |
690 | if (prod & 0x80) | |
691 | scaled++; | |
692 | rd_val |= ((scaled & 0xffffUL) << | |
693 | ((byte * 32UL) + 7UL)); | |
694 | } | |
695 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
696 | break; | |
697 | } | |
698 | }; | |
699 | } | |
700 | ||
701 | static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
702 | { | |
703 | struct fpustate *f = FPUSTATE; | |
704 | unsigned long rs1, rs2, rd_val, i; | |
705 | ||
706 | rs1 = fpd_regval(f, RS1(insn)); | |
707 | rs2 = fpd_regval(f, RS2(insn)); | |
708 | ||
709 | rd_val = 0; | |
710 | ||
711 | switch (opf) { | |
712 | case FCMPGT16_OPF: | |
713 | for (i = 0; i < 4; i++) { | |
714 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
715 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
716 | ||
717 | if (a > b) | |
718 | rd_val |= 1 << i; | |
719 | } | |
720 | break; | |
721 | ||
722 | case FCMPGT32_OPF: | |
723 | for (i = 0; i < 2; i++) { | |
724 | s32 a = (rs1 >> (i * 32)) & 0xffff; | |
725 | s32 b = (rs2 >> (i * 32)) & 0xffff; | |
726 | ||
727 | if (a > b) | |
728 | rd_val |= 1 << i; | |
729 | } | |
730 | break; | |
731 | ||
732 | case FCMPLE16_OPF: | |
733 | for (i = 0; i < 4; i++) { | |
734 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
735 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
736 | ||
737 | if (a <= b) | |
738 | rd_val |= 1 << i; | |
739 | } | |
740 | break; | |
741 | ||
742 | case FCMPLE32_OPF: | |
743 | for (i = 0; i < 2; i++) { | |
744 | s32 a = (rs1 >> (i * 32)) & 0xffff; | |
745 | s32 b = (rs2 >> (i * 32)) & 0xffff; | |
746 | ||
747 | if (a <= b) | |
748 | rd_val |= 1 << i; | |
749 | } | |
750 | break; | |
751 | ||
752 | case FCMPNE16_OPF: | |
753 | for (i = 0; i < 4; i++) { | |
754 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
755 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
756 | ||
757 | if (a != b) | |
758 | rd_val |= 1 << i; | |
759 | } | |
760 | break; | |
761 | ||
762 | case FCMPNE32_OPF: | |
763 | for (i = 0; i < 2; i++) { | |
764 | s32 a = (rs1 >> (i * 32)) & 0xffff; | |
765 | s32 b = (rs2 >> (i * 32)) & 0xffff; | |
766 | ||
767 | if (a != b) | |
768 | rd_val |= 1 << i; | |
769 | } | |
770 | break; | |
771 | ||
772 | case FCMPEQ16_OPF: | |
773 | for (i = 0; i < 4; i++) { | |
774 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
775 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
776 | ||
777 | if (a == b) | |
778 | rd_val |= 1 << i; | |
779 | } | |
780 | break; | |
781 | ||
782 | case FCMPEQ32_OPF: | |
783 | for (i = 0; i < 2; i++) { | |
784 | s32 a = (rs1 >> (i * 32)) & 0xffff; | |
785 | s32 b = (rs2 >> (i * 32)) & 0xffff; | |
786 | ||
787 | if (a == b) | |
788 | rd_val |= 1 << i; | |
789 | } | |
790 | break; | |
791 | }; | |
792 | ||
793 | maybe_flush_windows(0, 0, RD(insn), 0); | |
794 | store_reg(regs, rd_val, RD(insn)); | |
795 | } | |
796 | ||
797 | /* Emulate the VIS instructions which are not implemented in | |
798 | * hardware on Niagara. | |
799 | */ | |
800 | int vis_emul(struct pt_regs *regs, unsigned int insn) | |
801 | { | |
802 | unsigned long pc = regs->tpc; | |
803 | unsigned int opf; | |
804 | ||
805 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
806 | ||
807 | if (test_thread_flag(TIF_32BIT)) | |
808 | pc = (u32)pc; | |
809 | ||
810 | if (get_user(insn, (u32 __user *) pc)) | |
811 | return -EFAULT; | |
812 | ||
813 | if ((insn & VIS_OPCODE_MASK) != VIS_OPCODE_VAL) | |
814 | return -EINVAL; | |
815 | ||
816 | opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; | |
817 | switch (opf) { | |
818 | default: | |
819 | return -EINVAL; | |
820 | ||
821 | /* Pixel Formatting Instructions. */ | |
822 | case FPACK16_OPF: | |
823 | case FPACK32_OPF: | |
824 | case FPACKFIX_OPF: | |
825 | case FEXPAND_OPF: | |
826 | case FPMERGE_OPF: | |
827 | pformat(regs, insn, opf); | |
828 | break; | |
829 | ||
830 | /* Partitioned Multiply Instructions */ | |
831 | case FMUL8x16_OPF: | |
832 | case FMUL8x16AU_OPF: | |
833 | case FMUL8x16AL_OPF: | |
834 | case FMUL8SUx16_OPF: | |
835 | case FMUL8ULx16_OPF: | |
836 | case FMULD8SUx16_OPF: | |
837 | case FMULD8ULx16_OPF: | |
838 | pmul(regs, insn, opf); | |
839 | break; | |
840 | ||
841 | /* Pixel Compare Instructions */ | |
842 | case FCMPGT16_OPF: | |
843 | case FCMPGT32_OPF: | |
844 | case FCMPLE16_OPF: | |
845 | case FCMPLE32_OPF: | |
846 | case FCMPNE16_OPF: | |
847 | case FCMPNE32_OPF: | |
848 | case FCMPEQ16_OPF: | |
849 | case FCMPEQ32_OPF: | |
850 | pcmp(regs, insn, opf); | |
851 | break; | |
852 | ||
853 | /* Edge Handling Instructions */ | |
854 | case EDGE8_OPF: | |
855 | case EDGE8N_OPF: | |
856 | case EDGE8L_OPF: | |
857 | case EDGE8LN_OPF: | |
858 | case EDGE16_OPF: | |
859 | case EDGE16N_OPF: | |
860 | case EDGE16L_OPF: | |
861 | case EDGE16LN_OPF: | |
862 | case EDGE32_OPF: | |
863 | case EDGE32N_OPF: | |
864 | case EDGE32L_OPF: | |
865 | case EDGE32LN_OPF: | |
866 | edge(regs, insn, opf); | |
867 | break; | |
868 | ||
869 | /* Pixel Component Distance */ | |
870 | case PDIST_OPF: | |
871 | pdist(regs, insn); | |
872 | break; | |
873 | ||
874 | /* Three-Dimensional Array Addressing Instructions */ | |
875 | case ARRAY8_OPF: | |
876 | case ARRAY16_OPF: | |
877 | case ARRAY32_OPF: | |
878 | array(regs, insn, opf); | |
879 | break; | |
880 | ||
881 | /* Byte Mask and Shuffle Instructions */ | |
882 | case BMASK_OPF: | |
883 | bmask(regs, insn); | |
884 | break; | |
885 | ||
886 | case BSHUFFLE_OPF: | |
887 | bshuffle(regs, insn); | |
888 | break; | |
889 | }; | |
890 | ||
891 | regs->tpc = regs->tnpc; | |
892 | regs->tnpc += 4; | |
893 | return 0; | |
894 | } |