Commit | Line | Data |
---|---|---|
800eeca4 JW |
1 | // |
2 | // Detect WAW (write-after-write) dependency violations. Cases taken from DV tables. | |
3 | // | |
4 | .text | |
5 | .explicit | |
6 | // AR[BSP] | |
7 | mov ar.bsp = r0 | |
8 | mov ar.bsp = r1 | |
9 | ;; | |
10 | // AR[BSPSTORE] | |
11 | mov ar.bspstore = r2 | |
12 | mov ar.bspstore = r3 | |
13 | ;; | |
14 | ||
15 | // AR[CCV] | |
16 | mov ar.ccv = r4 | |
17 | mov ar.ccv = r4 | |
18 | ;; | |
19 | ||
20 | // AR[EC] (br.wtop paired with an explicit ar.ec write) | |
21 | br.wtop.sptk L | |
22 | mov ar.ec = r0 | |
23 | ;; | |
24 | ||
25 | // AR[FPSR].sf0.controls (whole ar.fpsr write paired with fsetc.s0) | |
26 | mov ar.fpsr = r0 | |
27 | fsetc.s0 0x7f, 0x0f | |
28 | ;; | |
29 | ||
30 | // AR[FPSR].sf1.controls (whole ar.fpsr write paired with fsetc.s1) | |
31 | mov ar.fpsr = r0 | |
32 | fsetc.s1 0x7f, 0x0f | |
33 | ;; | |
34 | ||
35 | // AR[FPSR].sf2.controls (whole ar.fpsr write paired with fsetc.s2) | |
36 | mov ar.fpsr = r0 | |
37 | fsetc.s2 0x7f, 0x0f | |
38 | ;; | |
39 | ||
40 | // AR[FPSR].sf3.controls (whole ar.fpsr write paired with fsetc.s3) | |
41 | mov ar.fpsr = r0 | |
42 | fsetc.s3 0x7f, 0x0f | |
43 | ;; | |
44 | ||
45 | // AR[FPSR].sf0.flags (fcmp + fcmp is marked no-DV; fcmp + fclrf is not) | |
46 | fcmp.eq.s0 p1, p2 = f3, f4 | |
47 | fcmp.eq.s0 p3, p4 = f3, f4 // no DV here | |
48 | ;; | |
49 | fcmp.eq.s0 p1, p2 = f3, f4 | |
50 | fclrf.s0 | |
51 | ;; | |
52 | ||
53 | // AR[FPSR].sf1.flags (fcmp + fcmp is marked no-DV; fcmp + fclrf is not) | |
54 | fcmp.eq.s1 p1, p2 = f3, f4 | |
55 | fcmp.eq.s1 p3, p4 = f3, f4 // no DV here | |
56 | ;; | |
57 | fcmp.eq.s1 p1, p2 = f3, f4 | |
58 | fclrf.s1 | |
59 | ;; | |
60 | ||
61 | // AR[FPSR].sf2.flags (fcmp + fcmp is marked no-DV; fcmp + fclrf is not) | |
62 | fcmp.eq.s2 p1, p2 = f3, f4 | |
63 | fcmp.eq.s2 p3, p4 = f3, f4 // no DV here | |
64 | ;; | |
65 | fcmp.eq.s2 p1, p2 = f3, f4 | |
66 | fclrf.s2 | |
67 | ;; | |
68 | ||
69 | // AR[FPSR].sf3.flags (fcmp + fcmp is marked no-DV; fcmp + fclrf is not) | |
70 | fcmp.eq.s3 p1, p2 = f3, f4 | |
71 | fcmp.eq.s3 p3, p4 = f3, f4 // no DV here | |
72 | ;; | |
73 | fcmp.eq.s3 p1, p2 = f3, f4 | |
74 | fclrf.s3 | |
75 | ;; | |
76 | ||
77 | // AR[FPSR].traps/rv plus all controls/flags (two whole ar.fpsr writes) | |
78 | mov ar.fpsr = r0 | |
79 | mov ar.fpsr = r0 | |
80 | ;; | |
81 | ||
82 | // AR[ITC] | |
83 | mov ar.itc = r1 | |
84 | mov ar.itc = r1 | |
85 | ;; | |
86 | ||
87 | // AR[K] (tested with ar.k2) | |
88 | mov ar.k2 = r3 | |
89 | mov ar.k2 = r3 | |
90 | ;; | |
91 | ||
92 | // AR[LC] (br.cloop paired with an explicit ar.lc write) | |
93 | br.cloop.sptk L | |
94 | mov ar.lc = r0 | |
95 | ;; | |
96 | ||
97 | // AR[PFS] (explicit ar.pfs write paired with br.call) | |
98 | mov ar.pfs = r0 | |
99 | br.call.sptk b0 = L | |
100 | ;; | |
101 | ||
102 | // AR[RNAT] (see also AR[BSPSTORE]) | |
103 | mov ar.rnat = r8 | |
104 | mov ar.rnat = r8 | |
105 | ;; | |
106 | ||
107 | // AR[RSC] | |
108 | mov ar.rsc = r10 | |
109 | mov ar.rsc = r10 | |
110 | ;; | |
111 | ||
112 | // AR[UNAT] (explicit ar.unat write paired with st8.spill) | |
113 | mov ar.unat = r12 | |
114 | st8.spill [r0] = r1 | |
115 | ;; | |
116 | ||
117 | // AR% (tested with ar48) | |
118 | mov ar48 = r0 | |
119 | mov ar48 = r0 | |
120 | ;; | |
121 | ||
122 | // BR% (tested with b1) | |
123 | mov b1 = r0 | |
124 | mov b1 = r1 | |
125 | ;; | |
126 | ||
127 | // CFM (and others): two br.wtop before the same stop | |
128 | br.wtop.sptk L | |
129 | br.wtop.sptk L | |
130 | ;; | |
131 | ||
132 | // CR[CMCV] | |
133 | mov cr.cmcv = r1 | |
134 | mov cr.cmcv = r2 | |
135 | ;; | |
136 | ||
137 | // CR[DCR] | |
138 | mov cr.dcr = r3 | |
139 | mov cr.dcr = r3 | |
140 | ;; | |
141 | ||
142 | // CR[EOI] (and InService) | |
143 | mov cr.eoi = r0 | |
144 | mov cr.eoi = r0 | |
145 | ;; | |
146 | srlz.d | |
147 | ||
148 | // CR[GPTA] | |
149 | mov cr.gpta = r6 | |
150 | mov cr.gpta = r7 | |
151 | ;; | |
152 | ||
153 | // CR[IFA] | |
154 | mov cr.ifa = r9 | |
155 | mov cr.ifa = r10 | |
156 | ;; | |
157 | ||
158 | // CR[IFS] (explicit cr.ifs write paired with cover) | |
159 | mov cr.ifs = r11 | |
160 | cover | |
161 | ;; | |
162 | ||
163 | // CR[IHA] | |
164 | mov cr.iha = r13 | |
165 | mov cr.iha = r14 | |
166 | ;; | |
167 | ||
168 | // CR[IIM] | |
169 | mov cr.iim = r15 | |
170 | mov cr.iim = r16 | |
171 | ;; | |
172 | ||
173 | // CR[IIP] | |
174 | mov cr.iip = r17 | |
175 | mov cr.iip = r17 | |
176 | ;; | |
177 | ||
178 | // CR[IIPA] | |
179 | mov cr.iipa = r19 | |
180 | mov cr.iipa = r20 | |
181 | ;; | |
182 | ||
183 | // CR[IPSR] | |
184 | mov cr.ipsr = r21 | |
185 | mov cr.ipsr = r22 | |
186 | ;; | |
187 | ||
188 | // CR[IRR%] (and others): exercised by two reads of cr.ivr | |
189 | mov r0 = cr.ivr | |
190 | mov r1 = cr.ivr | |
191 | ;; | |
192 | ||
193 | // CR[ISR] | |
194 | mov cr.isr = r24 | |
195 | mov cr.isr = r25 | |
196 | ;; | |
197 | ||
198 | // CR[ITIR] | |
199 | mov cr.itir = r26 | |
200 | mov cr.itir = r27 | |
201 | ;; | |
202 | ||
203 | // CR[ITM] | |
204 | mov cr.itm = r28 | |
205 | mov cr.itm = r29 | |
206 | ;; | |
207 | ||
208 | // CR[ITV] | |
209 | mov cr.itv = r0 | |
210 | mov cr.itv = r1 | |
211 | ;; | |
212 | ||
213 | // CR[IVA] | |
214 | mov cr.iva = r0 | |
215 | mov cr.iva = r1 | |
216 | ;; | |
217 | ||
218 | // CR[IVR] (no explicit writers) | |
219 | ||
220 | // CR[LID] | |
221 | mov cr.lid = r0 | |
222 | mov cr.lid = r1 | |
223 | ;; | |
224 | ||
225 | // CR[LRR%] (lrr0 + lrr1 is marked no-DV; lrr0 written twice is not) | |
226 | mov cr.lrr0 = r0 | |
227 | mov cr.lrr1 = r0 // no DV here | |
228 | ;; | |
229 | mov cr.lrr0 = r0 | |
230 | mov cr.lrr0 = r0 | |
231 | ;; | |
232 | ||
233 | // CR[PMV] | |
234 | mov cr.pmv = r0 | |
235 | mov cr.pmv = r1 | |
236 | ;; | |
237 | ||
238 | // CR[PTA] | |
239 | mov cr.pta = r0 | |
240 | mov cr.pta = r1 | |
241 | ;; | |
242 | ||
243 | // CR[TPR] | |
244 | mov cr.tpr = r0 | |
245 | mov cr.tpr = r1 | |
246 | ;; | |
247 | ||
248 | // DBR# (same index register r1 in both writes) | |
249 | mov dbr[r1] = r1 | |
250 | mov dbr[r1] = r2 | |
251 | ;; | |
252 | srlz.d | |
253 | ||
254 | // DTC (ptc.e + ptc.e is marked no-DV; ptc.e + itc.i is not) | |
255 | ptc.e r0 | |
256 | ptc.e r1 // no DV here | |
257 | ;; | |
258 | ptc.e r0 // (and others) | |
259 | itc.i r0 | |
260 | ;; | |
261 | srlz.d | |
262 | ||
263 | // DTC_LIMIT | |
264 | ptc.g r0, r1 // NOTE: GAS automatically emits stops after | |
265 | ptc.ga r2, r3 // ptc.g/ptc.ga, so this conflict is no | |
266 | ;; // longer possible in GAS-generated assembly | |
267 | srlz.d | |
268 | ||
269 | // DTR | |
270 | itr.d dtr[r0] = r1 // (and others) | |
271 | ptr.d r2, r3 | |
272 | ;; | |
273 | srlz.d | |
274 | ||
275 | // FR% (mov and ldfs.c.clr both target f3) | |
276 | mov f3 = f2 | |
277 | ldfs.c.clr f3 = [r1] | |
278 | ;; | |
279 | ||
280 | // GR% (mov and ld8.c.clr both target r2) | |
281 | mov r2 = r0 | |
282 | ld8.c.clr r2 = [r1] | |
283 | ;; | |
284 | ||
285 | // IBR# | |
286 | mov ibr[r0] = r2 | |
287 | mov ibr[r1] = r2 | |
288 | ;; | |
289 | ||
290 | // InService (cr.eoi write paired with a cr.ivr read) | |
291 | mov cr.eoi = r0 | |
292 | mov r1 = cr.ivr | |
293 | ;; | |
294 | srlz.d | |
295 | ||
296 | // ITC | |
297 | ptc.e r0 | |
298 | itc.i r1 | |
299 | ;; | |
300 | srlz.i | |
301 | ;; | |
302 | ||
303 | // ITR | |
304 | itr.i itr[r0] = r1 | |
305 | ptr.i r2, r3 | |
306 | ;; | |
307 | srlz.i | |
308 | ;; | |
309 | ||
310 | // PKR# (.reg.val declares r1 = 0x1 and r2 = ~0x1 for the indexed writes below) | |
311 | .reg.val r1, 0x1 | |
312 | .reg.val r2, ~0x1 | |
313 | mov pkr[r1] = r1 | |
314 | mov pkr[r2] = r1 // no DV here | |
315 | ;; | |
316 | mov pkr[r1] = r1 | |
317 | mov pkr[r1] = r1 | |
318 | ;; | |
319 | ||
320 | // PMC# | |
321 | mov pmc[r3] = r1 | |
322 | mov pmc[r4] = r1 | |
323 | ;; | |
324 | ||
325 | // PMD# | |
326 | mov pmd[r3] = r1 | |
327 | mov pmd[r4] = r1 | |
328 | ;; | |
329 | ||
139368c9 | 330 | // PR%, 1 - 15 (p1 is the shared target; same-type .and/.or pairs are marked no-DV) |
800eeca4 JW |
331 | cmp.eq p1, p0 = r0, r1 |
332 | cmp.eq p1, p0 = r2, r3 | |
333 | ;; | |
334 | fcmp.eq p1, p2 = f2, f3 | |
335 | fcmp.eq p1, p3 = f2, f3 | |
336 | ;; | |
337 | cmp.eq.and p1, p2 = r0, r1 | |
338 | cmp.eq.or p1, p3 = r2, r3 | |
339 | ;; | |
340 | cmp.eq.or p1, p3 = r2, r3 | |
341 | cmp.eq.and p1, p2 = r0, r1 | |
342 | ;; | |
343 | cmp.eq.and p1, p2 = r0, r1 | |
344 | cmp.eq.and p1, p3 = r2, r3 // no DV here | |
345 | ;; | |
346 | cmp.eq.or p1, p2 = r0, r1 | |
347 | cmp.eq.or p1, p3 = r2, r3 // no DV here | |
348 | ;; | |
349 | ||
350 | // PR63 (a br.wtop pair, then the same compare patterns with p63 as the shared target) | |
351 | br.wtop.sptk L | |
352 | br.wtop.sptk L | |
353 | ;; | |
354 | cmp.eq p63, p0 = r0, r1 | |
355 | cmp.eq p63, p0 = r2, r3 | |
356 | ;; | |
357 | fcmp.eq p63, p2 = f2, f3 | |
358 | fcmp.eq p63, p3 = f2, f3 | |
359 | ;; | |
360 | cmp.eq.and p63, p2 = r0, r1 | |
361 | cmp.eq.or p63, p3 = r2, r3 | |
362 | ;; | |
363 | cmp.eq.or p63, p3 = r2, r3 | |
364 | cmp.eq.and p63, p2 = r0, r1 | |
365 | ;; | |
366 | cmp.eq.and p63, p2 = r0, r1 | |
367 | cmp.eq.and p63, p3 = r2, r3 // no DV here | |
368 | ;; | |
369 | cmp.eq.or p63, p2 = r0, r1 | |
370 | cmp.eq.or p63, p3 = r2, r3 // no DV here | |
371 | ;; | |
372 | ||
373 | // PSR.ac (mask bit 3) | |
374 | rum (1<<3) | |
375 | rum (1<<3) | |
376 | ;; | |
377 | ||
378 | // PSR.be (mask bit 1) | |
379 | rum (1<<1) | |
380 | rum (1<<1) | |
381 | ;; | |
382 | ||
383 | // PSR.bn | |
384 | bsw.0 // GAS automatically emits a stop after bsw.n | |
385 | bsw.0 // so this conflict is avoided | |
386 | ;; | |
387 | ||
388 | // PSR.cpl (epc paired with br.ret) | |
389 | epc | |
390 | br.ret.sptk b0 | |
391 | ;; | |
392 | ||
393 | // PSR.da (rfi is the only writer) | |
394 | // PSR.db (and others) | |
395 | mov psr.l = r0 | |
396 | mov psr.l = r1 | |
397 | ;; | |
398 | srlz.d | |
399 | ||
400 | // PSR.dd (rfi is the only writer) | |
401 | ||
402 | // PSR.dfh (mask bit 19) | |
403 | ssm (1<<19) | |
404 | ssm (1<<19) | |
405 | ;; | |
406 | srlz.d | |
407 | ||
408 | // PSR.dfl (mask bit 18) | |
409 | ssm (1<<18) | |
410 | ssm (1<<18) | |
411 | ;; | |
412 | srlz.d | |
413 | ||
414 | // PSR.di (mask bit 22) | |
415 | rsm (1<<22) | |
416 | rsm (1<<22) | |
417 | ;; | |
418 | ||
419 | // PSR.dt (mask bit 17) | |
420 | rsm (1<<17) | |
421 | rsm (1<<17) | |
422 | ;; | |
423 | ||
424 | // PSR.ed (rfi is the only writer) | |
425 | // PSR.i (mask bit 14) | |
426 | ssm (1<<14) | |
427 | ssm (1<<14) | |
428 | ;; | |
429 | ||
430 | // PSR.ia (no DV semantics) | |
431 | // PSR.ic (mask bit 13) | |
432 | ssm (1<<13) | |
433 | ssm (1<<13) | |
434 | ;; | |
435 | ||
436 | // PSR.id (rfi is the only writer) | |
437 | // PSR.is (br.ia and rfi are the only writers) | |
438 | // PSR.it (rfi is the only writer) | |
439 | // PSR.lp (see PSR.db) | |
440 | ||
441 | // PSR.mc (rfi is the only writer) | |
442 | // PSR.mfh (mask bit 5; the FR moves in this section use f32 and up) | |
443 | mov f32 = f33 | |
444 | mov r0 = psr | |
445 | ;; | |
446 | ssm (1<<5) | |
447 | ssm (1<<5) | |
448 | ;; | |
449 | ssm (1<<5) | |
450 | mov psr.um = r0 | |
451 | ;; | |
452 | rum (1<<5) | |
453 | rum (1<<5) | |
454 | ;; | |
455 | mov f32 = f33 | |
456 | mov f34 = f35 // no DV here | |
457 | ;; | |
458 | ||
459 | // PSR.mfl (mask bit 4; the FR moves in this section use f2-f5) | |
460 | mov f2 = f3 | |
461 | mov r0 = psr | |
462 | ;; | |
463 | ssm (1<<4) | |
464 | ssm (1<<4) | |
465 | ;; | |
466 | ssm (1<<4) | |
467 | mov psr.um = r0 | |
468 | ;; | |
469 | rum (1<<4) | |
470 | rum (1<<4) | |
471 | ;; | |
472 | mov f2 = f3 | |
473 | mov f4 = f5 // no DV here | |
474 | ;; | |
475 | ||
476 | // PSR.pk (mask bit 15) | |
477 | rsm (1<<15) | |
478 | rsm (1<<15) | |
479 | ;; | |
480 | ||
481 | // PSR.pp (mask bit 21) | |
482 | rsm (1<<21) | |
483 | rsm (1<<21) | |
484 | ;; | |
485 | ||
486 | // PSR.ri (no DV semantics) | |
487 | // PSR.rt (see PSR.db) | |
488 | ||
489 | // PSR.si (rsm then ssm of mask bit 23) | |
490 | rsm (1<<23) | |
491 | ssm (1<<23) | |
492 | ;; | |
493 | ||
494 | // PSR.sp (ssm then rsm of mask bit 20) | |
495 | ssm (1<<20) | |
496 | rsm (1<<20) | |
497 | ;; | |
498 | srlz.d | |
499 | ||
500 | // PSR.ss (rfi is the only writer) | |
501 | // PSR.tb (see PSR.db) | |
502 | ||
503 | // PSR.up (mask bit 2; an rsm pair, then rum paired with mov psr.um) | |
504 | rsm (1<<2) | |
505 | rsm (1<<2) | |
506 | ;; | |
507 | rum (1<<2) | |
508 | mov psr.um = r0 | |
509 | ;; | |
510 | ||
511 | // RR# (same index register r2 in both writes) | |
512 | mov rr[r2] = r1 | |
513 | mov rr[r2] = r3 | |
514 | ;; | |
139368c9 | 515 | |
7484b8e6 TW |
516 | // PR, additional cases (or.andcm and and.orcm interaction): same-type pairs are marked no-DV, mixed pairs are not |
517 | cmp.eq.or.andcm p6, p7 = 1, r32 | |
518 | cmp.eq.or.andcm p6, p7 = 5, r36 // no DV here | |
519 | ;; | |
520 | cmp.eq.and.orcm p6, p7 = 1, r32 | |
521 | cmp.eq.and.orcm p6, p7 = 5, r36 // no DV here | |
522 | ;; | |
523 | cmp.eq.or.andcm p63, p7 = 1, r32 | |
524 | cmp.eq.or.andcm p63, p7 = 5, r36 // no DV here | |
525 | ;; | |
526 | cmp.eq.or.andcm p6, p63 = 1, r32 | |
527 | cmp.eq.or.andcm p6, p63 = 5, r36 // no DV here | |
528 | ;; | |
529 | cmp.eq.and.orcm p63, p7 = 1, r32 | |
530 | cmp.eq.and.orcm p63, p7 = 5, r36 // no DV here | |
531 | ;; | |
532 | cmp.eq.and.orcm p6, p63 = 1, r32 | |
533 | cmp.eq.and.orcm p6, p63 = 5, r36 // no DV here | |
534 | ;; | |
535 | cmp.eq.or.andcm p6, p7 = 1, r32 | |
536 | cmp.eq.and.orcm p6, p7 = 5, r36 | |
537 | ;; | |
538 | cmp.eq.or.andcm p63, p7 = 1, r32 | |
539 | cmp.eq.and.orcm p63, p7 = 5, r36 | |
540 | ;; | |
541 | cmp.eq.or.andcm p6, p63 = 1, r32 | |
542 | cmp.eq.and.orcm p6, p63 = 5, r36 | |
543 | ;; | |
139368c9 JW |
544 | |
545 | // PR%, 16 - 62 (p21 is the shared target; same-type .and/.or pairs are marked no-DV) | |
546 | cmp.eq p21, p0 = r0, r1 | |
547 | cmp.eq p21, p0 = r2, r3 | |
548 | ;; | |
549 | fcmp.eq p21, p22 = f2, f3 | |
550 | fcmp.eq p21, p23 = f2, f3 | |
551 | ;; | |
552 | cmp.eq.and p21, p22 = r0, r1 | |
553 | cmp.eq.or p21, p23 = r2, r3 | |
554 | ;; | |
555 | cmp.eq.or p21, p23 = r2, r3 | |
556 | cmp.eq.and p21, p22 = r0, r1 | |
557 | ;; | |
558 | cmp.eq.and p21, p22 = r0, r1 | |
559 | cmp.eq.and p21, p23 = r2, r3 // no DV here | |
560 | ;; | |
561 | cmp.eq.or p21, p22 = r0, r1 | |
562 | cmp.eq.or p21, p23 = r2, r3 // no DV here | |
563 | ;; | |
564 | ||
565 | // RSE (no cases listed) | |
566 | ||
567 | L: |