Commit | Line | Data |
---|---|---|
252b5132 RH |
1 | # D30V parallel optimization test |
2 | # assemble with "-O" | |
3 | ||
4 | .text | |
5 | start: | |
6 | abs r1,r2 | |
7 | abs r3,r4 | |
8 | ||
9 | notfg f0,f4 | |
10 | notfg f1,f2 | |
11 | ||
12 | abs r1,r2 | |
13 | notfg f1,f2 | |
14 | ||
15 | # both instructions change the C flag (here C is the first operand of notfg) | |
16 | add r1,r2,r3 | |
17 | notfg C,f0 | |
18 | ||
19 | # add changes the C flag while notfg only uses it (here C is the second operand of notfg) | |
20 | add r1,r2,r3 | |
21 | notfg f0,C | |
22 | ||
23 | bra . | |
24 | abs r1,r2 | |
25 | ||
26 | abs r1,r2 | |
27 | bra . | |
28 | ||
29 | bsr . | |
30 | abs r1,r2 | |
31 | ||
32 | abs r1,r2 | |
33 | abs r1,r2 | |
34 | bsr . | |
35 | ||
36 | ldb r1,@(r2,r3) | |
37 | stb r7,@(r8,r9) | |
38 | ||
39 | stb r7,@(r8,r9) | |
40 | ldb r1,@(r2,r3) | |
41 | ||
42 | ldb r7,@(r8,r9) | |
43 | ldb r1,@(r2,r3) | |
44 | ||
45 | stb r7,@(r8,r9) | |
46 | stb r1,@(r2,r3) | |
47 | ||
48 | add r3, r3, r6 | |
49 | stw r2, @(r3, 0) | |
50 | ||
51 | # should be serial because of conditional execution: each jmp carries a condition suffix (/tx, /fx, /xt, /xf, /tt, /tf) and follows a cmple whose first operand is flag f0 or f1 | |
52 | cmple f0,r4,r5 | |
53 | jmp/tx 0x0 | |
54 | ||
55 | cmple f0,r4,r5 | |
56 | jmp/fx 0x0 | |
57 | ||
58 | cmple f0,r4,r5 | |
59 | jmp/xt 0x0 | |
60 | ||
61 | cmple f0,r4,r5 | |
62 | jmp/xf 0x0 | |
63 | ||
64 | cmple f0,r4,r5 | |
65 | jmp/tt 0x0 | |
66 | ||
67 | cmple f0,r4,r5 | |
68 | jmp/tf 0x0 | |
69 | ||
70 | cmple f1,r4,r5 | |
71 | jmp/tx 0x0 | |
72 | ||
73 | cmple f1,r4,r5 | |
74 | jmp/xt 0x0 | |
75 | ||
76 | # serial because of the r4 dependency: r4 is the first operand of add and is also an operand of cmple | |
77 | add r4, r0, 1 | |
78 | cmple f0, r4, r5 | |
79 | ||
80 | # parallel, cmple uses r3 here instead of r4 | |
81 | add r4, r0, 1 | |
82 | cmple f0, r3, r5 | |
83 | ||
84 | # serial because ld2w loads r5 and adds also changes r5 | |
85 | ld2w r4,@(r0,r6) | |
86 | adds r5,r19,r20 | |
87 | ||
88 | # serial because ld2w loads r5 and adds uses r5 as an operand | |
89 | ld2w r4,@(r0,r6) | |
90 | adds r3,r5,r20 | |
91 | ||
92 | # parallel even though ld2w uses r6 and adds changes it | |
93 | ld2w r4,@(r0,r6) | |
94 | adds r6,r19,r20 | |
95 | ||
96 | # parallel, adds names none of the registers that ld2w names | |
97 | ld2w r4,@(r0,r6) | |
98 | adds r7,r19,r20 | |
99 | ||
100 | # parallel, r0 appears in both instructions but neither changes it | |
101 | ld2w r4,@(r0,r6) | |
102 | adds r7,r0,r20 | |
103 | ||
104 | # parallel even though st2w uses r5 and adds modifies it | |
105 | st2w r4,@(r0,r6) | |
106 | adds r5,r19,r20 | |
107 | ||
108 | # parallel, both use but don't modify r5 | |
109 | st2w r4,@(r0,r6) | |
110 | adds r3,r5,r20 | |
111 | ||
112 | # parallel even though st2w uses r6 and adds changes it | |
113 | st2w r4,@(r0,r6) | |
114 | adds r6,r19,r20 | |
115 | ||
116 | # parallel, adds names none of the registers that st2w names | |
117 | st2w r4,@(r0,r6) | |
118 | adds r7,r19,r20 | |
119 | ||
120 | # parallel, r0 appears in both instructions but neither changes it | |
121 | st2w r4,@(r0,r6) | |
122 | adds r7,r0,r20 | |
123 | ||
124 | # test memory dependencies | |
125 | ||
126 | # always serial because one st2w could overwrite the other, even though the two instructions name no common register | |
127 | st2w r10,@(r3,r4) | |
128 | st2w r40,@(r43,r44) | |
129 | ||
130 | # always serial (stw followed by ldw), even though the two instructions name no common register | |
131 | stw r1,@(r2,r3) | |
132 | ldw r41,@(r42,r43) | |
133 | ||
134 | # reads can happen in parallel but the current architecture | |
135 | # doesn't support it (NOTE: so this ldw/ldb pair is presumably kept serial - confirm against the expected dump) | |
136 | ldw r1,@(r2,r3) | |
137 | ldb r41,@(r42,r43) | |
138 | ||
139 | # test post increment and decrement dependencies | |
140 | ||
141 | # serial, ldw post-increments r6 (the r6+ operand) and the following adds uses r6 | |
142 | ldw r4,@(r6+,r11) | |
143 | adds r9,r6,2 | |
144 | ||
145 | # parallel, modification to r6 (the r6- operand of ldw) happens last | |
146 | adds r9,r6,2 | |
147 | ldw r4,@(r6-,r11) | |
148 | ||
149 | # serial, stw post-decrements r6 (the r6- operand) and the following adds uses r6 | |
150 | stw r4,@(r6-,r11) | |
151 | adds r9,r6,2 | |
152 | ||
153 | # parallel, r6 is used by both instructions without a + or - modifier | |
154 | ldw r4,@(r6,r11) | |
155 | adds r9,r6,2 | |
156 | ||
157 | # parallel, the same two instructions as the previous pair in the opposite order | |
158 | adds r9,r6,2 | |
159 | ldw r4,@(r6,r11) | |
160 | ||
161 | # if the first instruction is a jmp, don't parallelize (the jsr, bra and bsr pairs that follow repeat the pattern with abs as the second instruction) | |
162 | jmp 0 | |
163 | abs r1,r2 | |
164 | ||
165 | jsr 0 | |
166 | abs r1,r2 | |
167 | ||
168 | .align 3 | |
169 | ||
170 | bra 0 | |
171 | abs r1,r2 | |
172 | ||
173 | bsr 0 | |
174 | abs r1,r2 | |
175 | ||
176 | # Pairs that are explicitly prohibited from parallel execution: each is an instruction with an @(...) memory operand followed by a register-only instruction. | |
177 | # The labels are here to prevent instruction pairs | |
178 | # from being merged with following pairs. | |
179 | ||
180 | label1: | |
181 | st2w r2, @(r2, r3) | |
182 | addhlll r4, r5, r6 | |
183 | label2: | |
184 | st4hb r8, @(r8, r9) | |
185 | subhllh r10, r11, r12 | |
186 | label3: | |
187 | ld2w r14, @(r14, r15) | |
188 | mulhxhl r16, r17, r18 | |
189 | label4: | |
190 | ldw r19, @(r20, r21) | |
191 | mulx2h r22, r23, r24 | |
192 | label5: | |
193 | ldh r25, @(r26, r27) | |
194 | mul2h r28, r29, r30 | |
195 | ||
196 | # Insertion of NOPs required to prevent pipeline clashes; every case below involves a mul- or mac-family instruction (NOTE: the <- and -> forms presumably request an explicit execution order - confirm against the D30V assembler syntax documentation). | |
197 | ||
198 | label6: | |
199 | mul r1,r2,r3 | |
200 | mulhxll r4,r5,r6 | |
201 | add r7, r8, r9 | |
202 | label7: | |
203 | ||
204 | mul r2,r3,r4 | |
205 | ldw r5, @(r6,r0) | |
206 | ||
207 | ldw r10, @(r11, r0) <- mul r7,r8,r9 | |
208 | ||
209 | mul r12,r13,r14 -> ldw r15, @(r16, r0) | |
210 | ||
211 | mac1 r2,r3,r4 | |
212 | ldw r5, @(r6,r0) | |
213 | ||
214 | ldw r10, @(r11, r0) <- mac0 r7,r8,r9 | |
215 | ldw r10, @(r11, r0) | |
216 |