/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"

u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */

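/* A minimal sketch of the programming model described above, assuming the
 * driver's generic ring helpers (exact helper signatures may differ by
 * kernel version); dst_addr and data are hypothetical values:
 *
 *	r = radeon_ring_lock(rdev, ring, 4);
 *	if (r)
 *		return r;
 *	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
 *	radeon_ring_write(ring, dst_addr & 0xfffffffc);
 *	radeon_ring_write(ring, upper_32_bits(dst_addr) & 0xff);
 *	radeon_ring_write(ring, data);
 *	radeon_ring_unlock_commit(rdev, ring);
 */
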
/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 rptr, reg;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
			reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
		else
			reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

		rptr = RREG32(reg);
	}

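	/* both the register and the writeback slot hold a byte offset;
	 * bits [17:2] are valid, so mask and shift down to a dword index
	 */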
	return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	return (RREG32(reg) & 0x3fffc) >> 2;
}

/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

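	/* ring->wptr counts dwords; the register expects a byte offset
	 * within the same 0x3fffc window used for the rptr
	 */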
	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	if (rdev->wb.enabled) {
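		/* compute where the rptr will point after this submission:
		 * 4 dwords for the WRITE packet below, NOP padding up to
		 * the (wptr & 7) == 5 position, then 3 dwords for the
		 * INDIRECT_BUFFER packet
		 */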
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl;

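	/* if TTM uses one of the DMA engines for buffer moves, shrink the
	 * active VRAM size to the CPU-visible aperture while the engines
	 * are down, since moves must then fall back to CPU copies
	 */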
	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	/* dma0 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

	/* dma1 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}

/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
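		/* the size field holds log2 of the ring size in dwords and
		 * starts at bit 1 (bit 0 is DMA_RB_ENABLE)
		 */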
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
	cayman_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}

/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_set_page - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_page(struct radeon_device *rdev,
			    struct radeon_ib *ib,
			    uint64_t pe,
			    uint64_t addr, unsigned count,
			    uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

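	/* two paths: system pages must be translated through the GART one
	 * entry at a time, while contiguous ranges can use the PTE_PDE
	 * packet, which generates entries from a base address and increment
	 */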
	if ((flags & R600_PTE_SYSTEM) || (count == 1)) {
		while (count) {
			ndw = count * 2;
			if (ndw > 0xFFFFE)
				ndw = 0xFFFFE;

			/* for non-physically contiguous pages (system) */
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
				if (flags & R600_PTE_SYSTEM) {
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & R600_PTE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		while (count) {
			ndw = count * 2;
			if (ndw > 0xFFFFE)
				ndw = 0xFFFFE;

			if (flags & R600_PTE_VALID)
				value = addr;
			else
				value = 0;
			/* for physically contiguous pages (vram) */
			ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
			ib->ptr[ib->length_dw++] = pe; /* dst addr */
			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
			ib->ptr[ib->length_dw++] = flags; /* mask */
			ib->ptr[ib->length_dw++] = 0;
			ib->ptr[ib->length_dw++] = value; /* value */
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
			ib->ptr[ib->length_dw++] = incr; /* increment size */
			ib->ptr[ib->length_dw++] = 0;
			pe += ndw * 4;
			addr += (ndw / 2) * incr;
			count -= ndw / 2;
		}
	}
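	/* pad the IB to a multiple of 8 dwords with NOPs, mirroring the
	 * ring's 8-dword alignment requirement
	 */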
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}

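/**
 * cayman_dma_vm_flush - flush the TLB using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit on
 * @vm: radeon_vm pointer
 *
 * Update the page table base address and request a TLB flush for the
 * given VM via SRBM writes on the DMA ring (cayman/TN).
 */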
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

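	/* SRBM_WRITE addresses registers by dword offset; the 0xf in bits
	 * [19:16] is presumably a byte-enable mask (an assumption based on
	 * the similar packet in later SDMA engines)
	 */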
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts 0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}