/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "r600d.h"

u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 */
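
/*
 * For reference, a sketch of the packet header layout assumed by the
 * DMA_PACKET() macro in r600d.h, based on how the macro packs its
 * arguments (an illustration, not a definitive ISA reference):
 *
 *   bits [31:28]  cmd - opcode (WRITE, COPY, INDIRECT_BUFFER, SEMAPHORE,
 *                       FENCE, TRAP, NOP, ...)
 *   bit  [23]     t   - tiling select
 *   bit  [22]     s   - sub-opcode select (e.g. signal vs. wait for
 *                       SEMAPHORE packets)
 *   bits [15:0]   n   - count (number of dwords, or packet-specific data)
 *
 * The packet bodies that follow the header are built dword by dword with
 * radeon_ring_write(), as in the functions below.
 */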

/**
 * r600_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
			   struct radeon_ring *ring)
{
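	/* the hardware rptr is a byte offset; mask off the valid bits
	 * and shift to convert it to a dword index
	 */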
	return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2;
}

/**
 * r600_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
			   struct radeon_ring *ring)
{
	return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2;
}

/**
 * r600_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (r6xx+).
 */
void r600_dma_set_wptr(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
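	/* the ring tracks wptr in dwords; convert back to the byte
	 * offset the register expects
	 */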
	WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * r600_dma_stop - stop the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine (r6xx-evergreen).
 */
void r600_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl = RREG32(DMA_RB_CNTL);

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
}

/**
 * r600_dma_resume - setup and start the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffer and enable it (r6xx-evergreen).
 * Returns 0 for success, error for failure.
 */
int r600_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	int r;

	/* Reset dma */
	if (rdev->family >= CHIP_RV770)
		WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
	else
		WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);

	WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
	WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);

	/* Set ring buffer size in dwords */
	rb_bufsz = order_base_2(ring->ring_size / 4);
	rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
	rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
	WREG32(DMA_RB_CNTL, rb_cntl);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(DMA_RB_RPTR, 0);
	WREG32(DMA_RB_WPTR, 0);

	/* set the wb address whether it's enabled or not */
	WREG32(DMA_RB_RPTR_ADDR_HI,
	       upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
	WREG32(DMA_RB_RPTR_ADDR_LO,
	       ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));

	if (rdev->wb.enabled)
		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

	WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);

	/* enable DMA IBs */
	ib_cntl = DMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
	ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
	WREG32(DMA_IB_CNTL, ib_cntl);

	dma_cntl = RREG32(DMA_CNTL);
	dma_cntl &= ~CTXEMPTY_INT_ENABLE;
	WREG32(DMA_CNTL, dma_cntl);

	if (rdev->family >= CHIP_RV770)
		WREG32(DMA_MODE, 1);

	ring->wptr = 0;
	WREG32(DMA_RB_WPTR, ring->wptr << 2);

	ring->rptr = RREG32(DMA_RB_RPTR) >> 2;

	WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);

	ring->ready = true;

	r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * r600_dma_fini - tear down the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine and free the ring (r6xx-evergreen).
 */
void r600_dma_fini(struct radeon_device *rdev)
{
	r600_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
}

/**
 * r600_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = r600_gpu_check_soft_reset(rdev);

	if (!(reset_mask & RADEON_RESET_DMA)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * r600_dma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (r6xx-SI).
 * Returns 0 for success, error for failure.
 */
int r600_dma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ring_lock(rdev, ring, 4);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

/**
 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write the fence seq number,
 * followed by a DMA trap packet to generate an interrupt if needed
 * (r6xx-r7xx).
 */
void r600_dma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* write the fence */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
	radeon_ring_write(ring, lower_32_bits(fence->seq));
	/* generate an interrupt */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
}

/**
 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (r6xx-SI).
 */
void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 s = emit_wait ? 0 : 1;
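	/* s selects the sub-op in the packet header: 1 = signal, 0 = wait */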

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
}

/**
 * r600_dma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (r6xx-SI).
 * Returns 0 on success, error on failure.
 */
int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp = 0;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
	ib.ptr[3] = 0xDEADBEEF;
	ib.length_dw = 4;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		/* free the IB on error as well, so it is not leaked */
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}

/**
 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (r6xx-r7xx).
 */
void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
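	/* Worked example of the rule above: the INDIRECT_BUFFER packet is
	 * 3 dwords, so padding until (wptr & 7) == 5 makes it end exactly
	 * on an 8 dword boundary (5 + 3 = 8). The next_rptr computation
	 * above mirrors this: wptr + 4 for the write packet, padded to
	 * 5 mod 8, plus 3 for the IB packet itself.
	 */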
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * r600_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU pages using the DMA engine (r6xx).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int r600_copy_dma(struct radeon_device *rdev,
		  uint64_t src_offset, uint64_t dst_offset,
		  unsigned num_gpu_pages,
		  struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_dw, cur_size_in_dw;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
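	/* each COPY packet moves at most 0xFFFE dwords and takes 4 ring
	 * dwords; the extra 8 dwords of ring space are assumed to cover
	 * the optional semaphore sync and the fence emitted below
	 */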
	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	if (radeon_fence_need_sync(*fence, ring->idx)) {
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
					    ring->idx);
		radeon_fence_note_sync(*fence, ring->idx);
	} else {
		radeon_semaphore_free(rdev, &sem, NULL);
	}

	for (i = 0; i < num_loops; i++) {
		cur_size_in_dw = size_in_dw;
		if (cur_size_in_dw > 0xFFFE)
			cur_size_in_dw = 0xFFFE;
		size_in_dw -= cur_size_in_dw;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, src_offset & 0xfffffffc);
		radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
					 (upper_32_bits(src_offset) & 0xff)));
		src_offset += cur_size_in_dw * 4;
		dst_offset += cur_size_in_dw * 4;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		/* also release the semaphore so it is not leaked */
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
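
/*
 * Illustrative only: a minimal sketch of how a caller might drive
 * r600_copy_dma() directly, assuming valid source and destination GPU
 * addresses. The function name below is hypothetical and not part of
 * the driver; in practice TTM invokes the copy through the asic copy
 * callback table.
 */
#if 0
static int example_dma_copy(struct radeon_device *rdev,
			    uint64_t src, uint64_t dst,
			    unsigned num_pages)
{
	struct radeon_fence *fence = NULL;
	int r;

	r = r600_copy_dma(rdev, src, dst, num_pages, &fence);
	if (r)
		return r;

	/* block until the DMA engine signals completion */
	r = radeon_fence_wait(fence, false);
	radeon_fence_unref(&fence);
	return r;
}
#endif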