Commit | Line | Data |
---|---|---|
2483b4ea CK |
1 | /* |
2 | * Copyright 2013 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | * | |
22 | * Authors: Alex Deucher | |
23 | */ | |
24 | #include <drm/drmP.h> | |
25 | #include "radeon.h" | |
26 | #include "radeon_asic.h" | |
74d360f6 | 27 | #include "radeon_trace.h" |
2483b4ea CK |
28 | #include "sid.h" |
29 | ||
30 | u32 si_gpu_check_soft_reset(struct radeon_device *rdev); | |
31 | ||
32 | /** | |
33 | * si_dma_is_lockup - Check if the DMA engine is locked up | |
34 | * | |
35 | * @rdev: radeon_device pointer | |
36 | * @ring: radeon_ring structure holding ring information | |
37 | * | |
38 | * Check if the async DMA engine is locked up. | |
39 | * Returns true if the engine appears to be locked up, false if not. | |
40 | */ | |
41 | bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) | |
42 | { | |
43 | u32 reset_mask = si_gpu_check_soft_reset(rdev); | |
44 | u32 mask; | |
45 | ||
46 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) | |
47 | mask = RADEON_RESET_DMA; | |
48 | else | |
49 | mask = RADEON_RESET_DMA1; | |
50 | ||
51 | if (!(reset_mask & mask)) { | |
52 | radeon_ring_lockup_update(ring); | |
53 | return false; | |
54 | } | |
55 | /* force ring activities */ | |
56 | radeon_ring_force_activity(rdev, ring); | |
57 | return radeon_ring_test_lockup(rdev, ring); | |
58 | } | |
59 | ||
60 | /** | |
61 | * si_dma_vm_set_page - update the page tables using the DMA | |
62 | * | |
63 | * @rdev: radeon_device pointer | |
64 | * @ib: indirect buffer to fill with commands | |
65 | * @pe: addr of the page entry | |
66 | * @addr: dst addr to write into pe | |
67 | * @count: number of page entries to update | |
68 | * @incr: increase next addr by incr bytes | |
69 | * @flags: access flags | |
70 | * | |
71 | * Update the page tables using the DMA (SI). | |
72 | */ | |
73 | void si_dma_vm_set_page(struct radeon_device *rdev, | |
74 | struct radeon_ib *ib, | |
75 | uint64_t pe, | |
76 | uint64_t addr, unsigned count, | |
77 | uint32_t incr, uint32_t flags) | |
78 | { | |
2483b4ea CK |
79 | uint64_t value; |
80 | unsigned ndw; | |
81 | ||
24c16439 | 82 | trace_radeon_vm_set_page(pe, addr, count, incr, flags); |
74d360f6 | 83 | |
24c16439 | 84 | if (flags & R600_PTE_SYSTEM) { |
2483b4ea CK |
85 | while (count) { |
86 | ndw = count * 2; | |
87 | if (ndw > 0xFFFFE) | |
88 | ndw = 0xFFFFE; | |
89 | ||
90 | /* for non-physically contiguous pages (system) */ | |
91 | ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); | |
92 | ib->ptr[ib->length_dw++] = pe; | |
93 | ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; | |
94 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { | |
24c16439 CK |
95 | value = radeon_vm_map_gart(rdev, addr); |
96 | value &= 0xFFFFFFFFFFFFF000ULL; | |
2483b4ea | 97 | addr += incr; |
24c16439 | 98 | value |= flags; |
2483b4ea CK |
99 | ib->ptr[ib->length_dw++] = value; |
100 | ib->ptr[ib->length_dw++] = upper_32_bits(value); | |
101 | } | |
102 | } | |
103 | } else { | |
104 | while (count) { | |
105 | ndw = count * 2; | |
106 | if (ndw > 0xFFFFE) | |
107 | ndw = 0xFFFFE; | |
108 | ||
24c16439 | 109 | if (flags & R600_PTE_VALID) |
2483b4ea CK |
110 | value = addr; |
111 | else | |
112 | value = 0; | |
113 | /* for physically contiguous pages (vram) */ | |
114 | ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); | |
115 | ib->ptr[ib->length_dw++] = pe; /* dst addr */ | |
116 | ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; | |
24c16439 | 117 | ib->ptr[ib->length_dw++] = flags; /* mask */ |
2483b4ea CK |
118 | ib->ptr[ib->length_dw++] = 0; |
119 | ib->ptr[ib->length_dw++] = value; /* value */ | |
120 | ib->ptr[ib->length_dw++] = upper_32_bits(value); | |
121 | ib->ptr[ib->length_dw++] = incr; /* increment size */ | |
122 | ib->ptr[ib->length_dw++] = 0; | |
123 | pe += ndw * 4; | |
124 | addr += (ndw / 2) * incr; | |
125 | count -= ndw / 2; | |
126 | } | |
127 | } | |
128 | while (ib->length_dw & 0x7) | |
129 | ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); | |
130 | } | |
131 | ||
132 | void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) | |
133 | { | |
134 | struct radeon_ring *ring = &rdev->ring[ridx]; | |
135 | ||
136 | if (vm == NULL) | |
137 | return; | |
138 | ||
139 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); | |
140 | if (vm->id < 8) { | |
141 | radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); | |
142 | } else { | |
143 | radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2)); | |
144 | } | |
145 | radeon_ring_write(ring, vm->pd_gpu_addr >> 12); | |
146 | ||
147 | /* flush hdp cache */ | |
148 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); | |
149 | radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); | |
150 | radeon_ring_write(ring, 1); | |
151 | ||
152 | /* bits 0-7 are the VM contexts0-7 */ | |
153 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); | |
154 | radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); | |
155 | radeon_ring_write(ring, 1 << vm->id); | |
156 | } | |
157 | ||
158 | /** | |
159 | * si_copy_dma - copy pages using the DMA engine | |
160 | * | |
161 | * @rdev: radeon_device pointer | |
162 | * @src_offset: src GPU address | |
163 | * @dst_offset: dst GPU address | |
164 | * @num_gpu_pages: number of GPU pages to xfer | |
165 | * @fence: radeon fence object | |
166 | * | |
167 | * Copy GPU paging using the DMA engine (SI). | |
168 | * Used by the radeon ttm implementation to move pages if | |
169 | * registered as the asic copy callback. | |
170 | */ | |
171 | int si_copy_dma(struct radeon_device *rdev, | |
172 | uint64_t src_offset, uint64_t dst_offset, | |
173 | unsigned num_gpu_pages, | |
174 | struct radeon_fence **fence) | |
175 | { | |
176 | struct radeon_semaphore *sem = NULL; | |
177 | int ring_index = rdev->asic->copy.dma_ring_index; | |
178 | struct radeon_ring *ring = &rdev->ring[ring_index]; | |
179 | u32 size_in_bytes, cur_size_in_bytes; | |
180 | int i, num_loops; | |
181 | int r = 0; | |
182 | ||
183 | r = radeon_semaphore_create(rdev, &sem); | |
184 | if (r) { | |
185 | DRM_ERROR("radeon: moving bo (%d).\n", r); | |
186 | return r; | |
187 | } | |
188 | ||
189 | size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); | |
190 | num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); | |
191 | r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); | |
192 | if (r) { | |
193 | DRM_ERROR("radeon: moving bo (%d).\n", r); | |
194 | radeon_semaphore_free(rdev, &sem, NULL); | |
195 | return r; | |
196 | } | |
197 | ||
1654b817 CK |
198 | radeon_semaphore_sync_to(sem, *fence); |
199 | radeon_semaphore_sync_rings(rdev, sem, ring->idx); | |
2483b4ea CK |
200 | |
201 | for (i = 0; i < num_loops; i++) { | |
202 | cur_size_in_bytes = size_in_bytes; | |
203 | if (cur_size_in_bytes > 0xFFFFF) | |
204 | cur_size_in_bytes = 0xFFFFF; | |
205 | size_in_bytes -= cur_size_in_bytes; | |
206 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes)); | |
207 | radeon_ring_write(ring, dst_offset & 0xffffffff); | |
208 | radeon_ring_write(ring, src_offset & 0xffffffff); | |
209 | radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); | |
210 | radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); | |
211 | src_offset += cur_size_in_bytes; | |
212 | dst_offset += cur_size_in_bytes; | |
213 | } | |
214 | ||
215 | r = radeon_fence_emit(rdev, fence, ring->idx); | |
216 | if (r) { | |
217 | radeon_ring_unlock_undo(rdev, ring); | |
218 | return r; | |
219 | } | |
220 | ||
221 | radeon_ring_unlock_commit(rdev, ring); | |
222 | radeon_semaphore_free(rdev, &sem, *fence); | |
223 | ||
224 | return r; | |
225 | } | |
226 |