drm/radeon: force UVD buffers into VRAM on RS[78]80 v2
drivers/gpu/drm/radeon/radeon_cs.c

/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

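/**
 * radeon_cs_parser_relocs() - gather and validate the buffers of a CS
 * @p: parser structure holding parsing context.
 *
 * Walk the relocation chunk, look up the GEM object for every reloc,
 * pick the preferred/allowed domains and a priority for each buffer,
 * sort the buffers into the validation list and validate them.
 **/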
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i, j;
	bool duplicate, need_mmap_lock = false;
	int r;

	if (p->chunk_relocs_idx == -1) {
		return 0;
	}
	chunk = &p->chunks[p->chunk_relocs_idx];
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
	if (p->relocs_ptr == NULL) {
		return -ENOMEM;
	}
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		unsigned priority;

		duplicate = false;
		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		for (j = 0; j < i; j++) {
			if (r->handle == p->relocs[j].handle) {
				p->relocs_ptr[i] = &p->relocs[j];
				duplicate = true;
				break;
			}
		}
		if (duplicate) {
			p->relocs[i].handle = 0;
			continue;
		}

		p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
							  r->handle);
		if (p->relocs[i].gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs_ptr[i] = &p->relocs[i];
		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;

		/* The first reloc of a UVD job is the msg and that must be in
		   VRAM; also put everything into VRAM on AGP cards and older
		   IGP chips to avoid image corruption. */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}

		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;
			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}

		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].handle = r->handle;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);
	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	return r;
}

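/**
 * radeon_cs_get_ring() - map the userspace ring id to a hardware ring index
 * @p: parser structure holding parsing context.
 * @ring: RADEON_CS_RING_* id requested by userspace.
 * @priority: requested submission priority.
 *
 * Store the priority and translate the CS ring id into the matching
 * hardware ring index for the chip family, or return -EINVAL for an
 * unknown or unsupported ring.
 **/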
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

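/**
 * radeon_cs_sync_rings() - sync the IB against the relocated buffers
 * @p: parser structure holding parsing context.
 *
 * Make the IB wait for the last fence of every buffer referenced by
 * the relocation list before it is executed.
 **/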
static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	int i;

	for (i = 0; i < p->nrelocs; i++) {
		if (!p->relocs[i].robj)
			continue;

		radeon_semaphore_sync_to(p->ib.semaphore,
					 p->relocs[i].robj->tbo.sync_obj);
	}
}

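/**
 * radeon_cs_parser_init() - initialize a parser from a CS ioctl request
 * @p: parser structure to initialize.
 * @data: pointer to the drm_radeon_cs ioctl argument.
 *
 * Copy the chunk headers from userspace, remember the indices of the
 * IB, reloc, flags and const IB chunks, and pick the target ring and
 * priority from the flags chunk.
 **/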
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	if (!cs->num_chunks) {
		return 0;
	}
	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->ib.semaphore = NULL;
	p->const_ib.sa_bo = NULL;
	p->const_ib.semaphore = NULL;
	p->chunk_ib_idx = -1;
	p->chunk_relocs_idx = -1;
	p->chunk_flags_idx = -1;
	p->chunk_const_ib_idx = -1;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			   sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs_idx = i;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib_idx = i;
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib_idx = i;
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags_idx = i;
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

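/* list_sort() comparison callback: order BOs by size, smallest first. */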
static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
	struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: indicator whether to back off the reservation
 *
 * If error is set, unvalidate the buffers, otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    parser->ib.fence);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			if (parser->relocs[i].gobj)
				drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
		}
	}
	kfree(parser->track);
	kfree(parser->relocs);
	kfree(parser->relocs_ptr);
	kfree(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

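/**
 * radeon_cs_ib_chunk() - parse and schedule a non-VM IB
 * @rdev: radeon device the CS was submitted to.
 * @parser: parser structure holding parsing context.
 *
 * Run the per-ring command stream checker on the IB, note UVD/VCE
 * usage, sync against the relocated buffers and schedule the IB.
 * Does nothing when the CS uses a VM.
 **/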
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	radeon_cs_sync_rings(parser);
	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

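/**
 * radeon_bo_vm_update_pte() - update the page tables for all CS buffers
 * @p: parser structure holding parsing context.
 * @vm: VM the command stream executes in.
 *
 * Update the page directory, the temporary ring BO mapping and the
 * mapping of every buffer in the relocation list, then clear the
 * invalidated entries.
 **/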
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		/* ignore duplicates */
		if (p->relocs_ptr[i] != &p->relocs[i])
			continue;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

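/**
 * radeon_cs_ib_vm_chunk() - parse and schedule a VM IB
 * @rdev: radeon device the CS was submitted to.
 * @parser: parser structure holding parsing context.
 *
 * Parse the (const) IB with the per-ring IB checker, update the VM
 * page tables, sync the rings and schedule the IB, optionally together
 * with the const IB on SI and newer parts.
 **/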
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}
	radeon_cs_sync_rings(parser);
	radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

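/**
 * radeon_cs_handle_lockup() - translate a GPU lockup into a retry
 * @rdev: radeon device the CS was submitted to.
 * @r: error code from the submission path.
 *
 * On -EDEADLK try a GPU reset and, if it succeeds, ask userspace to
 * resubmit by returning -EAGAIN; otherwise pass the error through.
 **/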
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

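/**
 * radeon_cs_ib_fill() - allocate the IB(s) and copy in the command stream
 * @rdev: radeon device the CS was submitted to.
 * @parser: parser structure holding parsing context.
 *
 * Allocate the IB (and the const IB for SI and newer VM submissions),
 * check the size limits and copy the command words from the chunk data
 * or directly from userspace.
 **/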
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib_idx != -1)) {
			ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					  vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					   ib_chunk->user_ptr,
					   ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = &parser->chunks[parser->chunk_ib_idx];
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = &parser->chunks[parser->chunk_ib_idx];

	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

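/**
 * radeon_cs_ioctl() - entry point of the CS ioctl
 * @dev: DRM device.
 * @data: drm_radeon_cs ioctl argument.
 * @filp: DRM file the request came from.
 *
 * Initialize the parser, fill the IBs, gather and validate the relocated
 * buffers and hand the command stream to the selected ring, cleaning up
 * and handling GPU lockups on the way out.
 **/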
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p: parser structure holding parsing context.
 * @pkt: where to store packet information
 * @idx: index of the first dword of the packet in the IB
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	struct radeon_device *rdev = p->rdev;
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p: structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p: structure holding the parser context.
 * @pkt: structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p: parser structure holding parsing context.
 * @cs_reloc: where to store the resulting reloc pointer
 * @nomm: legacy (UMS) path, read the GPU offset directly from the
 *	  relocation chunk data instead of using the reloc table
 *
 * Check that the next packet is a NOP relocation packet3 and return
 * the matching relocation entry.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_cs_reloc **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}