Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
[deliverable/linux.git] / drivers / gpu / drm / radeon / radeon_cs.c
CommitLineData
771fe6b9
JG
1/*
2 * Copyright 2008 Jerome Glisse.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Jerome Glisse <glisse@freedesktop.org>
26 */
4330441a 27#include <linux/list_sort.h>
760285e7
DH
28#include <drm/drmP.h>
29#include <drm/radeon_drm.h>
771fe6b9
JG
30#include "radeon_reg.h"
31#include "radeon.h"
860024e5 32#include "radeon_trace.h"
771fe6b9 33
c9b76548
MO
34#define RADEON_CS_MAX_PRIORITY 32u
35#define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1)
36
37/* This is based on the bucket sort with O(n) time complexity.
38 * An item with priority "i" is added to bucket[i]. The lists are then
39 * concatenated in descending order.
40 */
/* Scratch state for the priority bucket sort of a relocation list. */
41struct radeon_cs_buckets {
/* One list per priority value; RADEON_CS_NUM_BUCKETS is
 * RADEON_CS_MAX_PRIORITY + 1, so index RADEON_CS_MAX_PRIORITY is valid.
 */
42 struct list_head bucket[RADEON_CS_NUM_BUCKETS];
43};
44
45static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
46{
47 unsigned i;
48
49 for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
50 INIT_LIST_HEAD(&b->bucket[i]);
51}
52
53static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
54 struct list_head *item, unsigned priority)
55{
56 /* Since buffers which appear sooner in the relocation list are
57 * likely to be used more often than buffers which appear later
58 * in the list, the sort mustn't change the ordering of buffers
59 * with the same priority, i.e. it must be stable.
60 */
61 list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
62}
63
64static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
65 struct list_head *out_list)
66{
67 unsigned i;
68
69 /* Connect the sorted buckets in the output list. */
70 for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
71 list_splice(&b->bucket[i], out_list);
72 }
73}
74
1109ca09 75static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
771fe6b9 76{
771fe6b9 77 struct radeon_cs_chunk *chunk;
c9b76548 78 struct radeon_cs_buckets buckets;
466be338
CK
79 unsigned i;
80 bool need_mmap_lock = false;
f72a113a 81 int r;
771fe6b9 82
6d2d13dd 83 if (p->chunk_relocs == NULL) {
771fe6b9
JG
84 return 0;
85 }
6d2d13dd 86 chunk = p->chunk_relocs;
cf4ccd01 87 p->dma_reloc_idx = 0;
771fe6b9
JG
88 /* FIXME: we assume that each relocs use 4 dwords */
89 p->nrelocs = chunk->length_dw / 4;
b421ed15 90 p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list));
771fe6b9
JG
91 if (p->relocs == NULL) {
92 return -ENOMEM;
93 }
c9b76548
MO
94
95 radeon_cs_buckets_init(&buckets);
96
771fe6b9
JG
97 for (i = 0; i < p->nrelocs; i++) {
98 struct drm_radeon_cs_reloc *r;
d33a8fc7 99 struct drm_gem_object *gobj;
c9b76548 100 unsigned priority;
771fe6b9 101
771fe6b9 102 r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
a8ad0bd8 103 gobj = drm_gem_object_lookup(p->filp, r->handle);
d33a8fc7 104 if (gobj == NULL) {
4474f3a9
CK
105 DRM_ERROR("gem object lookup failed 0x%x\n",
106 r->handle);
107 return -ENOENT;
108 }
d33a8fc7 109 p->relocs[i].robj = gem_to_radeon_bo(gobj);
c9b76548
MO
110
111 /* The userspace buffer priorities are from 0 to 15. A higher
112 * number means the buffer is more important.
113 * Also, the buffers used for write have a higher priority than
114 * the buffers used for read only, which doubles the range
115 * to 0 to 31. 32 is reserved for the kernel driver.
116 */
701e1e78
CK
117 priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
118 + !!r->write_domain;
4474f3a9 119
4f66c599 120 /* the first reloc of an UVD job is the msg and that must be in
b6a7eeea
CK
121 VRAM, also but everything into VRAM on AGP cards and older
122 IGP chips to avoid image corruptions */
4f66c599 123 if (p->ring == R600_RING_TYPE_UVD_INDEX &&
b6a7eeea
CK
124 (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
125 p->rdev->family == CHIP_RS780 ||
126 p->rdev->family == CHIP_RS880)) {
127
bcf6f1e9 128 /* TODO: is this still needed for NI+ ? */
ce6758c8 129 p->relocs[i].prefered_domains =
f2ba57b5
CK
130 RADEON_GEM_DOMAIN_VRAM;
131
ce6758c8 132 p->relocs[i].allowed_domains =
f2ba57b5
CK
133 RADEON_GEM_DOMAIN_VRAM;
134
c9b76548
MO
135 /* prioritize this over any other relocation */
136 priority = RADEON_CS_MAX_PRIORITY;
f2ba57b5
CK
137 } else {
138 uint32_t domain = r->write_domain ?
139 r->write_domain : r->read_domains;
140
ec65da38
MO
141 if (domain & RADEON_GEM_DOMAIN_CPU) {
142 DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
143 "for command submission\n");
144 return -EINVAL;
145 }
146
ce6758c8 147 p->relocs[i].prefered_domains = domain;
f2ba57b5
CK
148 if (domain == RADEON_GEM_DOMAIN_VRAM)
149 domain |= RADEON_GEM_DOMAIN_GTT;
ce6758c8 150 p->relocs[i].allowed_domains = domain;
f2ba57b5 151 }
4474f3a9 152
f72a113a
CK
153 if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
154 uint32_t domain = p->relocs[i].prefered_domains;
155 if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
156 DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
157 "allowed for userptr BOs\n");
158 return -EINVAL;
159 }
160 need_mmap_lock = true;
161 domain = RADEON_GEM_DOMAIN_GTT;
162 p->relocs[i].prefered_domains = domain;
163 p->relocs[i].allowed_domains = domain;
164 }
165
df0af440 166 p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
298593b6 167 p->relocs[i].tv.shared = !r->write_domain;
4474f3a9 168
df0af440 169 radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
c9b76548 170 priority);
771fe6b9 171 }
c9b76548
MO
172
173 radeon_cs_buckets_get_list(&buckets, &p->validated);
174
6d2f2944
CK
175 if (p->cs_flags & RADEON_CS_USE_VM)
176 p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
177 &p->validated);
f72a113a
CK
178 if (need_mmap_lock)
179 down_read(&current->mm->mmap_sem);
180
181 r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
182
183 if (need_mmap_lock)
184 up_read(&current->mm->mmap_sem);
6d2f2944 185
f72a113a 186 return r;
771fe6b9
JG
187}
188
721604a1
JG
189static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
190{
191 p->priority = priority;
192
193 switch (ring) {
194 default:
195 DRM_ERROR("unknown ring id: %d\n", ring);
196 return -EINVAL;
197 case RADEON_CS_RING_GFX:
198 p->ring = RADEON_RING_TYPE_GFX_INDEX;
199 break;
200 case RADEON_CS_RING_COMPUTE:
963e81f9 201 if (p->rdev->family >= CHIP_TAHITI) {
8d5ef7b1
AD
202 if (p->priority > 0)
203 p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
204 else
205 p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
206 } else
207 p->ring = RADEON_RING_TYPE_GFX_INDEX;
721604a1 208 break;
278a334c
AD
209 case RADEON_CS_RING_DMA:
210 if (p->rdev->family >= CHIP_CAYMAN) {
211 if (p->priority > 0)
212 p->ring = R600_RING_TYPE_DMA_INDEX;
213 else
214 p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
b9ace36f 215 } else if (p->rdev->family >= CHIP_RV770) {
278a334c
AD
216 p->ring = R600_RING_TYPE_DMA_INDEX;
217 } else {
218 return -EINVAL;
219 }
220 break;
f2ba57b5
CK
221 case RADEON_CS_RING_UVD:
222 p->ring = R600_RING_TYPE_UVD_INDEX;
223 break;
d93f7937
CK
224 case RADEON_CS_RING_VCE:
225 /* TODO: only use the low priority ring for now */
226 p->ring = TN_RING_TYPE_VCE1_INDEX;
227 break;
721604a1
JG
228 }
229 return 0;
230}
231
392a250b 232static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
93504fce 233{
1d0c0942 234 struct radeon_bo_list *reloc;
c1f0a9c2 235 int r;
93504fce 236
c1f0a9c2 237 list_for_each_entry(reloc, &p->validated, tv.head) {
f2c24b83 238 struct reservation_object *resv;
f2c24b83 239
c1f0a9c2 240 resv = reloc->robj->tbo.resv;
975700d2 241 r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
c1f0a9c2 242 reloc->tv.shared);
392a250b 243 if (r)
c1f0a9c2 244 return r;
8f676c4c 245 }
c1f0a9c2 246 return 0;
93504fce
CK
247}
248
9b00147d 249/* XXX: note that this is called from the legacy UMS CS ioctl as well */
771fe6b9
JG
250int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
251{
252 struct drm_radeon_cs *cs = data;
253 uint64_t *chunk_array_ptr;
721604a1
JG
254 unsigned size, i;
255 u32 ring = RADEON_CS_RING_GFX;
256 s32 priority = 0;
771fe6b9 257
a28b2a47
TR
258 INIT_LIST_HEAD(&p->validated);
259
771fe6b9
JG
260 if (!cs->num_chunks) {
261 return 0;
262 }
a28b2a47 263
771fe6b9 264 /* get chunks */
771fe6b9 265 p->idx = 0;
f2e39221 266 p->ib.sa_bo = NULL;
f2e39221 267 p->const_ib.sa_bo = NULL;
6d2d13dd
CK
268 p->chunk_ib = NULL;
269 p->chunk_relocs = NULL;
270 p->chunk_flags = NULL;
271 p->chunk_const_ib = NULL;
771fe6b9
JG
272 p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
273 if (p->chunks_array == NULL) {
274 return -ENOMEM;
275 }
276 chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
1d6ac185 277 if (copy_from_user(p->chunks_array, chunk_array_ptr,
771fe6b9
JG
278 sizeof(uint64_t)*cs->num_chunks)) {
279 return -EFAULT;
280 }
721604a1 281 p->cs_flags = 0;
771fe6b9
JG
282 p->nchunks = cs->num_chunks;
283 p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
284 if (p->chunks == NULL) {
285 return -ENOMEM;
286 }
287 for (i = 0; i < p->nchunks; i++) {
288 struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
289 struct drm_radeon_cs_chunk user_chunk;
290 uint32_t __user *cdata;
291
292 chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
1d6ac185 293 if (copy_from_user(&user_chunk, chunk_ptr,
771fe6b9
JG
294 sizeof(struct drm_radeon_cs_chunk))) {
295 return -EFAULT;
296 }
5176fdc4 297 p->chunks[i].length_dw = user_chunk.length_dw;
6d2d13dd
CK
298 if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
299 p->chunk_relocs = &p->chunks[i];
771fe6b9 300 }
6d2d13dd
CK
301 if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
302 p->chunk_ib = &p->chunks[i];
5176fdc4
DA
303 /* zero length IB isn't useful */
304 if (p->chunks[i].length_dw == 0)
305 return -EINVAL;
771fe6b9 306 }
6d2d13dd
CK
307 if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
308 p->chunk_const_ib = &p->chunks[i];
dfcf5f36
AD
309 /* zero length CONST IB isn't useful */
310 if (p->chunks[i].length_dw == 0)
311 return -EINVAL;
312 }
6d2d13dd
CK
313 if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
314 p->chunk_flags = &p->chunks[i];
721604a1
JG
315 /* zero length flags aren't useful */
316 if (p->chunks[i].length_dw == 0)
317 return -EINVAL;
e70f224c 318 }
5176fdc4 319
28a326c5
ML
320 size = p->chunks[i].length_dw;
321 cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
322 p->chunks[i].user_ptr = cdata;
6d2d13dd 323 if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
28a326c5
ML
324 continue;
325
6d2d13dd 326 if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
28a326c5
ML
327 if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
328 continue;
329 }
330
331 p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
332 size *= sizeof(uint32_t);
333 if (p->chunks[i].kdata == NULL) {
334 return -ENOMEM;
335 }
1d6ac185 336 if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
28a326c5
ML
337 return -EFAULT;
338 }
6d2d13dd 339 if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
28a326c5
ML
340 p->cs_flags = p->chunks[i].kdata[0];
341 if (p->chunks[i].length_dw > 1)
342 ring = p->chunks[i].kdata[1];
343 if (p->chunks[i].length_dw > 2)
344 priority = (s32)p->chunks[i].kdata[2];
771fe6b9
JG
345 }
346 }
721604a1 347
9b00147d
AD
348 /* these are KMS only */
349 if (p->rdev) {
350 if ((p->cs_flags & RADEON_CS_USE_VM) &&
351 !p->rdev->vm_manager.enabled) {
352 DRM_ERROR("VM not active on asic!\n");
353 return -EINVAL;
354 }
1b5475db 355
57449040 356 if (radeon_cs_get_ring(p, ring, priority))
9b00147d 357 return -EINVAL;
721604a1 358
57449040 359 /* we only support VM on some SI+ rings */
60a44540
CK
360 if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
361 if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
362 DRM_ERROR("Ring %d requires VM!\n", p->ring);
363 return -EINVAL;
364 }
365 } else {
366 if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
367 DRM_ERROR("VM not supported on ring %d!\n",
368 p->ring);
369 return -EINVAL;
370 }
57449040 371 }
9b00147d 372 }
721604a1 373
771fe6b9
JG
374 return 0;
375}
376
4330441a
MO
377static int cmp_size_smaller_first(void *priv, struct list_head *a,
378 struct list_head *b)
379{
1d0c0942
CK
380 struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
381 struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);
4330441a
MO
382
383 /* Sort A before B if A is smaller. */
df0af440 384 return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
4330441a
MO
385}
386
771fe6b9
JG
387/**
388 * cs_parser_fini() - clean parser states
389 * @parser: parser structure holding parsing context.
390 * @error: error number
391 *
392 * If error is set than unvalidate buffer, otherwise just free memory
393 * used by parsing context.
394 **/
ecff665f 395static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
771fe6b9
JG
396{
397 unsigned i;
398
e43b5ec0 399 if (!error) {
4330441a
MO
400 /* Sort the buffer list from the smallest to largest buffer,
401 * which affects the order of buffers in the LRU list.
402 * This assures that the smallest buffers are added first
403 * to the LRU list, so they are likely to be later evicted
404 * first, instead of large buffers whose eviction is more
405 * expensive.
406 *
407 * This slightly lowers the number of bytes moved by TTM
408 * per frame under memory pressure.
409 */
410 list_sort(NULL, &parser->validated, cmp_size_smaller_first);
411
ecff665f
ML
412 ttm_eu_fence_buffer_objects(&parser->ticket,
413 &parser->validated,
f2c24b83 414 &parser->ib.fence->base);
ecff665f
ML
415 } else if (backoff) {
416 ttm_eu_backoff_reservation(&parser->ticket,
417 &parser->validated);
e43b5ec0 418 }
147666fb 419
fcbc451b
PN
420 if (parser->relocs != NULL) {
421 for (i = 0; i < parser->nrelocs; i++) {
d33a8fc7
CK
422 struct radeon_bo *bo = parser->relocs[i].robj;
423 if (bo == NULL)
424 continue;
425
426 drm_gem_object_unreference_unlocked(&bo->gem_base);
fcbc451b 427 }
771fe6b9 428 }
48e113e5 429 kfree(parser->track);
b421ed15 430 drm_free_large(parser->relocs);
e5a5fd4d 431 drm_free_large(parser->vm_bos);
28a326c5
ML
432 for (i = 0; i < parser->nchunks; i++)
433 drm_free_large(parser->chunks[i].kdata);
771fe6b9
JG
434 kfree(parser->chunks);
435 kfree(parser->chunks_array);
436 radeon_ib_free(parser->rdev, &parser->ib);
f2e39221 437 radeon_ib_free(parser->rdev, &parser->const_ib);
771fe6b9
JG
438}
439
721604a1
JG
440static int radeon_cs_ib_chunk(struct radeon_device *rdev,
441 struct radeon_cs_parser *parser)
442{
721604a1
JG
443 int r;
444
6d2d13dd 445 if (parser->chunk_ib == NULL)
721604a1
JG
446 return 0;
447
448 if (parser->cs_flags & RADEON_CS_USE_VM)
449 return 0;
450
eb0c19c5 451 r = radeon_cs_parse(rdev, parser->ring, parser);
721604a1
JG
452 if (r || parser->parser_error) {
453 DRM_ERROR("Invalid command stream !\n");
454 return r;
455 }
ce3537d5 456
392a250b
ML
457 r = radeon_cs_sync_rings(parser);
458 if (r) {
459 if (r != -ERESTARTSYS)
460 DRM_ERROR("Failed to sync rings: %i\n", r);
461 return r;
462 }
463
ce3537d5
AD
464 if (parser->ring == R600_RING_TYPE_UVD_INDEX)
465 radeon_uvd_note_usage(rdev);
03afe6f6
AD
466 else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
467 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
468 radeon_vce_note_usage(rdev);
ce3537d5 469
1538a9e0 470 r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
721604a1
JG
471 if (r) {
472 DRM_ERROR("Failed to schedule IB !\n");
473 }
93bf888c 474 return r;
721604a1
JG
475}
476
6d2f2944 477static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
721604a1
JG
478 struct radeon_vm *vm)
479{
6d2f2944 480 struct radeon_device *rdev = p->rdev;
036bf46a 481 struct radeon_bo_va *bo_va;
6d2f2944 482 int i, r;
721604a1 483
6d2f2944
CK
484 r = radeon_vm_update_page_directory(rdev, vm);
485 if (r)
3e8970f9 486 return r;
6d2f2944 487
036bf46a
CK
488 r = radeon_vm_clear_freed(rdev, vm);
489 if (r)
490 return r;
491
cc9e67e3 492 if (vm->ib_bo_va == NULL) {
036bf46a
CK
493 DRM_ERROR("Tmp BO not in VM!\n");
494 return -EINVAL;
495 }
496
cc9e67e3
CK
497 r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
498 &rdev->ring_tmp_bo.bo->tbo.mem);
6d2f2944
CK
499 if (r)
500 return r;
501
502 for (i = 0; i < p->nrelocs; i++) {
503 struct radeon_bo *bo;
504
6d2f2944 505 bo = p->relocs[i].robj;
036bf46a
CK
506 bo_va = radeon_vm_bo_find(vm, bo);
507 if (bo_va == NULL) {
508 dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
509 return -EINVAL;
510 }
511
512 r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
6d2f2944 513 if (r)
721604a1 514 return r;
94214635
CK
515
516 radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
721604a1 517 }
e31ad969
CK
518
519 return radeon_vm_clear_invalids(rdev, vm);
721604a1
JG
520}
521
522static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
523 struct radeon_cs_parser *parser)
524{
721604a1
JG
525 struct radeon_fpriv *fpriv = parser->filp->driver_priv;
526 struct radeon_vm *vm = &fpriv->vm;
527 int r;
528
6d2d13dd 529 if (parser->chunk_ib == NULL)
721604a1 530 return 0;
721604a1
JG
531 if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
532 return 0;
533
28a326c5 534 if (parser->const_ib.length_dw) {
f2e39221 535 r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
dfcf5f36
AD
536 if (r) {
537 return r;
538 }
539 }
540
f2e39221 541 r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
721604a1
JG
542 if (r) {
543 return r;
544 }
545
ce3537d5
AD
546 if (parser->ring == R600_RING_TYPE_UVD_INDEX)
547 radeon_uvd_note_usage(rdev);
548
721604a1 549 mutex_lock(&vm->mutex);
721604a1
JG
550 r = radeon_bo_vm_update_pte(parser, vm);
551 if (r) {
552 goto out;
553 }
392a250b
ML
554
555 r = radeon_cs_sync_rings(parser);
556 if (r) {
557 if (r != -ERESTARTSYS)
558 DRM_ERROR("Failed to sync rings: %i\n", r);
559 goto out;
560 }
4ef72566 561
dfcf5f36 562 if ((rdev->family >= CHIP_TAHITI) &&
6d2d13dd 563 (parser->chunk_const_ib != NULL)) {
1538a9e0 564 r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
4ef72566 565 } else {
1538a9e0 566 r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
dfcf5f36
AD
567 }
568
ee60e29f 569out:
36ff39c4 570 mutex_unlock(&vm->mutex);
721604a1
JG
571 return r;
572}
573
6c6f4783
CK
574static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
575{
576 if (r == -EDEADLK) {
577 r = radeon_gpu_reset(rdev);
578 if (!r)
579 r = -EAGAIN;
580 }
581 return r;
582}
583
28a326c5
ML
584static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
585{
586 struct radeon_cs_chunk *ib_chunk;
587 struct radeon_vm *vm = NULL;
588 int r;
589
6d2d13dd 590 if (parser->chunk_ib == NULL)
28a326c5
ML
591 return 0;
592
593 if (parser->cs_flags & RADEON_CS_USE_VM) {
594 struct radeon_fpriv *fpriv = parser->filp->driver_priv;
595 vm = &fpriv->vm;
596
597 if ((rdev->family >= CHIP_TAHITI) &&
6d2d13dd
CK
598 (parser->chunk_const_ib != NULL)) {
599 ib_chunk = parser->chunk_const_ib;
28a326c5
ML
600 if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
601 DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
602 return -EINVAL;
603 }
604 r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
605 vm, ib_chunk->length_dw * 4);
606 if (r) {
607 DRM_ERROR("Failed to get const ib !\n");
608 return r;
609 }
610 parser->const_ib.is_const_ib = true;
611 parser->const_ib.length_dw = ib_chunk->length_dw;
1d6ac185 612 if (copy_from_user(parser->const_ib.ptr,
28a326c5
ML
613 ib_chunk->user_ptr,
614 ib_chunk->length_dw * 4))
615 return -EFAULT;
616 }
617
6d2d13dd 618 ib_chunk = parser->chunk_ib;
28a326c5
ML
619 if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
620 DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
621 return -EINVAL;
622 }
623 }
6d2d13dd 624 ib_chunk = parser->chunk_ib;
28a326c5
ML
625
626 r = radeon_ib_get(rdev, parser->ring, &parser->ib,
627 vm, ib_chunk->length_dw * 4);
628 if (r) {
629 DRM_ERROR("Failed to get ib !\n");
630 return r;
631 }
632 parser->ib.length_dw = ib_chunk->length_dw;
633 if (ib_chunk->kdata)
634 memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
1d6ac185 635 else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
28a326c5
ML
636 return -EFAULT;
637 return 0;
638}
639
771fe6b9
JG
640int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
641{
642 struct radeon_device *rdev = dev->dev_private;
643 struct radeon_cs_parser parser;
771fe6b9
JG
644 int r;
645
dee53e7f 646 down_read(&rdev->exclusive_lock);
6b7746e8 647 if (!rdev->accel_working) {
dee53e7f 648 up_read(&rdev->exclusive_lock);
6b7746e8
JG
649 return -EBUSY;
650 }
9bb39ff4
ML
651 if (rdev->in_reset) {
652 up_read(&rdev->exclusive_lock);
653 r = radeon_gpu_reset(rdev);
654 if (!r)
655 r = -EAGAIN;
656 return r;
657 }
771fe6b9
JG
658 /* initialize parser */
659 memset(&parser, 0, sizeof(struct radeon_cs_parser));
660 parser.filp = filp;
661 parser.rdev = rdev;
c8c15ff1 662 parser.dev = rdev->dev;
428c6e36 663 parser.family = rdev->family;
771fe6b9
JG
664 r = radeon_cs_parser_init(&parser, data);
665 if (r) {
666 DRM_ERROR("Failed to initialize parser !\n");
ecff665f 667 radeon_cs_parser_fini(&parser, r, false);
dee53e7f 668 up_read(&rdev->exclusive_lock);
6c6f4783 669 r = radeon_cs_handle_lockup(rdev, r);
771fe6b9
JG
670 return r;
671 }
28a326c5
ML
672
673 r = radeon_cs_ib_fill(rdev, &parser);
674 if (!r) {
675 r = radeon_cs_parser_relocs(&parser);
676 if (r && r != -ERESTARTSYS)
97f23b3d 677 DRM_ERROR("Failed to parse relocation %d!\n", r);
28a326c5
ML
678 }
679
680 if (r) {
ecff665f 681 radeon_cs_parser_fini(&parser, r, false);
dee53e7f 682 up_read(&rdev->exclusive_lock);
6c6f4783 683 r = radeon_cs_handle_lockup(rdev, r);
771fe6b9
JG
684 return r;
685 }
55b51c88 686
860024e5
CK
687 trace_radeon_cs(&parser);
688
721604a1 689 r = radeon_cs_ib_chunk(rdev, &parser);
771fe6b9 690 if (r) {
721604a1 691 goto out;
771fe6b9 692 }
721604a1 693 r = radeon_cs_ib_vm_chunk(rdev, &parser);
771fe6b9 694 if (r) {
721604a1 695 goto out;
771fe6b9 696 }
721604a1 697out:
ecff665f 698 radeon_cs_parser_fini(&parser, r, true);
dee53e7f 699 up_read(&rdev->exclusive_lock);
6c6f4783 700 r = radeon_cs_handle_lockup(rdev, r);
771fe6b9
JG
701 return r;
702}
513bcb46 703
4db01311
IH
704/**
705 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
706 * @parser: parser structure holding parsing context.
707 * @pkt: where to store packet information
708 *
709 * Assume that chunk_ib_index is properly set. Will return -EINVAL
710 * if packet is bigger than remaining ib size. or if packets is unknown.
711 **/
712int radeon_cs_packet_parse(struct radeon_cs_parser *p,
713 struct radeon_cs_packet *pkt,
714 unsigned idx)
715{
6d2d13dd 716 struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
4db01311
IH
717 struct radeon_device *rdev = p->rdev;
718 uint32_t header;
e1b4e722 719 int ret = 0, i;
4db01311
IH
720
721 if (idx >= ib_chunk->length_dw) {
722 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
723 idx, ib_chunk->length_dw);
724 return -EINVAL;
725 }
726 header = radeon_get_ib_value(p, idx);
727 pkt->idx = idx;
728 pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
729 pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
730 pkt->one_reg_wr = 0;
731 switch (pkt->type) {
732 case RADEON_PACKET_TYPE0:
733 if (rdev->family < CHIP_R600) {
734 pkt->reg = R100_CP_PACKET0_GET_REG(header);
735 pkt->one_reg_wr =
736 RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
737 } else
738 pkt->reg = R600_CP_PACKET0_GET_REG(header);
739 break;
740 case RADEON_PACKET_TYPE3:
741 pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
742 break;
743 case RADEON_PACKET_TYPE2:
744 pkt->count = -1;
745 break;
746 default:
747 DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
e1b4e722
AD
748 ret = -EINVAL;
749 goto dump_ib;
4db01311
IH
750 }
751 if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
752 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
753 pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
e1b4e722
AD
754 ret = -EINVAL;
755 goto dump_ib;
4db01311
IH
756 }
757 return 0;
e1b4e722
AD
758
759dump_ib:
760 for (i = 0; i < ib_chunk->length_dw; i++) {
761 if (i == idx)
762 printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
763 else
764 printk("\t0x%08x\n", radeon_get_ib_value(p, i));
765 }
766 return ret;
4db01311 767}
9ffb7a6d
IH
768
769/**
770 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
771 * @p: structure holding the parser context.
772 *
773 * Check if the next packet is NOP relocation packet3.
774 **/
775bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
776{
777 struct radeon_cs_packet p3reloc;
778 int r;
779
780 r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
781 if (r)
782 return false;
783 if (p3reloc.type != RADEON_PACKET_TYPE3)
784 return false;
785 if (p3reloc.opcode != RADEON_PACKET3_NOP)
786 return false;
787 return true;
788}
c3ad63af
IH
789
790/**
791 * radeon_cs_dump_packet() - dump raw packet context
792 * @p: structure holding the parser context.
793 * @pkt: structure holding the packet.
794 *
795 * Used mostly for debugging and error reporting.
796 **/
797void radeon_cs_dump_packet(struct radeon_cs_parser *p,
798 struct radeon_cs_packet *pkt)
799{
800 volatile uint32_t *ib;
801 unsigned i;
802 unsigned idx;
803
804 ib = p->ib.ptr;
805 idx = pkt->idx;
806 for (i = 0; i <= (pkt->count + 1); i++, idx++)
807 DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
808}
809
e9716993
IH
810/**
811 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
812 * @parser: parser structure holding parsing context.
813 * @data: pointer to relocation data
814 * @offset_start: starting offset
815 * @offset_mask: offset mask (to align start offset on)
816 * @reloc: reloc informations
817 *
818 * Check if next packet is relocation packet3, do bo validation and compute
819 * GPU offset using the provided start.
820 **/
821int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
1d0c0942 822 struct radeon_bo_list **cs_reloc,
e9716993
IH
823 int nomm)
824{
825 struct radeon_cs_chunk *relocs_chunk;
826 struct radeon_cs_packet p3reloc;
827 unsigned idx;
828 int r;
829
6d2d13dd 830 if (p->chunk_relocs == NULL) {
e9716993
IH
831 DRM_ERROR("No relocation chunk !\n");
832 return -EINVAL;
833 }
834 *cs_reloc = NULL;
6d2d13dd 835 relocs_chunk = p->chunk_relocs;
e9716993
IH
836 r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
837 if (r)
838 return r;
839 p->idx += p3reloc.count + 2;
840 if (p3reloc.type != RADEON_PACKET_TYPE3 ||
841 p3reloc.opcode != RADEON_PACKET3_NOP) {
842 DRM_ERROR("No packet3 for relocation for packet at %d.\n",
843 p3reloc.idx);
844 radeon_cs_dump_packet(p, &p3reloc);
845 return -EINVAL;
846 }
847 idx = radeon_get_ib_value(p, p3reloc.idx + 1);
848 if (idx >= relocs_chunk->length_dw) {
849 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
850 idx, relocs_chunk->length_dw);
851 radeon_cs_dump_packet(p, &p3reloc);
852 return -EINVAL;
853 }
854 /* FIXME: we assume reloc size is 4 dwords */
855 if (nomm) {
856 *cs_reloc = p->relocs;
df0af440 857 (*cs_reloc)->gpu_offset =
e9716993 858 (u64)relocs_chunk->kdata[idx + 3] << 32;
df0af440 859 (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
e9716993 860 } else
466be338 861 *cs_reloc = &p->relocs[(idx / 4)];
e9716993
IH
862 return 0;
863}
This page took 0.656359 seconds and 5 git commands to generate.