drm/i915/bdw: Two-stage execlist submit process
[deliverable/linux.git] drivers/gpu/drm/i915/intel_lrc.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Ben Widawsky <ben@bwidawsk.net>
25 * Michel Thierry <michel.thierry@intel.com>
26 * Thomas Daniel <thomas.daniel@intel.com>
27 * Oscar Mateo <oscar.mateo@intel.com>
28 *
29 */
30
31 /*
32 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
33 * These expanded contexts enable a number of new abilities, especially
34 * "Execlists" (also implemented in this file).
35 *
36 * Execlists are the new method by which, on gen8+ hardware, workloads are
37 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
38 */
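
/*
 * Editor's note: a rough sketch of the two-stage submit flow implemented in
 * this file (the names below are the ones used here):
 *
 *   intel_logical_ring_advance_and_submit()
 *     -> execlists_context_queue()        stage 1: put the request on
 *                                         ring->execlist_queue
 *     -> execlists_context_unqueue()      stage 2: pop up to two queued
 *        -> execlists_submit_context()    contexts, update their ring tails
 *           -> execlists_elsp_write()     and write both descriptors to the
 *                                         ELSP port
 *
 * In this patch stage 2 only runs when the queue was empty; draining a
 * non-empty queue is left to follow-up work.
 */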
39
40 #include <drm/drmP.h>
41 #include <drm/i915_drm.h>
42 #include "i915_drv.h"
43
44 #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
45 #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
46
47 #define GEN8_LR_CONTEXT_ALIGN 4096
48
49 #define RING_ELSP(ring) ((ring)->mmio_base+0x230)
50 #define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234)
51 #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244)
52
53 #define CTX_LRI_HEADER_0 0x01
54 #define CTX_CONTEXT_CONTROL 0x02
55 #define CTX_RING_HEAD 0x04
56 #define CTX_RING_TAIL 0x06
57 #define CTX_RING_BUFFER_START 0x08
58 #define CTX_RING_BUFFER_CONTROL 0x0a
59 #define CTX_BB_HEAD_U 0x0c
60 #define CTX_BB_HEAD_L 0x0e
61 #define CTX_BB_STATE 0x10
62 #define CTX_SECOND_BB_HEAD_U 0x12
63 #define CTX_SECOND_BB_HEAD_L 0x14
64 #define CTX_SECOND_BB_STATE 0x16
65 #define CTX_BB_PER_CTX_PTR 0x18
66 #define CTX_RCS_INDIRECT_CTX 0x1a
67 #define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
68 #define CTX_LRI_HEADER_1 0x21
69 #define CTX_CTX_TIMESTAMP 0x22
70 #define CTX_PDP3_UDW 0x24
71 #define CTX_PDP3_LDW 0x26
72 #define CTX_PDP2_UDW 0x28
73 #define CTX_PDP2_LDW 0x2a
74 #define CTX_PDP1_UDW 0x2c
75 #define CTX_PDP1_LDW 0x2e
76 #define CTX_PDP0_UDW 0x30
77 #define CTX_PDP0_LDW 0x32
78 #define CTX_LRI_HEADER_2 0x41
79 #define CTX_R_PWR_CLK_STATE 0x42
80 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44
81
82 #define GEN8_CTX_VALID (1<<0)
83 #define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
84 #define GEN8_CTX_FORCE_RESTORE (1<<2)
85 #define GEN8_CTX_L3LLC_COHERENT (1<<5)
86 #define GEN8_CTX_PRIVILEGE (1<<8)
87 enum {
88 ADVANCED_CONTEXT = 0,
89 LEGACY_CONTEXT,
90 ADVANCED_AD_CONTEXT,
91 LEGACY_64B_CONTEXT
92 };
93 #define GEN8_CTX_MODE_SHIFT 3
94 enum {
95 FAULT_AND_HANG = 0,
96 FAULT_AND_HALT, /* Debug only */
97 FAULT_AND_STREAM,
98 FAULT_AND_CONTINUE /* Unsupported */
99 };
100 #define GEN8_CTX_ID_SHIFT 32
101
102 int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
103 {
104 WARN_ON(i915.enable_ppgtt == -1);
105
106 if (enable_execlists == 0)
107 return 0;
108
109 if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
110 i915.use_mmio_flip >= 0)
111 return 1;
112
113 return 0;
114 }
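
/*
 * Editor's note: the sanitized value is used as a boolean; 0 keeps execlists
 * off, while any other requested value only enables them when the hardware
 * has logical ring contexts, PPGTT is in use and MMIO flips have not been
 * force-disabled (i915.use_mmio_flip < 0).
 */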
115
116 u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
117 {
118 u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
119
120 /* LRCA is required to be 4K aligned, so the 20 most significant bits
121 * are globally unique */
122 return lrca >> 12;
123 }
124
125 static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
126 {
127 uint64_t desc;
128 uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
129
130 WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
131
132 desc = GEN8_CTX_VALID;
133 desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
134 desc |= GEN8_CTX_L3LLC_COHERENT;
135 desc |= GEN8_CTX_PRIVILEGE;
136 desc |= lrca;
137 desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
138
139 /* TODO: WaDisableLiteRestore when we start using semaphore
140 * signalling between Command Streamers */
141 /* desc |= GEN8_CTX_FORCE_RESTORE; */
142
143 return desc;
144 }
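
/*
 * Editor's note: a worked example of the descriptor built above, assuming a
 * hypothetical LRCA of 0x00012000:
 *
 *   GEN8_CTX_VALID                 0x0000000000000001
 *   LEGACY_CONTEXT << 3            0x0000000000000008
 *   GEN8_CTX_L3LLC_COHERENT        0x0000000000000020
 *   GEN8_CTX_PRIVILEGE             0x0000000000000100
 *   lrca                           0x0000000000012000
 *   (lrca >> 12) << 32 (ctx id)    0x0000001200000000
 *   --------------------------------------------------
 *   desc                           0x0000001200012129
 */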
145
146 static void execlists_elsp_write(struct intel_engine_cs *ring,
147 struct drm_i915_gem_object *ctx_obj0,
148 struct drm_i915_gem_object *ctx_obj1)
149 {
150 struct drm_i915_private *dev_priv = ring->dev->dev_private;
151 uint64_t temp = 0;
152 uint32_t desc[4];
153
154 /* XXX: You must always write both descriptors in the order below. */
155 if (ctx_obj1)
156 temp = execlists_ctx_descriptor(ctx_obj1);
157 else
158 temp = 0;
159 desc[1] = (u32)(temp >> 32);
160 desc[0] = (u32)temp;
161
162 temp = execlists_ctx_descriptor(ctx_obj0);
163 desc[3] = (u32)(temp >> 32);
164 desc[2] = (u32)temp;
165
166 /* Set Force Wakeup bit to prevent GT from entering C6 while
167 * ELSP writes are in progress */
168 gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
169
170 I915_WRITE(RING_ELSP(ring), desc[1]);
171 I915_WRITE(RING_ELSP(ring), desc[0]);
172 I915_WRITE(RING_ELSP(ring), desc[3]);
173 /* The context is automatically loaded after the following */
174 I915_WRITE(RING_ELSP(ring), desc[2]);
175
176 /* ELSP is a write-only register, so use another nearby reg for the posting read instead */
177 POSTING_READ(RING_EXECLIST_STATUS(ring));
178
179 gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
180 }
181
182 static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
183 {
184 struct page *page;
185 uint32_t *reg_state;
186
187 page = i915_gem_object_get_page(ctx_obj, 1);
188 reg_state = kmap_atomic(page);
189
190 reg_state[CTX_RING_TAIL+1] = tail;
191
192 kunmap_atomic(reg_state);
193
194 return 0;
195 }
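
/*
 * Editor's note: the register state lives in page 1 of the context object
 * (see populate_lr_context() below); each CTX_* offset indexes a
 * (register, value) pair, so CTX_RING_TAIL+1 is the value slot for the
 * RING_TAIL register that the hardware reloads on context restore.
 */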
196
197 static int execlists_submit_context(struct intel_engine_cs *ring,
198 struct intel_context *to0, u32 tail0,
199 struct intel_context *to1, u32 tail1)
200 {
201 struct drm_i915_gem_object *ctx_obj0;
202 struct drm_i915_gem_object *ctx_obj1 = NULL;
203
204 ctx_obj0 = to0->engine[ring->id].state;
205 BUG_ON(!ctx_obj0);
206 WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
207
208 execlists_ctx_write_tail(ctx_obj0, tail0);
209
210 if (to1) {
211 ctx_obj1 = to1->engine[ring->id].state;
212 BUG_ON(!ctx_obj1);
213 WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
214
215 execlists_ctx_write_tail(ctx_obj1, tail1);
216 }
217
218 execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
219
220 return 0;
221 }
222
223 static void execlists_context_unqueue(struct intel_engine_cs *ring)
224 {
225 struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
226 struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
227
228 if (list_empty(&ring->execlist_queue))
229 return;
230
231 /* Try to read in pairs */
232 list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
233 execlist_link) {
234 if (!req0) {
235 req0 = cursor;
236 } else if (req0->ctx == cursor->ctx) {
237 /* Same ctx: ignore first request, as second request
238 * will update tail past first request's workload */
239 list_del(&req0->execlist_link);
240 i915_gem_context_unreference(req0->ctx);
241 kfree(req0);
242 req0 = cursor;
243 } else {
244 req1 = cursor;
245 break;
246 }
247 }
248
249 WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
250 req1 ? req1->ctx : NULL,
251 req1 ? req1->tail : 0));
252 }
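
/*
 * Editor's note: a hypothetical queue state to illustrate the pairing above.
 * With requests [A1, A2, B, C] pending (A1 and A2 from the same context),
 * A1 is dropped because A2's tail already covers its workload, the pair
 * (A2, B) is submitted to the ELSP, and C stays queued for a later unqueue.
 */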
253
254 static int execlists_context_queue(struct intel_engine_cs *ring,
255 struct intel_context *to,
256 u32 tail)
257 {
258 struct intel_ctx_submit_request *req = NULL;
259 unsigned long flags;
260 bool was_empty;
261
262 req = kzalloc(sizeof(*req), GFP_KERNEL);
263 if (req == NULL)
264 return -ENOMEM;
265 req->ctx = to;
266 i915_gem_context_reference(req->ctx);
267 req->ring = ring;
268 req->tail = tail;
269
270 spin_lock_irqsave(&ring->execlist_lock, flags);
271
272 was_empty = list_empty(&ring->execlist_queue);
273 list_add_tail(&req->execlist_link, &ring->execlist_queue);
274 if (was_empty)
275 execlists_context_unqueue(ring);
276
277 spin_unlock_irqrestore(&ring->execlist_lock, flags);
278
279 return 0;
280 }
281
282 static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
283 {
284 struct intel_engine_cs *ring = ringbuf->ring;
285 uint32_t flush_domains;
286 int ret;
287
288 flush_domains = 0;
289 if (ring->gpu_caches_dirty)
290 flush_domains = I915_GEM_GPU_DOMAINS;
291
292 ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
293 if (ret)
294 return ret;
295
296 ring->gpu_caches_dirty = false;
297 return 0;
298 }
299
300 static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
301 struct list_head *vmas)
302 {
303 struct intel_engine_cs *ring = ringbuf->ring;
304 struct i915_vma *vma;
305 uint32_t flush_domains = 0;
306 bool flush_chipset = false;
307 int ret;
308
309 list_for_each_entry(vma, vmas, exec_list) {
310 struct drm_i915_gem_object *obj = vma->obj;
311
312 ret = i915_gem_object_sync(obj, ring);
313 if (ret)
314 return ret;
315
316 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
317 flush_chipset |= i915_gem_clflush_object(obj, false);
318
319 flush_domains |= obj->base.write_domain;
320 }
321
322 if (flush_domains & I915_GEM_DOMAIN_GTT)
323 wmb();
324
325 /* Unconditionally invalidate gpu caches and ensure that we do flush
326 * any residual writes from the previous batch.
327 */
328 return logical_ring_invalidate_all_caches(ringbuf);
329 }
330
331 int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
332 struct intel_engine_cs *ring,
333 struct intel_context *ctx,
334 struct drm_i915_gem_execbuffer2 *args,
335 struct list_head *vmas,
336 struct drm_i915_gem_object *batch_obj,
337 u64 exec_start, u32 flags)
338 {
339 struct drm_i915_private *dev_priv = dev->dev_private;
340 struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
341 int instp_mode;
342 u32 instp_mask;
343 int ret;
344
345 instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
346 instp_mask = I915_EXEC_CONSTANTS_MASK;
347 switch (instp_mode) {
348 case I915_EXEC_CONSTANTS_REL_GENERAL:
349 case I915_EXEC_CONSTANTS_ABSOLUTE:
350 case I915_EXEC_CONSTANTS_REL_SURFACE:
351 if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
352 DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
353 return -EINVAL;
354 }
355
356 if (instp_mode != dev_priv->relative_constants_mode) {
357 if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
358 DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
359 return -EINVAL;
360 }
361
362 /* The HW changed the meaning of this bit on gen6 */
363 instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
364 }
365 break;
366 default:
367 DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
368 return -EINVAL;
369 }
370
371 if (args->num_cliprects != 0) {
372 DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
373 return -EINVAL;
374 } else {
375 if (args->DR4 == 0xffffffff) {
376 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
377 args->DR4 = 0;
378 }
379
380 if (args->DR1 || args->DR4 || args->cliprects_ptr) {
381 DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
382 return -EINVAL;
383 }
384 }
385
386 if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
387 DRM_DEBUG("sol reset is gen7 only\n");
388 return -EINVAL;
389 }
390
391 ret = execlists_move_to_gpu(ringbuf, vmas);
392 if (ret)
393 return ret;
394
395 if (ring == &dev_priv->ring[RCS] &&
396 instp_mode != dev_priv->relative_constants_mode) {
397 ret = intel_logical_ring_begin(ringbuf, 4);
398 if (ret)
399 return ret;
400
401 intel_logical_ring_emit(ringbuf, MI_NOOP);
402 intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
403 intel_logical_ring_emit(ringbuf, INSTPM);
404 intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
405 intel_logical_ring_advance(ringbuf);
406
407 dev_priv->relative_constants_mode = instp_mode;
408 }
409
410 ret = ring->emit_bb_start(ringbuf, exec_start, flags);
411 if (ret)
412 return ret;
413
414 i915_gem_execbuffer_move_to_active(vmas, ring);
415 i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
416
417 return 0;
418 }
419
420 void intel_logical_ring_stop(struct intel_engine_cs *ring)
421 {
422 struct drm_i915_private *dev_priv = ring->dev->dev_private;
423 int ret;
424
425 if (!intel_ring_initialized(ring))
426 return;
427
428 ret = intel_ring_idle(ring);
429 if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
430 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
431 ring->name, ret);
432
433 /* TODO: Is this correct with Execlists enabled? */
434 I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
435 if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
436 DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
437 return;
438 }
439 I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
440 }
441
442 int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
443 {
444 struct intel_engine_cs *ring = ringbuf->ring;
445 int ret;
446
447 if (!ring->gpu_caches_dirty)
448 return 0;
449
450 ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
451 if (ret)
452 return ret;
453
454 ring->gpu_caches_dirty = false;
455 return 0;
456 }
457
458 void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
459 {
460 struct intel_engine_cs *ring = ringbuf->ring;
461 struct intel_context *ctx = ringbuf->FIXME_lrc_ctx;
462
463 intel_logical_ring_advance(ringbuf);
464
465 if (intel_ring_stopped(ring))
466 return;
467
468 execlists_context_queue(ring, ctx, ringbuf->tail);
469 }
470
471 static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
472 struct intel_context *ctx)
473 {
474 if (ring->outstanding_lazy_seqno)
475 return 0;
476
477 if (ring->preallocated_lazy_request == NULL) {
478 struct drm_i915_gem_request *request;
479
480 request = kmalloc(sizeof(*request), GFP_KERNEL);
481 if (request == NULL)
482 return -ENOMEM;
483
484 /* Hold a reference to the context this request belongs to
485 * (we will need it when the time comes to emit/retire the
486 * request).
487 */
488 request->ctx = ctx;
489 i915_gem_context_reference(request->ctx);
490
491 ring->preallocated_lazy_request = request;
492 }
493
494 return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
495 }
496
497 static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
498 int bytes)
499 {
500 struct intel_engine_cs *ring = ringbuf->ring;
501 struct drm_i915_gem_request *request;
502 u32 seqno = 0;
503 int ret;
504
505 if (ringbuf->last_retired_head != -1) {
506 ringbuf->head = ringbuf->last_retired_head;
507 ringbuf->last_retired_head = -1;
508
509 ringbuf->space = intel_ring_space(ringbuf);
510 if (ringbuf->space >= bytes)
511 return 0;
512 }
513
514 list_for_each_entry(request, &ring->request_list, list) {
515 if (__intel_ring_space(request->tail, ringbuf->tail,
516 ringbuf->size) >= bytes) {
517 seqno = request->seqno;
518 break;
519 }
520 }
521
522 if (seqno == 0)
523 return -ENOSPC;
524
525 ret = i915_wait_seqno(ring, seqno);
526 if (ret)
527 return ret;
528
529 i915_gem_retire_requests_ring(ring);
530 ringbuf->head = ringbuf->last_retired_head;
531 ringbuf->last_retired_head = -1;
532
533 ringbuf->space = intel_ring_space(ringbuf);
534 return 0;
535 }
536
537 static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
538 int bytes)
539 {
540 struct intel_engine_cs *ring = ringbuf->ring;
541 struct drm_device *dev = ring->dev;
542 struct drm_i915_private *dev_priv = dev->dev_private;
543 unsigned long end;
544 int ret;
545
546 ret = logical_ring_wait_request(ringbuf, bytes);
547 if (ret != -ENOSPC)
548 return ret;
549
550 /* Force the context submission in case we have been skipping it */
551 intel_logical_ring_advance_and_submit(ringbuf);
552
553 /* With GEM the hangcheck timer should kick us out of the loop;
554 * leaving it early runs the risk of corrupting GEM state (due
555 * to running on almost untested codepaths). But on resume
556 * timers don't work yet, so prevent a complete hang in that
557 * case by choosing an insanely large timeout. */
558 end = jiffies + 60 * HZ;
559
560 do {
561 ringbuf->head = I915_READ_HEAD(ring);
562 ringbuf->space = intel_ring_space(ringbuf);
563 if (ringbuf->space >= bytes) {
564 ret = 0;
565 break;
566 }
567
568 msleep(1);
569
570 if (dev_priv->mm.interruptible && signal_pending(current)) {
571 ret = -ERESTARTSYS;
572 break;
573 }
574
575 ret = i915_gem_check_wedge(&dev_priv->gpu_error,
576 dev_priv->mm.interruptible);
577 if (ret)
578 break;
579
580 if (time_after(jiffies, end)) {
581 ret = -EBUSY;
582 break;
583 }
584 } while (1);
585
586 return ret;
587 }
588
589 static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
590 {
591 uint32_t __iomem *virt;
592 int rem = ringbuf->size - ringbuf->tail;
593
594 if (ringbuf->space < rem) {
595 int ret = logical_ring_wait_for_space(ringbuf, rem);
596
597 if (ret)
598 return ret;
599 }
600
601 virt = ringbuf->virtual_start + ringbuf->tail;
602 rem /= 4;
603 while (rem--)
604 iowrite32(MI_NOOP, virt++);
605
606 ringbuf->tail = 0;
607 ringbuf->space = intel_ring_space(ringbuf);
608
609 return 0;
610 }
611
612 static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
613 {
614 int ret;
615
616 if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
617 ret = logical_ring_wrap_buffer(ringbuf);
618 if (unlikely(ret))
619 return ret;
620 }
621
622 if (unlikely(ringbuf->space < bytes)) {
623 ret = logical_ring_wait_for_space(ringbuf, bytes);
624 if (unlikely(ret))
625 return ret;
626 }
627
628 return 0;
629 }
630
631 int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
632 {
633 struct intel_engine_cs *ring = ringbuf->ring;
634 struct drm_device *dev = ring->dev;
635 struct drm_i915_private *dev_priv = dev->dev_private;
636 int ret;
637
638 ret = i915_gem_check_wedge(&dev_priv->gpu_error,
639 dev_priv->mm.interruptible);
640 if (ret)
641 return ret;
642
643 ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
644 if (ret)
645 return ret;
646
647 /* Preallocate the outstanding lazy request (olr) before touching the ring */
648 ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx);
649 if (ret)
650 return ret;
651
652 ringbuf->space -= num_dwords * sizeof(uint32_t);
653 return 0;
654 }
655
656 static int gen8_init_common_ring(struct intel_engine_cs *ring)
657 {
658 struct drm_device *dev = ring->dev;
659 struct drm_i915_private *dev_priv = dev->dev_private;
660
661 I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
662 I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
663
664 I915_WRITE(RING_MODE_GEN7(ring),
665 _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
666 _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
667 POSTING_READ(RING_MODE_GEN7(ring));
668 DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
669
670 memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
671
672 return 0;
673 }
674
675 static int gen8_init_render_ring(struct intel_engine_cs *ring)
676 {
677 struct drm_device *dev = ring->dev;
678 struct drm_i915_private *dev_priv = dev->dev_private;
679 int ret;
680
681 ret = gen8_init_common_ring(ring);
682 if (ret)
683 return ret;
684
685 /* We need to disable the AsyncFlip performance optimisations in order
686 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
687 * programmed to '1' on all products.
688 *
689 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
690 */
691 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
692
693 ret = intel_init_pipe_control(ring);
694 if (ret)
695 return ret;
696
697 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
698
699 return ret;
700 }
701
702 static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
703 u64 offset, unsigned flags)
704 {
705 bool ppgtt = !(flags & I915_DISPATCH_SECURE);
706 int ret;
707
708 ret = intel_logical_ring_begin(ringbuf, 4);
709 if (ret)
710 return ret;
711
712 /* FIXME(BDW): Address space and security selectors. */
713 intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
714 intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
715 intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
716 intel_logical_ring_emit(ringbuf, MI_NOOP);
717 intel_logical_ring_advance(ringbuf);
718
719 return 0;
720 }
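
/*
 * Editor's note: the (ppgtt<<8) term sets the address-space select bit of
 * MI_BATCH_BUFFER_START, so non-secure batches run from the PPGTT; as the
 * FIXME above says, full address space and security selection is still to
 * be wired up.
 */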
721
722 static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
723 {
724 struct drm_device *dev = ring->dev;
725 struct drm_i915_private *dev_priv = dev->dev_private;
726 unsigned long flags;
727
728 if (!dev->irq_enabled)
729 return false;
730
731 spin_lock_irqsave(&dev_priv->irq_lock, flags);
732 if (ring->irq_refcount++ == 0) {
733 I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
734 POSTING_READ(RING_IMR(ring->mmio_base));
735 }
736 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
737
738 return true;
739 }
740
741 static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
742 {
743 struct drm_device *dev = ring->dev;
744 struct drm_i915_private *dev_priv = dev->dev_private;
745 unsigned long flags;
746
747 spin_lock_irqsave(&dev_priv->irq_lock, flags);
748 if (--ring->irq_refcount == 0) {
749 I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
750 POSTING_READ(RING_IMR(ring->mmio_base));
751 }
752 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
753 }
754
755 static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
756 u32 invalidate_domains,
757 u32 unused)
758 {
759 struct intel_engine_cs *ring = ringbuf->ring;
760 struct drm_device *dev = ring->dev;
761 struct drm_i915_private *dev_priv = dev->dev_private;
762 uint32_t cmd;
763 int ret;
764
765 ret = intel_logical_ring_begin(ringbuf, 4);
766 if (ret)
767 return ret;
768
769 cmd = MI_FLUSH_DW + 1;
770
771 if (ring == &dev_priv->ring[VCS]) {
772 if (invalidate_domains & I915_GEM_GPU_DOMAINS)
773 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
774 MI_FLUSH_DW_STORE_INDEX |
775 MI_FLUSH_DW_OP_STOREDW;
776 } else {
777 if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
778 cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
779 MI_FLUSH_DW_OP_STOREDW;
780 }
781
782 intel_logical_ring_emit(ringbuf, cmd);
783 intel_logical_ring_emit(ringbuf,
784 I915_GEM_HWS_SCRATCH_ADDR |
785 MI_FLUSH_DW_USE_GTT);
786 intel_logical_ring_emit(ringbuf, 0); /* upper addr */
787 intel_logical_ring_emit(ringbuf, 0); /* value */
788 intel_logical_ring_advance(ringbuf);
789
790 return 0;
791 }
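
/*
 * Editor's note: the "+ 1" on MI_FLUSH_DW grows the command length by one
 * dword to make room for the upper address dword emitted above; the VCS
 * variant additionally invalidates the BSD TLB when GPU domains are being
 * invalidated.
 */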
792
793 static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
794 u32 invalidate_domains,
795 u32 flush_domains)
796 {
797 struct intel_engine_cs *ring = ringbuf->ring;
798 u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
799 u32 flags = 0;
800 int ret;
801
802 flags |= PIPE_CONTROL_CS_STALL;
803
804 if (flush_domains) {
805 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
806 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
807 }
808
809 if (invalidate_domains) {
810 flags |= PIPE_CONTROL_TLB_INVALIDATE;
811 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
812 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
813 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
814 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
815 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
816 flags |= PIPE_CONTROL_QW_WRITE;
817 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
818 }
819
820 ret = intel_logical_ring_begin(ringbuf, 6);
821 if (ret)
822 return ret;
823
824 intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
825 intel_logical_ring_emit(ringbuf, flags);
826 intel_logical_ring_emit(ringbuf, scratch_addr);
827 intel_logical_ring_emit(ringbuf, 0);
828 intel_logical_ring_emit(ringbuf, 0);
829 intel_logical_ring_emit(ringbuf, 0);
830 intel_logical_ring_advance(ringbuf);
831
832 return 0;
833 }
834
835 static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
836 {
837 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
838 }
839
840 static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
841 {
842 intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
843 }
844
845 static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
846 {
847 struct intel_engine_cs *ring = ringbuf->ring;
848 u32 cmd;
849 int ret;
850
851 ret = intel_logical_ring_begin(ringbuf, 6);
852 if (ret)
853 return ret;
854
855 cmd = MI_STORE_DWORD_IMM_GEN8;
856 cmd |= MI_GLOBAL_GTT;
857
858 intel_logical_ring_emit(ringbuf, cmd);
859 intel_logical_ring_emit(ringbuf,
860 (ring->status_page.gfx_addr +
861 (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
862 intel_logical_ring_emit(ringbuf, 0);
863 intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno);
864 intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
865 intel_logical_ring_emit(ringbuf, MI_NOOP);
866 intel_logical_ring_advance_and_submit(ringbuf);
867
868 return 0;
869 }
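
/*
 * Editor's note: the request emitted above stores the seqno into the status
 * page slot at I915_GEM_HWS_INDEX (the same slot gen8_get_seqno() reads
 * back) and then raises MI_USER_INTERRUPT so waiters are woken once the
 * write has landed.
 */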
870
871 void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
872 {
873 struct drm_i915_private *dev_priv = ring->dev->dev_private;
874
875 if (!intel_ring_initialized(ring))
876 return;
877
878 intel_logical_ring_stop(ring);
879 WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
880 ring->preallocated_lazy_request = NULL;
881 ring->outstanding_lazy_seqno = 0;
882
883 if (ring->cleanup)
884 ring->cleanup(ring);
885
886 i915_cmd_parser_fini_ring(ring);
887
888 if (ring->status_page.obj) {
889 kunmap(sg_page(ring->status_page.obj->pages->sgl));
890 ring->status_page.obj = NULL;
891 }
892 }
893
894 static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
895 {
896 int ret;
897 struct intel_context *dctx = ring->default_context;
898 struct drm_i915_gem_object *dctx_obj;
899
900 /* Intentionally left blank. */
901 ring->buffer = NULL;
902
903 ring->dev = dev;
904 INIT_LIST_HEAD(&ring->active_list);
905 INIT_LIST_HEAD(&ring->request_list);
906 init_waitqueue_head(&ring->irq_queue);
907
908 INIT_LIST_HEAD(&ring->execlist_queue);
909 spin_lock_init(&ring->execlist_lock);
910
911 ret = intel_lr_context_deferred_create(dctx, ring);
912 if (ret)
913 return ret;
914
915 /* The status page is offset 0 from the context object in LRCs. */
916 dctx_obj = dctx->engine[ring->id].state;
917 ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj);
918 ring->status_page.page_addr = kmap(sg_page(dctx_obj->pages->sgl));
919 if (ring->status_page.page_addr == NULL)
920 return -ENOMEM;
921 ring->status_page.obj = dctx_obj;
922
923 ret = i915_cmd_parser_init_ring(ring);
924 if (ret)
925 return ret;
926
927 if (ring->init) {
928 ret = ring->init(ring);
929 if (ret)
930 return ret;
931 }
932
933 return 0;
934 }
935
936 static int logical_render_ring_init(struct drm_device *dev)
937 {
938 struct drm_i915_private *dev_priv = dev->dev_private;
939 struct intel_engine_cs *ring = &dev_priv->ring[RCS];
940
941 ring->name = "render ring";
942 ring->id = RCS;
943 ring->mmio_base = RENDER_RING_BASE;
944 ring->irq_enable_mask =
945 GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
946 ring->irq_keep_mask =
947 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
948 if (HAS_L3_DPF(dev))
949 ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
950
951 ring->init = gen8_init_render_ring;
952 ring->cleanup = intel_fini_pipe_control;
953 ring->get_seqno = gen8_get_seqno;
954 ring->set_seqno = gen8_set_seqno;
955 ring->emit_request = gen8_emit_request;
956 ring->emit_flush = gen8_emit_flush_render;
957 ring->irq_get = gen8_logical_ring_get_irq;
958 ring->irq_put = gen8_logical_ring_put_irq;
959 ring->emit_bb_start = gen8_emit_bb_start;
960
961 return logical_ring_init(dev, ring);
962 }
963
964 static int logical_bsd_ring_init(struct drm_device *dev)
965 {
966 struct drm_i915_private *dev_priv = dev->dev_private;
967 struct intel_engine_cs *ring = &dev_priv->ring[VCS];
968
969 ring->name = "bsd ring";
970 ring->id = VCS;
971 ring->mmio_base = GEN6_BSD_RING_BASE;
972 ring->irq_enable_mask =
973 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
974 ring->irq_keep_mask =
975 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
976
977 ring->init = gen8_init_common_ring;
978 ring->get_seqno = gen8_get_seqno;
979 ring->set_seqno = gen8_set_seqno;
980 ring->emit_request = gen8_emit_request;
981 ring->emit_flush = gen8_emit_flush;
982 ring->irq_get = gen8_logical_ring_get_irq;
983 ring->irq_put = gen8_logical_ring_put_irq;
984 ring->emit_bb_start = gen8_emit_bb_start;
985
986 return logical_ring_init(dev, ring);
987 }
988
989 static int logical_bsd2_ring_init(struct drm_device *dev)
990 {
991 struct drm_i915_private *dev_priv = dev->dev_private;
992 struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
993
994 ring->name = "bds2 ring";
995 ring->id = VCS2;
996 ring->mmio_base = GEN8_BSD2_RING_BASE;
997 ring->irq_enable_mask =
998 GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
999 ring->irq_keep_mask =
1000 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
1001
1002 ring->init = gen8_init_common_ring;
1003 ring->get_seqno = gen8_get_seqno;
1004 ring->set_seqno = gen8_set_seqno;
1005 ring->emit_request = gen8_emit_request;
1006 ring->emit_flush = gen8_emit_flush;
1007 ring->irq_get = gen8_logical_ring_get_irq;
1008 ring->irq_put = gen8_logical_ring_put_irq;
1009 ring->emit_bb_start = gen8_emit_bb_start;
1010
1011 return logical_ring_init(dev, ring);
1012 }
1013
1014 static int logical_blt_ring_init(struct drm_device *dev)
1015 {
1016 struct drm_i915_private *dev_priv = dev->dev_private;
1017 struct intel_engine_cs *ring = &dev_priv->ring[BCS];
1018
1019 ring->name = "blitter ring";
1020 ring->id = BCS;
1021 ring->mmio_base = BLT_RING_BASE;
1022 ring->irq_enable_mask =
1023 GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
1024 ring->irq_keep_mask =
1025 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
1026
1027 ring->init = gen8_init_common_ring;
1028 ring->get_seqno = gen8_get_seqno;
1029 ring->set_seqno = gen8_set_seqno;
1030 ring->emit_request = gen8_emit_request;
1031 ring->emit_flush = gen8_emit_flush;
1032 ring->irq_get = gen8_logical_ring_get_irq;
1033 ring->irq_put = gen8_logical_ring_put_irq;
1034 ring->emit_bb_start = gen8_emit_bb_start;
1035
1036 return logical_ring_init(dev, ring);
1037 }
1038
1039 static int logical_vebox_ring_init(struct drm_device *dev)
1040 {
1041 struct drm_i915_private *dev_priv = dev->dev_private;
1042 struct intel_engine_cs *ring = &dev_priv->ring[VECS];
1043
1044 ring->name = "video enhancement ring";
1045 ring->id = VECS;
1046 ring->mmio_base = VEBOX_RING_BASE;
1047 ring->irq_enable_mask =
1048 GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
1049 ring->irq_keep_mask =
1050 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
1051
1052 ring->init = gen8_init_common_ring;
1053 ring->get_seqno = gen8_get_seqno;
1054 ring->set_seqno = gen8_set_seqno;
1055 ring->emit_request = gen8_emit_request;
1056 ring->emit_flush = gen8_emit_flush;
1057 ring->irq_get = gen8_logical_ring_get_irq;
1058 ring->irq_put = gen8_logical_ring_put_irq;
1059 ring->emit_bb_start = gen8_emit_bb_start;
1060
1061 return logical_ring_init(dev, ring);
1062 }
1063
1064 int intel_logical_rings_init(struct drm_device *dev)
1065 {
1066 struct drm_i915_private *dev_priv = dev->dev_private;
1067 int ret;
1068
1069 ret = logical_render_ring_init(dev);
1070 if (ret)
1071 return ret;
1072
1073 if (HAS_BSD(dev)) {
1074 ret = logical_bsd_ring_init(dev);
1075 if (ret)
1076 goto cleanup_render_ring;
1077 }
1078
1079 if (HAS_BLT(dev)) {
1080 ret = logical_blt_ring_init(dev);
1081 if (ret)
1082 goto cleanup_bsd_ring;
1083 }
1084
1085 if (HAS_VEBOX(dev)) {
1086 ret = logical_vebox_ring_init(dev);
1087 if (ret)
1088 goto cleanup_blt_ring;
1089 }
1090
1091 if (HAS_BSD2(dev)) {
1092 ret = logical_bsd2_ring_init(dev);
1093 if (ret)
1094 goto cleanup_vebox_ring;
1095 }
1096
1097 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
1098 if (ret)
1099 goto cleanup_bsd2_ring;
1100
1101 return 0;
1102
1103 cleanup_bsd2_ring:
1104 intel_logical_ring_cleanup(&dev_priv->ring[VCS2]);
1105 cleanup_vebox_ring:
1106 intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
1107 cleanup_blt_ring:
1108 intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
1109 cleanup_bsd_ring:
1110 intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
1111 cleanup_render_ring:
1112 intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
1113
1114 return ret;
1115 }
1116
1117 static int
1118 populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
1119 struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
1120 {
1121 struct drm_i915_gem_object *ring_obj = ringbuf->obj;
1122 struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
1123 struct page *page;
1124 uint32_t *reg_state;
1125 int ret;
1126
1127 ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
1128 if (ret) {
1129 DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
1130 return ret;
1131 }
1132
1133 ret = i915_gem_object_get_pages(ctx_obj);
1134 if (ret) {
1135 DRM_DEBUG_DRIVER("Could not get object pages\n");
1136 return ret;
1137 }
1138
1139 i915_gem_object_pin_pages(ctx_obj);
1140
1141 /* The second page of the context object contains some fields which must
1142 * be set up prior to the first execution. */
1143 page = i915_gem_object_get_page(ctx_obj, 1);
1144 reg_state = kmap_atomic(page);
1145
1146 /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
1147 * commands followed by (reg, value) pairs. The values we are setting here are
1148 * only for the first context restore: on a subsequent save, the GPU will
1149 * recreate this batchbuffer with new values (including all the missing
1150 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
1151 if (ring->id == RCS)
1152 reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
1153 else
1154 reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
1155 reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
1156 reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
1157 reg_state[CTX_CONTEXT_CONTROL+1] =
1158 _MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT);
1159 reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
1160 reg_state[CTX_RING_HEAD+1] = 0;
1161 reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
1162 reg_state[CTX_RING_TAIL+1] = 0;
1163 reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
1164 reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
1165 reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
1166 reg_state[CTX_RING_BUFFER_CONTROL+1] =
1167 ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
1168 reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
1169 reg_state[CTX_BB_HEAD_U+1] = 0;
1170 reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
1171 reg_state[CTX_BB_HEAD_L+1] = 0;
1172 reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
1173 reg_state[CTX_BB_STATE+1] = (1<<5);
1174 reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
1175 reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
1176 reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
1177 reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
1178 reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
1179 reg_state[CTX_SECOND_BB_STATE+1] = 0;
1180 if (ring->id == RCS) {
1181 /* TODO: according to BSpec, the register state context
1182 * for CHV does not have these. OTOH, these registers do
1183 * exist in CHV. I'm waiting for a clarification */
1184 reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
1185 reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
1186 reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
1187 reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
1188 reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
1189 reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
1190 }
1191 reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
1192 reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
1193 reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
1194 reg_state[CTX_CTX_TIMESTAMP+1] = 0;
1195 reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
1196 reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
1197 reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
1198 reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
1199 reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
1200 reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
1201 reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
1202 reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
1203 reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]);
1204 reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]);
1205 reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]);
1206 reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]);
1207 reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]);
1208 reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]);
1209 reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]);
1210 reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]);
1211 if (ring->id == RCS) {
1212 reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
1213 reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
1214 reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
1215 }
1216
1217 kunmap_atomic(reg_state);
1218
1219 ctx_obj->dirty = 1;
1220 set_page_dirty(page);
1221 i915_gem_object_unpin_pages(ctx_obj);
1222
1223 return 0;
1224 }
1225
1226 void intel_lr_context_free(struct intel_context *ctx)
1227 {
1228 int i;
1229
1230 for (i = 0; i < I915_NUM_RINGS; i++) {
1231 struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
1232 struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
1233
1234 if (ctx_obj) {
1235 intel_destroy_ringbuffer_obj(ringbuf);
1236 kfree(ringbuf);
1237 i915_gem_object_ggtt_unpin(ctx_obj);
1238 drm_gem_object_unreference(&ctx_obj->base);
1239 }
1240 }
1241 }
1242
1243 static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
1244 {
1245 int ret = 0;
1246
1247 WARN_ON(INTEL_INFO(ring->dev)->gen != 8);
1248
1249 switch (ring->id) {
1250 case RCS:
1251 ret = GEN8_LR_CONTEXT_RENDER_SIZE;
1252 break;
1253 case VCS:
1254 case BCS:
1255 case VECS:
1256 case VCS2:
1257 ret = GEN8_LR_CONTEXT_OTHER_SIZE;
1258 break;
1259 }
1260
1261 return ret;
1262 }
1263
1264 int intel_lr_context_deferred_create(struct intel_context *ctx,
1265 struct intel_engine_cs *ring)
1266 {
1267 struct drm_device *dev = ring->dev;
1268 struct drm_i915_gem_object *ctx_obj;
1269 uint32_t context_size;
1270 struct intel_ringbuffer *ringbuf;
1271 int ret;
1272
1273 WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
1274 if (ctx->engine[ring->id].state)
1275 return 0;
1276
1277 context_size = round_up(get_lr_context_size(ring), 4096);
1278
1279 ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
1280 if (IS_ERR(ctx_obj)) {
1281 ret = PTR_ERR(ctx_obj);
1282 DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
1283 return ret;
1284 }
1285
1286 ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
1287 if (ret) {
1288 DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
1289 drm_gem_object_unreference(&ctx_obj->base);
1290 return ret;
1291 }
1292
1293 ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
1294 if (!ringbuf) {
1295 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
1296 ring->name);
1297 i915_gem_object_ggtt_unpin(ctx_obj);
1298 drm_gem_object_unreference(&ctx_obj->base);
1299 ret = -ENOMEM;
1300 return ret;
1301 }
1302
1303 ringbuf->ring = ring;
1304 ringbuf->FIXME_lrc_ctx = ctx;
1305
1306 ringbuf->size = 32 * PAGE_SIZE;
1307 ringbuf->effective_size = ringbuf->size;
1308 ringbuf->head = 0;
1309 ringbuf->tail = 0;
1310 ringbuf->space = ringbuf->size;
1311 ringbuf->last_retired_head = -1;
1312
1313 /* TODO: For now we put this in the mappable region so that we can reuse
1314 * the existing ringbuffer code which ioremaps it. When we start
1315 * creating many contexts, this will no longer work and we must switch
1316 * to a kmapish interface.
1317 */
1318 ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
1319 if (ret) {
1320 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
1321 ring->name, ret);
1322 goto error;
1323 }
1324
1325 ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
1326 if (ret) {
1327 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
1328 intel_destroy_ringbuffer_obj(ringbuf);
1329 goto error;
1330 }
1331
1332 ctx->engine[ring->id].ringbuf = ringbuf;
1333 ctx->engine[ring->id].state = ctx_obj;
1334
1335 return 0;
1336
1337 error:
1338 kfree(ringbuf);
1339 i915_gem_object_ggtt_unpin(ctx_obj);
1340 drm_gem_object_unreference(&ctx_obj->base);
1341 return ret;
1342 }