drm/i915: Rename intel_context[engine].ringbuf
drivers/gpu/drm/i915/intel_ringbuffer.c
1 /*
2 * Copyright © 2008-2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 * Zou Nan hai <nanhai.zou@intel.com>
26 * Xiang Hai hao <haihao.xiang@intel.com>
27 *
28 */
29
30 #include <linux/log2.h>
31 #include <drm/drmP.h>
32 #include "i915_drv.h"
33 #include <drm/i915_drm.h>
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36
37 /* Rough estimate of the typical request size, performing a flush,
38 * set-context and then emitting the batch.
39 */
40 #define LEGACY_REQUEST_SIZE 200
41
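/*
 * Return the free space in the circular ring: the distance from tail back
 * round to head, less the I915_RING_FREE_SPACE bytes of headroom the
 * driver always keeps in reserve.
 */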
42 int __intel_ring_space(int head, int tail, int size)
43 {
44 int space = head - tail;
45 if (space <= 0)
46 space += size;
47 return space - I915_RING_FREE_SPACE;
48 }
49
50 void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
51 {
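/*
 * If requests have been retired since we last looked, jump our cached
 * head forward to the position recorded at retirement (-1 means nothing
 * new was retired) before recomputing the available space.
 */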
52 if (ringbuf->last_retired_head != -1) {
53 ringbuf->head = ringbuf->last_retired_head;
54 ringbuf->last_retired_head = -1;
55 }
56
57 ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
58 ringbuf->tail, ringbuf->size);
59 }
60
61 static void __intel_engine_submit(struct intel_engine_cs *engine)
62 {
63 struct intel_ringbuffer *ringbuf = engine->buffer;
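/*
 * Wrap the software tail back into the ring (the size is a power of two,
 * so masking wraps it) and write it to the hardware TAIL register,
 * submitting everything emitted since the last write for execution.
 */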
64 ringbuf->tail &= ringbuf->size - 1;
65 engine->write_tail(engine, ringbuf->tail);
66 }
67
68 static int
69 gen2_render_ring_flush(struct drm_i915_gem_request *req,
70 u32 invalidate_domains,
71 u32 flush_domains)
72 {
73 struct intel_ringbuffer *ring = req->ring;
74 u32 cmd;
75 int ret;
76
77 cmd = MI_FLUSH;
78 if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
79 cmd |= MI_NO_WRITE_FLUSH;
80
81 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
82 cmd |= MI_READ_FLUSH;
83
84 ret = intel_ring_begin(req, 2);
85 if (ret)
86 return ret;
87
88 intel_ring_emit(ring, cmd);
89 intel_ring_emit(ring, MI_NOOP);
90 intel_ring_advance(ring);
91
92 return 0;
93 }
94
95 static int
96 gen4_render_ring_flush(struct drm_i915_gem_request *req,
97 u32 invalidate_domains,
98 u32 flush_domains)
99 {
100 struct intel_ringbuffer *ring = req->ring;
101 u32 cmd;
102 int ret;
103
104 /*
105 * read/write caches:
106 *
107 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
108 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
109 * also flushed at 2d versus 3d pipeline switches.
110 *
111 * read-only caches:
112 *
113 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
114 * MI_READ_FLUSH is set, and is always flushed on 965.
115 *
116 * I915_GEM_DOMAIN_COMMAND may not exist?
117 *
118 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
119 * invalidated when MI_EXE_FLUSH is set.
120 *
121 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
122 * invalidated with every MI_FLUSH.
123 *
124 * TLBs:
125 *
126 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
127 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
128 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
129 * are flushed at any MI_FLUSH.
130 */
131
132 cmd = MI_FLUSH;
133 if (invalidate_domains) {
134 cmd |= MI_EXE_FLUSH;
135 if (IS_G4X(req->i915) || IS_GEN5(req->i915))
136 cmd |= MI_INVALIDATE_ISP;
137 }
138
139 ret = intel_ring_begin(req, 2);
140 if (ret)
141 return ret;
142
143 intel_ring_emit(ring, cmd);
144 intel_ring_emit(ring, MI_NOOP);
145 intel_ring_advance(ring);
146
147 return 0;
148 }
149
150 /**
151 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
152 * implementing two workarounds on gen6. From section 1.4.7.1
153 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
154 *
155 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
156 * produced by non-pipelined state commands), software needs to first
157 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
158 * 0.
159 *
160 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
161 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
162 *
163 * And the workaround for these two requires this workaround first:
164 *
165 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
166 * BEFORE the pipe-control with a post-sync op and no write-cache
167 * flushes.
168 *
169 * And this last workaround is tricky because of the requirements on
170 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
171 * volume 2 part 1:
172 *
173 * "1 of the following must also be set:
174 * - Render Target Cache Flush Enable ([12] of DW1)
175 * - Depth Cache Flush Enable ([0] of DW1)
176 * - Stall at Pixel Scoreboard ([1] of DW1)
177 * - Depth Stall ([13] of DW1)
178 * - Post-Sync Operation ([13] of DW1)
179 * - Notify Enable ([8] of DW1)"
180 *
181 * The cache flushes require the workaround flush that triggered this
182 * one, so we can't use it. Depth stall would trigger the same.
183 * Post-sync nonzero is what triggered this second workaround, so we
184 * can't use that one either. Notify enable is IRQs, which aren't
185 * really our business. That leaves only stall at scoreboard.
186 */
187 static int
188 intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
189 {
190 struct intel_ringbuffer *ring = req->ring;
191 u32 scratch_addr =
192 req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
193 int ret;
194
195 ret = intel_ring_begin(req, 6);
196 if (ret)
197 return ret;
198
199 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
200 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
201 PIPE_CONTROL_STALL_AT_SCOREBOARD);
202 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
203 intel_ring_emit(ring, 0); /* low dword */
204 intel_ring_emit(ring, 0); /* high dword */
205 intel_ring_emit(ring, MI_NOOP);
206 intel_ring_advance(ring);
207
208 ret = intel_ring_begin(req, 6);
209 if (ret)
210 return ret;
211
212 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
213 intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
214 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
215 intel_ring_emit(ring, 0);
216 intel_ring_emit(ring, 0);
217 intel_ring_emit(ring, MI_NOOP);
218 intel_ring_advance(ring);
219
220 return 0;
221 }
222
223 static int
224 gen6_render_ring_flush(struct drm_i915_gem_request *req,
225 u32 invalidate_domains, u32 flush_domains)
226 {
227 struct intel_ringbuffer *ring = req->ring;
228 u32 scratch_addr =
229 req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
230 u32 flags = 0;
231 int ret;
232
233 /* Force SNB workarounds for PIPE_CONTROL flushes */
234 ret = intel_emit_post_sync_nonzero_flush(req);
235 if (ret)
236 return ret;
237
238 /* Just flush everything. Experiments have shown that reducing the
239 * number of bits based on the write domains has little performance
240 * impact.
241 */
242 if (flush_domains) {
243 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
244 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
245 /*
246 * Ensure that any following seqno writes only happen
247 * when the render cache is indeed flushed.
248 */
249 flags |= PIPE_CONTROL_CS_STALL;
250 }
251 if (invalidate_domains) {
252 flags |= PIPE_CONTROL_TLB_INVALIDATE;
253 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
254 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
255 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
256 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
257 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
258 /*
259 * TLB invalidate requires a post-sync write.
260 */
261 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
262 }
263
264 ret = intel_ring_begin(req, 4);
265 if (ret)
266 return ret;
267
268 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
269 intel_ring_emit(ring, flags);
270 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
271 intel_ring_emit(ring, 0);
272 intel_ring_advance(ring);
273
274 return 0;
275 }
276
277 static int
278 gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
279 {
280 struct intel_ringbuffer *ring = req->ring;
281 int ret;
282
283 ret = intel_ring_begin(req, 4);
284 if (ret)
285 return ret;
286
287 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
288 intel_ring_emit(ring,
289 PIPE_CONTROL_CS_STALL |
290 PIPE_CONTROL_STALL_AT_SCOREBOARD);
291 intel_ring_emit(ring, 0);
292 intel_ring_emit(ring, 0);
293 intel_ring_advance(ring);
294
295 return 0;
296 }
297
298 static int
299 gen7_render_ring_flush(struct drm_i915_gem_request *req,
300 u32 invalidate_domains, u32 flush_domains)
301 {
302 struct intel_ringbuffer *ring = req->ring;
303 u32 scratch_addr =
304 req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
305 u32 flags = 0;
306 int ret;
307
308 /*
309 * Ensure that any following seqno writes only happen when the render
310 * cache is indeed flushed.
311 *
312 * Workaround: 4th PIPE_CONTROL command (except the ones with only
313 * read-cache invalidate bits set) must have the CS_STALL bit set. We
314 * don't try to be clever and just set it unconditionally.
315 */
316 flags |= PIPE_CONTROL_CS_STALL;
317
318 /* Just flush everything. Experiments have shown that reducing the
319 * number of bits based on the write domains has little performance
320 * impact.
321 */
322 if (flush_domains) {
323 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
324 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
325 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
326 flags |= PIPE_CONTROL_FLUSH_ENABLE;
327 }
328 if (invalidate_domains) {
329 flags |= PIPE_CONTROL_TLB_INVALIDATE;
330 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
331 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
332 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
333 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
334 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
335 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
336 /*
337 * TLB invalidate requires a post-sync write.
338 */
339 flags |= PIPE_CONTROL_QW_WRITE;
340 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
341
342 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
343
344 /* Workaround: we must issue a pipe_control with CS-stall bit
345 * set before a pipe_control command that has the state cache
346 * invalidate bit set. */
347 gen7_render_ring_cs_stall_wa(req);
348 }
349
350 ret = intel_ring_begin(req, 4);
351 if (ret)
352 return ret;
353
354 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
355 intel_ring_emit(ring, flags);
356 intel_ring_emit(ring, scratch_addr);
357 intel_ring_emit(ring, 0);
358 intel_ring_advance(ring);
359
360 return 0;
361 }
362
363 static int
364 gen8_emit_pipe_control(struct drm_i915_gem_request *req,
365 u32 flags, u32 scratch_addr)
366 {
367 struct intel_ringbuffer *ring = req->ring;
368 int ret;
369
370 ret = intel_ring_begin(req, 6);
371 if (ret)
372 return ret;
373
374 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
375 intel_ring_emit(ring, flags);
376 intel_ring_emit(ring, scratch_addr);
377 intel_ring_emit(ring, 0);
378 intel_ring_emit(ring, 0);
379 intel_ring_emit(ring, 0);
380 intel_ring_advance(ring);
381
382 return 0;
383 }
384
385 static int
386 gen8_render_ring_flush(struct drm_i915_gem_request *req,
387 u32 invalidate_domains, u32 flush_domains)
388 {
389 u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
390 u32 flags = 0;
391 int ret;
392
393 flags |= PIPE_CONTROL_CS_STALL;
394
395 if (flush_domains) {
396 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
397 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
398 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
399 flags |= PIPE_CONTROL_FLUSH_ENABLE;
400 }
401 if (invalidate_domains) {
402 flags |= PIPE_CONTROL_TLB_INVALIDATE;
403 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
404 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
405 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
406 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
407 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
408 flags |= PIPE_CONTROL_QW_WRITE;
409 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
410
411 /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
412 ret = gen8_emit_pipe_control(req,
413 PIPE_CONTROL_CS_STALL |
414 PIPE_CONTROL_STALL_AT_SCOREBOARD,
415 0);
416 if (ret)
417 return ret;
418 }
419
420 return gen8_emit_pipe_control(req, flags, scratch_addr);
421 }
422
423 static void ring_write_tail(struct intel_engine_cs *engine,
424 u32 value)
425 {
426 struct drm_i915_private *dev_priv = engine->i915;
427 I915_WRITE_TAIL(engine, value);
428 }
429
430 u64 intel_ring_get_active_head(struct intel_engine_cs *engine)
431 {
432 struct drm_i915_private *dev_priv = engine->i915;
433 u64 acthd;
434
435 if (INTEL_GEN(dev_priv) >= 8)
436 acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
437 RING_ACTHD_UDW(engine->mmio_base));
438 else if (INTEL_GEN(dev_priv) >= 4)
439 acthd = I915_READ(RING_ACTHD(engine->mmio_base));
440 else
441 acthd = I915_READ(ACTHD);
442
443 return acthd;
444 }
445
446 static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
447 {
448 struct drm_i915_private *dev_priv = engine->i915;
449 u32 addr;
450
451 addr = dev_priv->status_page_dmah->busaddr;
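/*
 * On gen4+, bits 35:32 of the bus address are folded into bits 7:4 of the
 * value written to HWS_PGA, alongside the page-aligned low bits.
 */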
452 if (INTEL_GEN(dev_priv) >= 4)
453 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
454 I915_WRITE(HWS_PGA, addr);
455 }
456
457 static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
458 {
459 struct drm_i915_private *dev_priv = engine->i915;
460 i915_reg_t mmio;
461
462 /* The ring status page addresses are no longer next to the rest of
463 * the ring registers as of gen7.
464 */
465 if (IS_GEN7(dev_priv)) {
466 switch (engine->id) {
467 case RCS:
468 mmio = RENDER_HWS_PGA_GEN7;
469 break;
470 case BCS:
471 mmio = BLT_HWS_PGA_GEN7;
472 break;
473 /*
474 * VCS2 doesn't actually exist on Gen7; it is listed here only to
475 * silence gcc's switch-case coverage warning.
476 */
477 case VCS2:
478 case VCS:
479 mmio = BSD_HWS_PGA_GEN7;
480 break;
481 case VECS:
482 mmio = VEBOX_HWS_PGA_GEN7;
483 break;
484 }
485 } else if (IS_GEN6(dev_priv)) {
486 mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
487 } else {
488 /* XXX: gen8 returns to sanity */
489 mmio = RING_HWS_PGA(engine->mmio_base);
490 }
491
492 I915_WRITE(mmio, (u32)engine->status_page.gfx_addr);
493 POSTING_READ(mmio);
494
495 /*
496 * Flush the TLB for this page
497 *
498 * FIXME: These two bits have disappeared on gen8, so a question
499 * arises: do we still need this and if so how should we go about
500 * invalidating the TLB?
501 */
502 if (IS_GEN(dev_priv, 6, 7)) {
503 i915_reg_t reg = RING_INSTPM(engine->mmio_base);
504
505 /* ring should be idle before issuing a sync flush */
506 WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
507
508 I915_WRITE(reg,
509 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
510 INSTPM_SYNC_FLUSH));
511 if (intel_wait_for_register(dev_priv,
512 reg, INSTPM_SYNC_FLUSH, 0,
513 1000))
514 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
515 engine->name);
516 }
517 }
518
519 static bool stop_ring(struct intel_engine_cs *engine)
520 {
521 struct drm_i915_private *dev_priv = engine->i915;
522
523 if (!IS_GEN2(dev_priv)) {
524 I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
525 if (intel_wait_for_register(dev_priv,
526 RING_MI_MODE(engine->mmio_base),
527 MODE_IDLE,
528 MODE_IDLE,
529 1000)) {
530 DRM_ERROR("%s : timed out trying to stop ring\n",
531 engine->name);
532 /* Sometimes we observe that the idle flag is not
533 * set even though the ring is empty. So double
534 * check before giving up.
535 */
536 if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine))
537 return false;
538 }
539 }
540
541 I915_WRITE_CTL(engine, 0);
542 I915_WRITE_HEAD(engine, 0);
543 engine->write_tail(engine, 0);
544
545 if (!IS_GEN2(dev_priv)) {
546 (void)I915_READ_CTL(engine);
547 I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
548 }
549
550 return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;
551 }
552
553 static int init_ring_common(struct intel_engine_cs *engine)
554 {
555 struct drm_i915_private *dev_priv = engine->i915;
556 struct intel_ringbuffer *ringbuf = engine->buffer;
557 struct drm_i915_gem_object *obj = ringbuf->obj;
558 int ret = 0;
559
560 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
561
562 if (!stop_ring(engine)) {
563 /* G45 ring initialization often fails to reset head to zero */
564 DRM_DEBUG_KMS("%s head not reset to zero "
565 "ctl %08x head %08x tail %08x start %08x\n",
566 engine->name,
567 I915_READ_CTL(engine),
568 I915_READ_HEAD(engine),
569 I915_READ_TAIL(engine),
570 I915_READ_START(engine));
571
572 if (!stop_ring(engine)) {
573 DRM_ERROR("failed to set %s head to zero "
574 "ctl %08x head %08x tail %08x start %08x\n",
575 engine->name,
576 I915_READ_CTL(engine),
577 I915_READ_HEAD(engine),
578 I915_READ_TAIL(engine),
579 I915_READ_START(engine));
580 ret = -EIO;
581 goto out;
582 }
583 }
584
585 if (I915_NEED_GFX_HWS(dev_priv))
586 intel_ring_setup_status_page(engine);
587 else
588 ring_setup_phys_status_page(engine);
589
590 /* Enforce ordering by reading HEAD register back */
591 I915_READ_HEAD(engine);
592
593 /* Initialize the ring. This must happen _after_ we've cleared the ring
594 * registers with the above sequence (the readback of the HEAD registers
595 * also enforces ordering), otherwise the hw might lose the new ring
596 * register values. */
597 I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj));
598
599 /* WaClearRingBufHeadRegAtInit:ctg,elk */
600 if (I915_READ_HEAD(engine))
601 DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
602 engine->name, I915_READ_HEAD(engine));
603 I915_WRITE_HEAD(engine, 0);
604 (void)I915_READ_HEAD(engine);
605
606 I915_WRITE_CTL(engine,
607 ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
608 | RING_VALID);
609
610 /* If the head is still not zero, the ring is dead */
611 if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
612 I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) &&
613 (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
614 DRM_ERROR("%s initialization failed "
615 "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
616 engine->name,
617 I915_READ_CTL(engine),
618 I915_READ_CTL(engine) & RING_VALID,
619 I915_READ_HEAD(engine), I915_READ_TAIL(engine),
620 I915_READ_START(engine),
621 (unsigned long)i915_gem_obj_ggtt_offset(obj));
622 ret = -EIO;
623 goto out;
624 }
625
626 ringbuf->last_retired_head = -1;
627 ringbuf->head = I915_READ_HEAD(engine);
628 ringbuf->tail = I915_READ_TAIL(engine) & TAIL_ADDR;
629 intel_ring_update_space(ringbuf);
630
631 intel_engine_init_hangcheck(engine);
632
633 out:
634 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
635
636 return ret;
637 }
638
639 void intel_fini_pipe_control(struct intel_engine_cs *engine)
640 {
641 if (engine->scratch.obj == NULL)
642 return;
643
644 i915_gem_object_ggtt_unpin(engine->scratch.obj);
645 i915_gem_object_put(engine->scratch.obj);
646 engine->scratch.obj = NULL;
647 }
648
649 int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
650 {
651 struct drm_i915_gem_object *obj;
652 int ret;
653
654 WARN_ON(engine->scratch.obj);
655
656 obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
657 if (!obj)
658 obj = i915_gem_object_create(&engine->i915->drm, size);
659 if (IS_ERR(obj)) {
660 DRM_ERROR("Failed to allocate scratch page\n");
661 ret = PTR_ERR(obj);
662 goto err;
663 }
664
665 ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH);
666 if (ret)
667 goto err_unref;
668
669 engine->scratch.obj = obj;
670 engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
671 DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
672 engine->name, engine->scratch.gtt_offset);
673 return 0;
674
675 err_unref:
676 i915_gem_object_put(engine->scratch.obj);
677 err:
678 return ret;
679 }
680
681 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
682 {
683 struct intel_ringbuffer *ring = req->ring;
684 struct i915_workarounds *w = &req->i915->workarounds;
685 int ret, i;
686
687 if (w->count == 0)
688 return 0;
689
690 req->engine->gpu_caches_dirty = true;
691 ret = intel_ring_flush_all_caches(req);
692 if (ret)
693 return ret;
694
695 ret = intel_ring_begin(req, (w->count * 2 + 2));
696 if (ret)
697 return ret;
698
699 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
700 for (i = 0; i < w->count; i++) {
701 intel_ring_emit_reg(ring, w->reg[i].addr);
702 intel_ring_emit(ring, w->reg[i].value);
703 }
704 intel_ring_emit(ring, MI_NOOP);
705
706 intel_ring_advance(ring);
707
708 req->engine->gpu_caches_dirty = true;
709 ret = intel_ring_flush_all_caches(req);
710 if (ret)
711 return ret;
712
713 DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
714
715 return 0;
716 }
717
718 static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
719 {
720 int ret;
721
722 ret = intel_ring_workarounds_emit(req);
723 if (ret != 0)
724 return ret;
725
726 ret = i915_gem_render_state_init(req);
727 if (ret)
728 return ret;
729
730 return 0;
731 }
732
733 static int wa_add(struct drm_i915_private *dev_priv,
734 i915_reg_t addr,
735 const u32 mask, const u32 val)
736 {
737 const u32 idx = dev_priv->workarounds.count;
738
739 if (WARN_ON(idx >= I915_MAX_WA_REGS))
740 return -ENOSPC;
741
742 dev_priv->workarounds.reg[idx].addr = addr;
743 dev_priv->workarounds.reg[idx].value = val;
744 dev_priv->workarounds.reg[idx].mask = mask;
745
746 dev_priv->workarounds.count++;
747
748 return 0;
749 }
750
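/*
 * The WA_* helpers below record (register, mask, value) tuples in
 * dev_priv->workarounds; the MASKED variants use the hardware's
 * masked-write format, where the upper 16 bits select which of the lower
 * 16 bits take effect. The recorded list is later replayed with
 * MI_LOAD_REGISTER_IMM by intel_ring_workarounds_emit().
 */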
751 #define WA_REG(addr, mask, val) do { \
752 const int r = wa_add(dev_priv, (addr), (mask), (val)); \
753 if (r) \
754 return r; \
755 } while (0)
756
757 #define WA_SET_BIT_MASKED(addr, mask) \
758 WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
759
760 #define WA_CLR_BIT_MASKED(addr, mask) \
761 WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
762
763 #define WA_SET_FIELD_MASKED(addr, mask, value) \
764 WA_REG(addr, mask, _MASKED_FIELD(mask, value))
765
766 #define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
767 #define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
768
769 #define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
770
771 static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
772 i915_reg_t reg)
773 {
774 struct drm_i915_private *dev_priv = engine->i915;
775 struct i915_workarounds *wa = &dev_priv->workarounds;
776 const uint32_t index = wa->hw_whitelist_count[engine->id];
777
778 if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
779 return -EINVAL;
780
781 WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
782 i915_mmio_reg_offset(reg));
783 wa->hw_whitelist_count[engine->id]++;
784
785 return 0;
786 }
787
788 static int gen8_init_workarounds(struct intel_engine_cs *engine)
789 {
790 struct drm_i915_private *dev_priv = engine->i915;
791
792 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
793
794 /* WaDisableAsyncFlipPerfMode:bdw,chv */
795 WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
796
797 /* WaDisablePartialInstShootdown:bdw,chv */
798 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
799 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
800
801 /* Use Force Non-Coherent whenever executing a 3D context. This is a
802 * workaround for a possible hang in the unlikely event a TLB
803 * invalidation occurs during a PSD flush.
804 */
805 /* WaForceEnableNonCoherent:bdw,chv */
806 /* WaHdcDisableFetchWhenMasked:bdw,chv */
807 WA_SET_BIT_MASKED(HDC_CHICKEN0,
808 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
809 HDC_FORCE_NON_COHERENT);
810
811 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
812 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
813 * polygons in the same 8x4 pixel/sample area to be processed without
814 * stalling waiting for the earlier ones to write to Hierarchical Z
815 * buffer."
816 *
817 * This optimization is off by default for BDW and CHV; turn it on.
818 */
819 WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
820
821 /* Wa4x4STCOptimizationDisable:bdw,chv */
822 WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
823
824 /*
825 * BSpec recommends 8x4 when MSAA is used,
826 * however in practice 16x4 seems fastest.
827 *
828 * Note that PS/WM thread counts depend on the WIZ hashing
829 * disable bit, which we don't touch here, but it's good
830 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
831 */
832 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
833 GEN6_WIZ_HASHING_MASK,
834 GEN6_WIZ_HASHING_16x4);
835
836 return 0;
837 }
838
839 static int bdw_init_workarounds(struct intel_engine_cs *engine)
840 {
841 struct drm_i915_private *dev_priv = engine->i915;
842 int ret;
843
844 ret = gen8_init_workarounds(engine);
845 if (ret)
846 return ret;
847
848 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
849 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
850
851 /* WaDisableDopClockGating:bdw */
852 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
853 DOP_CLOCK_GATING_DISABLE);
854
855 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
856 GEN8_SAMPLER_POWER_BYPASS_DIS);
857
858 WA_SET_BIT_MASKED(HDC_CHICKEN0,
859 /* WaForceContextSaveRestoreNonCoherent:bdw */
860 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
861 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
862 (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
863
864 return 0;
865 }
866
867 static int chv_init_workarounds(struct intel_engine_cs *engine)
868 {
869 struct drm_i915_private *dev_priv = engine->i915;
870 int ret;
871
872 ret = gen8_init_workarounds(engine);
873 if (ret)
874 return ret;
875
876 /* WaDisableThreadStallDopClockGating:chv */
877 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
878
879 /* Improve HiZ throughput on CHV. */
880 WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
881
882 return 0;
883 }
884
885 static int gen9_init_workarounds(struct intel_engine_cs *engine)
886 {
887 struct drm_i915_private *dev_priv = engine->i915;
888 int ret;
889
890 /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */
891 I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
892
893 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */
894 I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
895 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
896
897 /* WaDisableKillLogic:bxt,skl,kbl */
898 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
899 ECOCHK_DIS_TLB);
900
901 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */
902 /* WaDisablePartialInstShootdown:skl,bxt,kbl */
903 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
904 FLOW_CONTROL_ENABLE |
905 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
906
907 /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
908 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
909 GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
910
911 /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
912 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
913 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
914 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
915 GEN9_DG_MIRROR_FIX_ENABLE);
916
917 /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
918 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
919 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
920 WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
921 GEN9_RHWO_OPTIMIZATION_DISABLE);
922 /*
923 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
924 * but we do that in the per-context batchbuffer as there is an issue
925 * with this register not being restored on context restore.
926 */
927 }
928
929 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl */
930 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
931 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
932 GEN9_ENABLE_YV12_BUGFIX |
933 GEN9_ENABLE_GPGPU_PREEMPTION);
934
935 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl */
936 /* WaDisablePartialResolveInVc:skl,bxt,kbl */
937 WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
938 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
939
940 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl */
941 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
942 GEN9_CCS_TLB_PREFETCH_ENABLE);
943
944 /* WaDisableMaskBasedCammingInRCC:skl,bxt */
945 if (IS_SKL_REVID(dev_priv, SKL_REVID_C0, SKL_REVID_C0) ||
946 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
947 WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
948 PIXEL_MASK_CAMMING_DISABLE);
949
950 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */
951 WA_SET_BIT_MASKED(HDC_CHICKEN0,
952 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
953 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
954
955 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
956 * both tied to WaForceContextSaveRestoreNonCoherent
957 * in some hsds for skl. We keep the tie for all gen9. The
958 * documentation is a bit hazy and so we want to get common behaviour,
959 * even though there is no clear evidence we would need both on kbl/bxt.
960 * This area has been a source of system hangs, so we play it safe
961 * and mimic skl regardless of what the bspec says.
962 *
963 * Use Force Non-Coherent whenever executing a 3D context. This
964 * is a workaround for a possible hang in the unlikely event
965 * a TLB invalidation occurs during a PSD flush.
966 */
967
968 /* WaForceEnableNonCoherent:skl,bxt,kbl */
969 WA_SET_BIT_MASKED(HDC_CHICKEN0,
970 HDC_FORCE_NON_COHERENT);
971
972 /* WaDisableHDCInvalidation:skl,bxt,kbl */
973 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
974 BDW_DISABLE_HDC_INVALIDATION);
975
976 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */
977 if (IS_SKYLAKE(dev_priv) ||
978 IS_KABYLAKE(dev_priv) ||
979 IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
980 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
981 GEN8_SAMPLER_POWER_BYPASS_DIS);
982
983 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */
984 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
985
986 /* WaOCLCoherentLineFlush:skl,bxt,kbl */
987 I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
988 GEN8_LQSC_FLUSH_COHERENT_LINES));
989
990 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */
991 ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
992 if (ret)
993 return ret;
994
995 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */
996 ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
997 if (ret)
998 return ret;
999
1000 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */
1001 ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
1002 if (ret)
1003 return ret;
1004
1005 return 0;
1006 }
1007
1008 static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
1009 {
1010 struct drm_i915_private *dev_priv = engine->i915;
1011 u8 vals[3] = { 0, 0, 0 };
1012 unsigned int i;
1013
1014 for (i = 0; i < 3; i++) {
1015 u8 ss;
1016
1017 /*
1018 * Only consider slices where one, and only one, subslice has 7
1019 * EUs
1020 */
1021 if (!is_power_of_2(dev_priv->info.subslice_7eu[i]))
1022 continue;
1023
1024 /*
1025 * subslice_7eu[i] != 0 (because of the check above) and
1026 * ss_max == 4 (maximum number of subslices possible per slice)
1027 *
1028 * -> 0 <= ss <= 3;
1029 */
1030 ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
1031 vals[i] = 3 - ss;
1032 }
1033
1034 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1035 return 0;
1036
1037 /* Tune IZ hashing. See intel_device_info_runtime_init() */
1038 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1039 GEN9_IZ_HASHING_MASK(2) |
1040 GEN9_IZ_HASHING_MASK(1) |
1041 GEN9_IZ_HASHING_MASK(0),
1042 GEN9_IZ_HASHING(2, vals[2]) |
1043 GEN9_IZ_HASHING(1, vals[1]) |
1044 GEN9_IZ_HASHING(0, vals[0]));
1045
1046 return 0;
1047 }
1048
1049 static int skl_init_workarounds(struct intel_engine_cs *engine)
1050 {
1051 struct drm_i915_private *dev_priv = engine->i915;
1052 int ret;
1053
1054 ret = gen9_init_workarounds(engine);
1055 if (ret)
1056 return ret;
1057
1058 /*
1059 * The actual WA is to disable per-context preemption granularity control
1060 * until D0, which is the default case, so this is equivalent to
1061 * !WaDisablePerCtxtPreemptionGranularityControl:skl
1062 */
1063 if (IS_SKL_REVID(dev_priv, SKL_REVID_E0, REVID_FOREVER)) {
1064 I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
1065 _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
1066 }
1067
1068 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0)) {
1069 /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
1070 I915_WRITE(FF_SLICE_CS_CHICKEN2,
1071 _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
1072 }
1073
1074 /* GEN8_L3SQCREG4 has a dependency on the WA batch, so any new changes
1075 * involving this register should also be added to the WA batch as required.
1076 */
1077 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0))
1078 /* WaDisableLSQCROPERFforOCL:skl */
1079 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1080 GEN8_LQSC_RO_PERF_DIS);
1081
1082 /* WaEnableGapsTsvCreditFix:skl */
1083 if (IS_SKL_REVID(dev_priv, SKL_REVID_C0, REVID_FOREVER)) {
1084 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1085 GEN9_GAPS_TSV_CREDIT_DISABLE));
1086 }
1087
1088 /* WaDisablePowerCompilerClockGating:skl */
1089 if (IS_SKL_REVID(dev_priv, SKL_REVID_B0, SKL_REVID_B0))
1090 WA_SET_BIT_MASKED(HIZ_CHICKEN,
1091 BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
1092
1093 /* WaBarrierPerformanceFixDisable:skl */
1094 if (IS_SKL_REVID(dev_priv, SKL_REVID_C0, SKL_REVID_D0))
1095 WA_SET_BIT_MASKED(HDC_CHICKEN0,
1096 HDC_FENCE_DEST_SLM_DISABLE |
1097 HDC_BARRIER_PERFORMANCE_DISABLE);
1098
1099 /* WaDisableSbeCacheDispatchPortSharing:skl */
1100 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0))
1101 WA_SET_BIT_MASKED(
1102 GEN7_HALF_SLICE_CHICKEN1,
1103 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1104
1105 /* WaDisableGafsUnitClkGating:skl */
1106 WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1107
1108 /* WaInPlaceDecompressionHang:skl */
1109 if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
1110 WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
1111 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1112
1113 /* WaDisableLSQCROPERFforOCL:skl */
1114 ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1115 if (ret)
1116 return ret;
1117
1118 return skl_tune_iz_hashing(engine);
1119 }
1120
1121 static int bxt_init_workarounds(struct intel_engine_cs *engine)
1122 {
1123 struct drm_i915_private *dev_priv = engine->i915;
1124 int ret;
1125
1126 ret = gen9_init_workarounds(engine);
1127 if (ret)
1128 return ret;
1129
1130 /* WaStoreMultiplePTEenable:bxt */
1131 /* This is a requirement according to the hardware specification */
1132 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
1133 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1134
1135 /* WaSetClckGatingDisableMedia:bxt */
1136 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1137 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1138 ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1139 }
1140
1141 /* WaDisableThreadStallDopClockGating:bxt */
1142 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1143 STALL_DOP_GATING_DISABLE);
1144
1145 /* WaDisablePooledEuLoadBalancingFix:bxt */
1146 if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
1147 WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
1148 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1149 }
1150
1151 /* WaDisableSbeCacheDispatchPortSharing:bxt */
1152 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
1153 WA_SET_BIT_MASKED(
1154 GEN7_HALF_SLICE_CHICKEN1,
1155 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1156 }
1157
1158 /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
1159 /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
1160 /* WaDisableObjectLevelPreemtionForInstanceId:bxt */
1161 /* WaDisableLSQCROPERFforOCL:bxt */
1162 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1163 ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
1164 if (ret)
1165 return ret;
1166
1167 ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1168 if (ret)
1169 return ret;
1170 }
1171
1172 /* WaProgramL3SqcReg1DefaultForPerf:bxt */
1173 if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
1174 I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
1175 L3_HIGH_PRIO_CREDITS(2));
1176
1177 /* WaInsertDummyPushConstPs:bxt */
1178 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
1179 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1180 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1181
1182 /* WaInPlaceDecompressionHang:bxt */
1183 if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
1184 WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
1185 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1186
1187 return 0;
1188 }
1189
1190 static int kbl_init_workarounds(struct intel_engine_cs *engine)
1191 {
1192 struct drm_i915_private *dev_priv = engine->i915;
1193 int ret;
1194
1195 ret = gen9_init_workarounds(engine);
1196 if (ret)
1197 return ret;
1198
1199 /* WaEnableGapsTsvCreditFix:kbl */
1200 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1201 GEN9_GAPS_TSV_CREDIT_DISABLE));
1202
1203 /* WaDisableDynamicCreditSharing:kbl */
1204 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
1205 WA_SET_BIT(GAMT_CHKN_BIT_REG,
1206 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1207
1208 /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
1209 if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
1210 WA_SET_BIT_MASKED(HDC_CHICKEN0,
1211 HDC_FENCE_DEST_SLM_DISABLE);
1212
1213 /* GEN8_L3SQCREG4 has a dependency on the WA batch, so any new changes
1214 * involving this register should also be added to the WA batch as required.
1215 */
1216 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
1217 /* WaDisableLSQCROPERFforOCL:kbl */
1218 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1219 GEN8_LQSC_RO_PERF_DIS);
1220
1221 /* WaInsertDummyPushConstPs:kbl */
1222 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
1223 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1224 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1225
1226 /* WaDisableGafsUnitClkGating:kbl */
1227 WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1228
1229 /* WaDisableSbeCacheDispatchPortSharing:kbl */
1230 WA_SET_BIT_MASKED(
1231 GEN7_HALF_SLICE_CHICKEN1,
1232 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1233
1234 /* WaInPlaceDecompressionHang:kbl */
1235 WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
1236 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1237
1238 /* WaDisableLSQCROPERFforOCL:kbl */
1239 ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1240 if (ret)
1241 return ret;
1242
1243 return 0;
1244 }
1245
1246 int init_workarounds_ring(struct intel_engine_cs *engine)
1247 {
1248 struct drm_i915_private *dev_priv = engine->i915;
1249
1250 WARN_ON(engine->id != RCS);
1251
1252 dev_priv->workarounds.count = 0;
1253 dev_priv->workarounds.hw_whitelist_count[RCS] = 0;
1254
1255 if (IS_BROADWELL(dev_priv))
1256 return bdw_init_workarounds(engine);
1257
1258 if (IS_CHERRYVIEW(dev_priv))
1259 return chv_init_workarounds(engine);
1260
1261 if (IS_SKYLAKE(dev_priv))
1262 return skl_init_workarounds(engine);
1263
1264 if (IS_BROXTON(dev_priv))
1265 return bxt_init_workarounds(engine);
1266
1267 if (IS_KABYLAKE(dev_priv))
1268 return kbl_init_workarounds(engine);
1269
1270 return 0;
1271 }
1272
1273 static int init_render_ring(struct intel_engine_cs *engine)
1274 {
1275 struct drm_i915_private *dev_priv = engine->i915;
1276 int ret = init_ring_common(engine);
1277 if (ret)
1278 return ret;
1279
1280 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1281 if (IS_GEN(dev_priv, 4, 6))
1282 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
1283
1284 /* We need to disable the AsyncFlip performance optimisations in order
1285 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1286 * programmed to '1' on all products.
1287 *
1288 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1289 */
1290 if (IS_GEN(dev_priv, 6, 7))
1291 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1292
1293 /* Required for the hardware to program scanline values for waiting */
1294 /* WaEnableFlushTlbInvalidationMode:snb */
1295 if (IS_GEN6(dev_priv))
1296 I915_WRITE(GFX_MODE,
1297 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
1298
1299 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1300 if (IS_GEN7(dev_priv))
1301 I915_WRITE(GFX_MODE_GEN7,
1302 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
1303 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
1304
1305 if (IS_GEN6(dev_priv)) {
1306 /* From the Sandybridge PRM, volume 1 part 3, page 24:
1307 * "If this bit is set, STCunit will have LRA as replacement
1308 * policy. [...] This bit must be reset. LRA replacement
1309 * policy is not supported."
1310 */
1311 I915_WRITE(CACHE_MODE_0,
1312 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
1313 }
1314
1315 if (IS_GEN(dev_priv, 6, 7))
1316 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1317
1318 if (INTEL_INFO(dev_priv)->gen >= 6)
1319 I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
1320
1321 return init_workarounds_ring(engine);
1322 }
1323
1324 static void render_ring_cleanup(struct intel_engine_cs *engine)
1325 {
1326 struct drm_i915_private *dev_priv = engine->i915;
1327
1328 if (dev_priv->semaphore_obj) {
1329 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
1330 i915_gem_object_put(dev_priv->semaphore_obj);
1331 dev_priv->semaphore_obj = NULL;
1332 }
1333
1334 intel_fini_pipe_control(engine);
1335 }
1336
1337 static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
1338 unsigned int num_dwords)
1339 {
1340 #define MBOX_UPDATE_DWORDS 8
1341 struct intel_ringbuffer *signaller = signaller_req->ring;
1342 struct drm_i915_private *dev_priv = signaller_req->i915;
1343 struct intel_engine_cs *waiter;
1344 enum intel_engine_id id;
1345 int ret, num_rings;
1346
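/*
 * Reserve MBOX_UPDATE_DWORDS extra dwords for every other engine that may
 * need signalling, on top of what the caller asked for.
 */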
1347 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
1348 num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1349 #undef MBOX_UPDATE_DWORDS
1350
1351 ret = intel_ring_begin(signaller_req, num_dwords);
1352 if (ret)
1353 return ret;
1354
1355 for_each_engine_id(waiter, dev_priv, id) {
1356 u64 gtt_offset =
1357 signaller_req->engine->semaphore.signal_ggtt[id];
1358 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1359 continue;
1360
1361 intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
1362 intel_ring_emit(signaller,
1363 PIPE_CONTROL_GLOBAL_GTT_IVB |
1364 PIPE_CONTROL_QW_WRITE |
1365 PIPE_CONTROL_CS_STALL);
1366 intel_ring_emit(signaller, lower_32_bits(gtt_offset));
1367 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1368 intel_ring_emit(signaller, signaller_req->fence.seqno);
1369 intel_ring_emit(signaller, 0);
1370 intel_ring_emit(signaller,
1371 MI_SEMAPHORE_SIGNAL |
1372 MI_SEMAPHORE_TARGET(waiter->hw_id));
1373 intel_ring_emit(signaller, 0);
1374 }
1375
1376 return 0;
1377 }
1378
1379 static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
1380 unsigned int num_dwords)
1381 {
1382 #define MBOX_UPDATE_DWORDS 6
1383 struct intel_ringbuffer *signaller = signaller_req->ring;
1384 struct drm_i915_private *dev_priv = signaller_req->i915;
1385 struct intel_engine_cs *waiter;
1386 enum intel_engine_id id;
1387 int ret, num_rings;
1388
1389 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
1390 num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1391 #undef MBOX_UPDATE_DWORDS
1392
1393 ret = intel_ring_begin(signaller_req, num_dwords);
1394 if (ret)
1395 return ret;
1396
1397 for_each_engine_id(waiter, dev_priv, id) {
1398 u64 gtt_offset =
1399 signaller_req->engine->semaphore.signal_ggtt[id];
1400 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1401 continue;
1402
1403 intel_ring_emit(signaller,
1404 (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
1405 intel_ring_emit(signaller,
1406 lower_32_bits(gtt_offset) |
1407 MI_FLUSH_DW_USE_GTT);
1408 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1409 intel_ring_emit(signaller, signaller_req->fence.seqno);
1410 intel_ring_emit(signaller,
1411 MI_SEMAPHORE_SIGNAL |
1412 MI_SEMAPHORE_TARGET(waiter->hw_id));
1413 intel_ring_emit(signaller, 0);
1414 }
1415
1416 return 0;
1417 }
1418
1419 static int gen6_signal(struct drm_i915_gem_request *signaller_req,
1420 unsigned int num_dwords)
1421 {
1422 struct intel_ringbuffer *signaller = signaller_req->ring;
1423 struct drm_i915_private *dev_priv = signaller_req->i915;
1424 struct intel_engine_cs *useless;
1425 enum intel_engine_id id;
1426 int ret, num_rings;
1427
1428 #define MBOX_UPDATE_DWORDS 3
1429 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
1430 num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
1431 #undef MBOX_UPDATE_DWORDS
1432
1433 ret = intel_ring_begin(signaller_req, num_dwords);
1434 if (ret)
1435 return ret;
1436
1437 for_each_engine_id(useless, dev_priv, id) {
1438 i915_reg_t mbox_reg =
1439 signaller_req->engine->semaphore.mbox.signal[id];
1440
1441 if (i915_mmio_reg_valid(mbox_reg)) {
1442 intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
1443 intel_ring_emit_reg(signaller, mbox_reg);
1444 intel_ring_emit(signaller, signaller_req->fence.seqno);
1445 }
1446 }
1447
1448 /* If num_dwords was rounded, make sure the tail pointer is correct */
1449 if (num_rings % 2 == 0)
1450 intel_ring_emit(signaller, MI_NOOP);
1451
1452 return 0;
1453 }
1454
1455 /**
1456 * gen6_add_request - Update the semaphore mailbox registers
1457 *
1458 * @req: request to write to the ring
1459 *
1460 * Update the mailbox registers in the *other* rings with the current seqno.
1461 * This acts like a signal in the canonical semaphore.
1462 */
1463 static int
1464 gen6_add_request(struct drm_i915_gem_request *req)
1465 {
1466 struct intel_engine_cs *engine = req->engine;
1467 struct intel_ringbuffer *ring = req->ring;
1468 int ret;
1469
1470 if (engine->semaphore.signal)
1471 ret = engine->semaphore.signal(req, 4);
1472 else
1473 ret = intel_ring_begin(req, 4);
1474
1475 if (ret)
1476 return ret;
1477
1478 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1479 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1480 intel_ring_emit(ring, req->fence.seqno);
1481 intel_ring_emit(ring, MI_USER_INTERRUPT);
1482 __intel_engine_submit(engine);
1483
1484 return 0;
1485 }
1486
1487 static int
1488 gen8_render_add_request(struct drm_i915_gem_request *req)
1489 {
1490 struct intel_engine_cs *engine = req->engine;
1491 struct intel_ringbuffer *ring = req->ring;
1492 int ret;
1493
1494 if (engine->semaphore.signal)
1495 ret = engine->semaphore.signal(req, 8);
1496 else
1497 ret = intel_ring_begin(req, 8);
1498 if (ret)
1499 return ret;
1500
1501 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
1502 intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB |
1503 PIPE_CONTROL_CS_STALL |
1504 PIPE_CONTROL_QW_WRITE));
1505 intel_ring_emit(ring, intel_hws_seqno_address(engine));
1506 intel_ring_emit(ring, 0);
1507 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1508 /* We're thrashing one dword of HWS. */
1509 intel_ring_emit(ring, 0);
1510 intel_ring_emit(ring, MI_USER_INTERRUPT);
1511 intel_ring_emit(ring, MI_NOOP);
1512 __intel_engine_submit(engine);
1513
1514 return 0;
1515 }
1516
1517 static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv,
1518 u32 seqno)
1519 {
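/*
 * If the seqno we are asked about is ahead of the last seqno we handed
 * out, the seqno space must have wrapped since it was allocated.
 */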
1520 return dev_priv->last_seqno < seqno;
1521 }
1522
1523 /**
1524 * intel_ring_sync - sync the waiter to the signaller on seqno
1525 *
1526 * @waiter_req: request on the ring that is waiting
1527 * @signaller: engine which has signalled, or will signal
1528 * @seqno: seqno which the waiter will block on
1529 */
1530
1531 static int
1532 gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
1533 struct intel_engine_cs *signaller,
1534 u32 seqno)
1535 {
1536 struct intel_ringbuffer *waiter = waiter_req->ring;
1537 struct drm_i915_private *dev_priv = waiter_req->i915;
1538 u64 offset = GEN8_WAIT_OFFSET(waiter_req->engine, signaller->id);
1539 struct i915_hw_ppgtt *ppgtt;
1540 int ret;
1541
1542 ret = intel_ring_begin(waiter_req, 4);
1543 if (ret)
1544 return ret;
1545
1546 intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
1547 MI_SEMAPHORE_GLOBAL_GTT |
1548 MI_SEMAPHORE_SAD_GTE_SDD);
1549 intel_ring_emit(waiter, seqno);
1550 intel_ring_emit(waiter, lower_32_bits(offset));
1551 intel_ring_emit(waiter, upper_32_bits(offset));
1552 intel_ring_advance(waiter);
1553
1554 /* When the !RCS engines idle waiting upon a semaphore, they lose their
1555 * pagetables and we must reload them before executing the batch.
1556 * We do this on the i915_switch_context() following the wait and
1557 * before the dispatch.
1558 */
1559 ppgtt = waiter_req->ctx->ppgtt;
1560 if (ppgtt && waiter_req->engine->id != RCS)
1561 ppgtt->pd_dirty_rings |= intel_engine_flag(waiter_req->engine);
1562 return 0;
1563 }
1564
1565 static int
1566 gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
1567 struct intel_engine_cs *signaller,
1568 u32 seqno)
1569 {
1570 struct intel_ringbuffer *waiter = waiter_req->ring;
1571 u32 dw1 = MI_SEMAPHORE_MBOX |
1572 MI_SEMAPHORE_COMPARE |
1573 MI_SEMAPHORE_REGISTER;
1574 u32 wait_mbox = signaller->semaphore.mbox.wait[waiter_req->engine->id];
1575 int ret;
1576
1577 /* Throughout the GEM code, a seqno having passed implies that our current
1578 * seqno is >= the last seqno executed. However, for the hardware the
1579 * comparison is strictly greater than.
1580 */
1581 seqno -= 1;
1582
1583 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
1584
1585 ret = intel_ring_begin(waiter_req, 4);
1586 if (ret)
1587 return ret;
1588
1589 /* If seqno wrap happened, omit the wait with no-ops */
1590 if (likely(!i915_gem_has_seqno_wrapped(waiter_req->i915, seqno))) {
1591 intel_ring_emit(waiter, dw1 | wait_mbox);
1592 intel_ring_emit(waiter, seqno);
1593 intel_ring_emit(waiter, 0);
1594 intel_ring_emit(waiter, MI_NOOP);
1595 } else {
1596 intel_ring_emit(waiter, MI_NOOP);
1597 intel_ring_emit(waiter, MI_NOOP);
1598 intel_ring_emit(waiter, MI_NOOP);
1599 intel_ring_emit(waiter, MI_NOOP);
1600 }
1601 intel_ring_advance(waiter);
1602
1603 return 0;
1604 }
1605
1606 static void
1607 gen5_seqno_barrier(struct intel_engine_cs *engine)
1608 {
1609 /* MI_STORE are internally buffered by the GPU and not flushed
1610 * either by MI_FLUSH or SyncFlush or any other combination of
1611 * MI commands.
1612 *
1613 * "Only the submission of the store operation is guaranteed.
1614 * The write result will be complete (coherent) some time later
1615 * (this is practically a finite period but there is no guaranteed
1616 * latency)."
1617 *
1618 * Empirically, we observe that we need a delay of at least 75us to
1619 * be sure that the seqno write is visible by the CPU.
1620 */
1621 usleep_range(125, 250);
1622 }
1623
1624 static void
1625 gen6_seqno_barrier(struct intel_engine_cs *engine)
1626 {
1627 struct drm_i915_private *dev_priv = engine->i915;
1628
1629 /* Workaround to force correct ordering between irq and seqno writes on
1630 * ivb (and maybe also on snb) by reading from a CS register (like
1631 * ACTHD) before reading the status page.
1632 *
1633 * Note that this effectively stalls the read by the time it takes to
1634 * do a memory transaction, which more or less ensures that the write
1635 * from the GPU has sufficient time to invalidate the CPU cacheline.
1636 * Alternatively we could delay the interrupt from the CS ring to give
1637 * the write time to land, but that would incur a delay after every
1638 * batch i.e. much more frequent than a delay when waiting for the
1639 * interrupt (with the same net latency).
1640 *
1641 * Also note that to prevent whole machine hangs on gen7, we have to
1642 * take the spinlock to guard against concurrent cacheline access.
1643 */
1644 spin_lock_irq(&dev_priv->uncore.lock);
1645 POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
1646 spin_unlock_irq(&dev_priv->uncore.lock);
1647 }
1648
1649 static void
1650 gen5_irq_enable(struct intel_engine_cs *engine)
1651 {
1652 gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
1653 }
1654
1655 static void
1656 gen5_irq_disable(struct intel_engine_cs *engine)
1657 {
1658 gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
1659 }
1660
1661 static void
1662 i9xx_irq_enable(struct intel_engine_cs *engine)
1663 {
1664 struct drm_i915_private *dev_priv = engine->i915;
1665
1666 dev_priv->irq_mask &= ~engine->irq_enable_mask;
1667 I915_WRITE(IMR, dev_priv->irq_mask);
1668 POSTING_READ_FW(RING_IMR(engine->mmio_base));
1669 }
1670
1671 static void
1672 i9xx_irq_disable(struct intel_engine_cs *engine)
1673 {
1674 struct drm_i915_private *dev_priv = engine->i915;
1675
1676 dev_priv->irq_mask |= engine->irq_enable_mask;
1677 I915_WRITE(IMR, dev_priv->irq_mask);
1678 }
1679
1680 static void
1681 i8xx_irq_enable(struct intel_engine_cs *engine)
1682 {
1683 struct drm_i915_private *dev_priv = engine->i915;
1684
1685 dev_priv->irq_mask &= ~engine->irq_enable_mask;
1686 I915_WRITE16(IMR, dev_priv->irq_mask);
1687 POSTING_READ16(RING_IMR(engine->mmio_base));
1688 }
1689
1690 static void
1691 i8xx_irq_disable(struct intel_engine_cs *engine)
1692 {
1693 struct drm_i915_private *dev_priv = engine->i915;
1694
1695 dev_priv->irq_mask |= engine->irq_enable_mask;
1696 I915_WRITE16(IMR, dev_priv->irq_mask);
1697 }
1698
1699 static int
1700 bsd_ring_flush(struct drm_i915_gem_request *req,
1701 u32 invalidate_domains,
1702 u32 flush_domains)
1703 {
1704 struct intel_ringbuffer *ring = req->ring;
1705 int ret;
1706
1707 ret = intel_ring_begin(req, 2);
1708 if (ret)
1709 return ret;
1710
1711 intel_ring_emit(ring, MI_FLUSH);
1712 intel_ring_emit(ring, MI_NOOP);
1713 intel_ring_advance(ring);
1714 return 0;
1715 }
1716
1717 static int
1718 i9xx_add_request(struct drm_i915_gem_request *req)
1719 {
1720 struct intel_ringbuffer *ring = req->ring;
1721 int ret;
1722
1723 ret = intel_ring_begin(req, 4);
1724 if (ret)
1725 return ret;
1726
1727 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1728 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1729 intel_ring_emit(ring, req->fence.seqno);
1730 intel_ring_emit(ring, MI_USER_INTERRUPT);
1731 __intel_engine_submit(req->engine);
1732
1733 return 0;
1734 }
1735
1736 static void
1737 gen6_irq_enable(struct intel_engine_cs *engine)
1738 {
1739 struct drm_i915_private *dev_priv = engine->i915;
1740
1741 I915_WRITE_IMR(engine,
1742 ~(engine->irq_enable_mask |
1743 engine->irq_keep_mask));
1744 gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask);
1745 }
1746
1747 static void
1748 gen6_irq_disable(struct intel_engine_cs *engine)
1749 {
1750 struct drm_i915_private *dev_priv = engine->i915;
1751
1752 I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
1753 gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask);
1754 }
1755
1756 static void
1757 hsw_vebox_irq_enable(struct intel_engine_cs *engine)
1758 {
1759 struct drm_i915_private *dev_priv = engine->i915;
1760
1761 I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
1762 gen6_enable_pm_irq(dev_priv, engine->irq_enable_mask);
1763 }
1764
1765 static void
1766 hsw_vebox_irq_disable(struct intel_engine_cs *engine)
1767 {
1768 struct drm_i915_private *dev_priv = engine->i915;
1769
1770 I915_WRITE_IMR(engine, ~0);
1771 gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask);
1772 }
1773
1774 static void
1775 gen8_irq_enable(struct intel_engine_cs *engine)
1776 {
1777 struct drm_i915_private *dev_priv = engine->i915;
1778
1779 I915_WRITE_IMR(engine,
1780 ~(engine->irq_enable_mask |
1781 engine->irq_keep_mask));
1782 POSTING_READ_FW(RING_IMR(engine->mmio_base));
1783 }
1784
1785 static void
1786 gen8_irq_disable(struct intel_engine_cs *engine)
1787 {
1788 struct drm_i915_private *dev_priv = engine->i915;
1789
1790 I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
1791 }
1792
1793 static int
1794 i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
1795 u64 offset, u32 length,
1796 unsigned dispatch_flags)
1797 {
1798 struct intel_ringbuffer *ring = req->ring;
1799 int ret;
1800
1801 ret = intel_ring_begin(req, 2);
1802 if (ret)
1803 return ret;
1804
1805 intel_ring_emit(ring,
1806 MI_BATCH_BUFFER_START |
1807 MI_BATCH_GTT |
1808 (dispatch_flags & I915_DISPATCH_SECURE ?
1809 0 : MI_BATCH_NON_SECURE_I965));
1810 intel_ring_emit(ring, offset);
1811 intel_ring_advance(ring);
1812
1813 return 0;
1814 }
1815
1816 /* Just a userspace ABI convention to limit the wa batch bo to a reasonable size */
1817 #define I830_BATCH_LIMIT (256*1024)
1818 #define I830_TLB_ENTRIES (2)
1819 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
1820 static int
1821 i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
1822 u64 offset, u32 len,
1823 unsigned dispatch_flags)
1824 {
1825 struct intel_ringbuffer *ring = req->ring;
1826 u32 cs_offset = req->engine->scratch.gtt_offset;
1827 int ret;
1828
1829 ret = intel_ring_begin(req, 6);
1830 if (ret)
1831 return ret;
1832
1833 /* Evict the invalid PTE TLBs */
1834 intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1835 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1836 intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1837 intel_ring_emit(ring, cs_offset);
1838 intel_ring_emit(ring, 0xdeadbeef);
1839 intel_ring_emit(ring, MI_NOOP);
1840 intel_ring_advance(ring);
1841
1842 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
1843 if (len > I830_BATCH_LIMIT)
1844 return -ENOSPC;
1845
1846 ret = intel_ring_begin(req, 6 + 2);
1847 if (ret)
1848 return ret;
1849
1850 /* Blit the batch (which now has all relocs applied) to the
1851 * stable batch scratch bo area (so that the CS never
1852 * stumbles over its tlb invalidation bug) ...
1853 */
1854 intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1855 intel_ring_emit(ring,
1856 BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
1857 intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
1858 intel_ring_emit(ring, cs_offset);
1859 intel_ring_emit(ring, 4096);
1860 intel_ring_emit(ring, offset);
1861
1862 intel_ring_emit(ring, MI_FLUSH);
1863 intel_ring_emit(ring, MI_NOOP);
1864 intel_ring_advance(ring);
1865
1866 /* ... and execute it. */
1867 offset = cs_offset;
1868 }
1869
1870 ret = intel_ring_begin(req, 2);
1871 if (ret)
1872 return ret;
1873
1874 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1875 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1876 0 : MI_BATCH_NON_SECURE));
1877 intel_ring_advance(ring);
1878
1879 return 0;
1880 }
1881
1882 static int
1883 i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
1884 u64 offset, u32 len,
1885 unsigned dispatch_flags)
1886 {
1887 struct intel_ringbuffer *ring = req->ring;
1888 int ret;
1889
1890 ret = intel_ring_begin(req, 2);
1891 if (ret)
1892 return ret;
1893
1894 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1895 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1896 0 : MI_BATCH_NON_SECURE));
1897 intel_ring_advance(ring);
1898
1899 return 0;
1900 }
1901
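/*
 * The hardware status page (HWS) is a single page into which the GPU
 * writes breadcrumbs such as the seqno at I915_GEM_HWS_INDEX. It is
 * either a GEM object pinned into the GGTT or, when the hardware has no
 * GFX HWS, a physically addressed DMA page (render engine only).
 */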
1902 static void cleanup_phys_status_page(struct intel_engine_cs *engine)
1903 {
1904 struct drm_i915_private *dev_priv = engine->i915;
1905
1906 if (!dev_priv->status_page_dmah)
1907 return;
1908
1909 drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
1910 engine->status_page.page_addr = NULL;
1911 }
1912
1913 static void cleanup_status_page(struct intel_engine_cs *engine)
1914 {
1915 struct drm_i915_gem_object *obj;
1916
1917 obj = engine->status_page.obj;
1918 if (obj == NULL)
1919 return;
1920
1921 kunmap(sg_page(obj->pages->sgl));
1922 i915_gem_object_ggtt_unpin(obj);
1923 i915_gem_object_put(obj);
1924 engine->status_page.obj = NULL;
1925 }
1926
1927 static int init_status_page(struct intel_engine_cs *engine)
1928 {
1929 struct drm_i915_gem_object *obj = engine->status_page.obj;
1930
1931 if (obj == NULL) {
1932 unsigned flags;
1933 int ret;
1934
1935 obj = i915_gem_object_create(&engine->i915->drm, 4096);
1936 if (IS_ERR(obj)) {
1937 DRM_ERROR("Failed to allocate status page\n");
1938 return PTR_ERR(obj);
1939 }
1940
1941 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1942 if (ret)
1943 goto err_unref;
1944
1945 flags = 0;
1946 if (!HAS_LLC(engine->i915))
1947 /* On g33, we cannot place HWS above 256MiB, so
1948 * restrict its pinning to the low mappable arena.
1949 * Though this restriction is not documented for
1950 * gen4, gen5, or byt, they also behave similarly
1951 * and hang if the HWS is placed at the top of the
1952 * GTT. To generalise, it appears that all !llc
1953 * platforms have issues with us placing the HWS
1954 * above the mappable region (even though we never
1955 * actually map it).
1956 */
1957 flags |= PIN_MAPPABLE;
1958 ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
1959 if (ret) {
1960 err_unref:
1961 i915_gem_object_put(obj);
1962 return ret;
1963 }
1964
1965 engine->status_page.obj = obj;
1966 }
1967
1968 engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
1969 engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
1970 memset(engine->status_page.page_addr, 0, PAGE_SIZE);
1971
1972 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1973 engine->name, engine->status_page.gfx_addr);
1974
1975 return 0;
1976 }
1977
1978 static int init_phys_status_page(struct intel_engine_cs *engine)
1979 {
1980 struct drm_i915_private *dev_priv = engine->i915;
1981
1982 if (!dev_priv->status_page_dmah) {
1983 dev_priv->status_page_dmah =
1984 drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
1985 if (!dev_priv->status_page_dmah)
1986 return -ENOMEM;
1987 }
1988
1989 engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1990 memset(engine->status_page.page_addr, 0, PAGE_SIZE);
1991
1992 return 0;
1993 }
1994
1995 void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
1996 {
1997 GEM_BUG_ON(!ringbuf->vma);
1998 GEM_BUG_ON(!ringbuf->vaddr);
1999
2000 if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
2001 i915_gem_object_unpin_map(ringbuf->obj);
2002 else
2003 i915_vma_unpin_iomap(ringbuf->vma);
2004 ringbuf->vaddr = NULL;
2005
2006 i915_gem_object_ggtt_unpin(ringbuf->obj);
2007 ringbuf->vma = NULL;
2008 }
2009
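/*
 * Pin the ring object into the GGTT and map it for CPU writes: on LLC
 * platforms (and not in stolen memory) a regular CPU mapping of the
 * pages is used, otherwise the ring is accessed through a WC iomap of
 * the mappable aperture, which requires the device to be awake.
 */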
2010 int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv,
2011 struct intel_ringbuffer *ringbuf)
2012 {
2013 struct drm_i915_gem_object *obj = ringbuf->obj;
2014 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
2015 unsigned flags = PIN_OFFSET_BIAS | 4096;
2016 void *addr;
2017 int ret;
2018
2019 if (HAS_LLC(dev_priv) && !obj->stolen) {
2020 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags);
2021 if (ret)
2022 return ret;
2023
2024 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2025 if (ret)
2026 goto err_unpin;
2027
2028 addr = i915_gem_object_pin_map(obj);
2029 if (IS_ERR(addr)) {
2030 ret = PTR_ERR(addr);
2031 goto err_unpin;
2032 }
2033 } else {
2034 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
2035 flags | PIN_MAPPABLE);
2036 if (ret)
2037 return ret;
2038
2039 ret = i915_gem_object_set_to_gtt_domain(obj, true);
2040 if (ret)
2041 goto err_unpin;
2042
2043 /* Access through the GTT requires the device to be awake. */
2044 assert_rpm_wakelock_held(dev_priv);
2045
2046 addr = (void __force *)
2047 i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
2048 if (IS_ERR(addr)) {
2049 ret = PTR_ERR(addr);
2050 goto err_unpin;
2051 }
2052 }
2053
2054 ringbuf->vaddr = addr;
2055 ringbuf->vma = i915_gem_obj_to_ggtt(obj);
2056 return 0;
2057
2058 err_unpin:
2059 i915_gem_object_ggtt_unpin(obj);
2060 return ret;
2061 }
2062
2063 static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2064 {
2065 i915_gem_object_put(ringbuf->obj);
2066 ringbuf->obj = NULL;
2067 }
2068
2069 static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2070 struct intel_ringbuffer *ringbuf)
2071 {
2072 struct drm_i915_gem_object *obj;
2073
2074 obj = NULL;
2075 if (!HAS_LLC(dev))
2076 obj = i915_gem_object_create_stolen(dev, ringbuf->size);
2077 if (obj == NULL)
2078 obj = i915_gem_object_create(dev, ringbuf->size);
2079 if (IS_ERR(obj))
2080 return PTR_ERR(obj);
2081
2082 /* mark ring buffers as read-only from GPU side by default */
2083 obj->gt_ro = 1;
2084
2085 ringbuf->obj = obj;
2086
2087 return 0;
2088 }
2089
2090 struct intel_ringbuffer *
2091 intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
2092 {
2093 struct intel_ringbuffer *ring;
2094 int ret;
2095
2096 ring = kzalloc(sizeof(*ring), GFP_KERNEL);
2097 if (ring == NULL) {
2098 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
2099 engine->name);
2100 return ERR_PTR(-ENOMEM);
2101 }
2102
2103 ring->engine = engine;
2104 list_add(&ring->link, &engine->buffers);
2105
2106 ring->size = size;
2107 /* Work around an erratum on the i830 which causes a hang if
2108 * the TAIL pointer points to within the last 2 cachelines
2109 * of the buffer.
2110 */
2111 ring->effective_size = size;
2112 if (IS_I830(engine->i915) || IS_845G(engine->i915))
2113 ring->effective_size -= 2 * CACHELINE_BYTES;
2114
2115 ring->last_retired_head = -1;
2116 intel_ring_update_space(ring);
2117
2118 ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring);
2119 if (ret) {
2120 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
2121 engine->name, ret);
2122 list_del(&ring->link);
2123 kfree(ring);
2124 return ERR_PTR(ret);
2125 }
2126
2127 return ring;
2128 }
2129
2130 void
2131 intel_ringbuffer_free(struct intel_ringbuffer *ring)
2132 {
2133 intel_destroy_ringbuffer_obj(ring);
2134 list_del(&ring->link);
2135 kfree(ring);
2136 }
2137
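/*
 * Pin the legacy per-engine context image into the GGTT for as long as
 * the context is in use. Pinning is refcounted through ce->pin_count,
 * so only the first pin and the final unpin touch the backing object.
 */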
2138 static int intel_ring_context_pin(struct i915_gem_context *ctx,
2139 struct intel_engine_cs *engine)
2140 {
2141 struct intel_context *ce = &ctx->engine[engine->id];
2142 int ret;
2143
2144 lockdep_assert_held(&ctx->i915->drm.struct_mutex);
2145
2146 if (ce->pin_count++)
2147 return 0;
2148
2149 if (ce->state) {
2150 ret = i915_gem_obj_ggtt_pin(ce->state, ctx->ggtt_alignment, 0);
2151 if (ret)
2152 goto error;
2153 }
2154
2155 /* The kernel context is only used as a placeholder for flushing the
2156 * active context. It is never used for submitting user rendering and
2157 * as such never requires the golden render context, and so we can skip
2158 * emitting it when we switch to the kernel context. This is required
2159 * as during eviction we cannot allocate and pin the renderstate in
2160 * order to initialise the context.
2161 */
2162 if (ctx == ctx->i915->kernel_context)
2163 ce->initialised = true;
2164
2165 i915_gem_context_get(ctx);
2166 return 0;
2167
2168 error:
2169 ce->pin_count = 0;
2170 return ret;
2171 }
2172
2173 static void intel_ring_context_unpin(struct i915_gem_context *ctx,
2174 struct intel_engine_cs *engine)
2175 {
2176 struct intel_context *ce = &ctx->engine[engine->id];
2177
2178 lockdep_assert_held(&ctx->i915->drm.struct_mutex);
2179
2180 if (--ce->pin_count)
2181 return;
2182
2183 if (ce->state)
2184 i915_gem_object_ggtt_unpin(ce->state);
2185
2186 i915_gem_context_put(ctx);
2187 }
2188
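/*
 * Common setup for a legacy ringbuffer engine: initialise the shared
 * engine state, pin the kernel context, create and map a 32-page ring,
 * and set up the (GFX or physical) hardware status page.
 */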
2189 static int intel_init_ring_buffer(struct intel_engine_cs *engine)
2190 {
2191 struct drm_i915_private *dev_priv = engine->i915;
2192 struct intel_ringbuffer *ringbuf;
2193 int ret;
2194
2195 WARN_ON(engine->buffer);
2196
2197 intel_engine_setup_common(engine);
2198
2199 memset(engine->semaphore.sync_seqno, 0,
2200 sizeof(engine->semaphore.sync_seqno));
2201
2202 ret = intel_engine_init_common(engine);
2203 if (ret)
2204 goto error;
2205
2206 /* We may need to do things with the shrinker which
2207 * require us to immediately switch back to the default
2208 * context. This can cause a problem as pinning the
2209 * default context also requires GTT space which may not
2210 * be available. To avoid this we always pin the default
2211 * context.
2212 */
2213 ret = intel_ring_context_pin(dev_priv->kernel_context, engine);
2214 if (ret)
2215 goto error;
2216
2217 ringbuf = intel_engine_create_ringbuffer(engine, 32 * PAGE_SIZE);
2218 if (IS_ERR(ringbuf)) {
2219 ret = PTR_ERR(ringbuf);
2220 goto error;
2221 }
2222 engine->buffer = ringbuf;
2223
2224 if (I915_NEED_GFX_HWS(dev_priv)) {
2225 ret = init_status_page(engine);
2226 if (ret)
2227 goto error;
2228 } else {
2229 WARN_ON(engine->id != RCS);
2230 ret = init_phys_status_page(engine);
2231 if (ret)
2232 goto error;
2233 }
2234
2235 ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ringbuf);
2236 if (ret) {
2237 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2238 engine->name, ret);
2239 intel_destroy_ringbuffer_obj(ringbuf);
2240 goto error;
2241 }
2242
2243 return 0;
2244
2245 error:
2246 intel_cleanup_engine(engine);
2247 return ret;
2248 }
2249
2250 void intel_cleanup_engine(struct intel_engine_cs *engine)
2251 {
2252 struct drm_i915_private *dev_priv;
2253
2254 if (!intel_engine_initialized(engine))
2255 return;
2256
2257 dev_priv = engine->i915;
2258
2259 if (engine->buffer) {
2260 intel_stop_engine(engine);
2261 WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0);
2262
2263 intel_unpin_ringbuffer_obj(engine->buffer);
2264 intel_ringbuffer_free(engine->buffer);
2265 engine->buffer = NULL;
2266 }
2267
2268 if (engine->cleanup)
2269 engine->cleanup(engine);
2270
2271 if (I915_NEED_GFX_HWS(dev_priv)) {
2272 cleanup_status_page(engine);
2273 } else {
2274 WARN_ON(engine->id != RCS);
2275 cleanup_phys_status_page(engine);
2276 }
2277
2278 intel_engine_cleanup_cmd_parser(engine);
2279 i915_gem_batch_pool_fini(&engine->batch_pool);
2280 intel_engine_fini_breadcrumbs(engine);
2281
2282 intel_ring_context_unpin(dev_priv->kernel_context, engine);
2283
2284 engine->i915 = NULL;
2285 }
2286
2287 int intel_engine_idle(struct intel_engine_cs *engine)
2288 {
2289 struct drm_i915_gem_request *req;
2290
2291 /* Wait upon the last request to be completed */
2292 if (list_empty(&engine->request_list))
2293 return 0;
2294
2295 req = list_entry(engine->request_list.prev,
2296 struct drm_i915_gem_request,
2297 list);
2298
2299 /* Make sure we do not trigger any retires */
2300 return __i915_wait_request(req,
2301 req->i915->mm.interruptible,
2302 NULL, NULL);
2303 }
2304
2305 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
2306 {
2307 int ret;
2308
2309 /* Flush enough space to reduce the likelihood of waiting after
2310 * we start building the request - in which case we will just
2311 * have to repeat work.
2312 */
2313 request->reserved_space += LEGACY_REQUEST_SIZE;
2314
2315 request->ring = request->engine->buffer;
2316
2317 ret = intel_ring_begin(request, 0);
2318 if (ret)
2319 return ret;
2320
2321 request->reserved_space -= LEGACY_REQUEST_SIZE;
2322 return 0;
2323 }
2324
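/*
 * Wait for space in the ring by picking the oldest outstanding request
 * on this ring whose retirement would free at least @bytes (measured
 * from its postfix) and waiting for it to complete.
 */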
2325 static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
2326 {
2327 struct intel_ringbuffer *ring = req->ring;
2328 struct intel_engine_cs *engine = req->engine;
2329 struct drm_i915_gem_request *target;
2330
2331 intel_ring_update_space(ring);
2332 if (ring->space >= bytes)
2333 return 0;
2334
2335 /*
2336 * Space is reserved in the ringbuffer for finalising the request,
2337 * as that cannot be allowed to fail. During request finalisation,
2338 * reserved_space is set to 0 to stop the overallocation and the
2339 * assumption is that then we never need to wait (which has the
2340 * risk of failing with EINTR).
2341 *
2342 * See also i915_gem_request_alloc() and i915_add_request().
2343 */
2344 GEM_BUG_ON(!req->reserved_space);
2345
2346 list_for_each_entry(target, &engine->request_list, list) {
2347 unsigned space;
2348
2349 /*
2350 * The request queue is per-engine, so can contain requests
2351 * from multiple ringbuffers. Here, we must ignore any that
2352 * aren't from the ringbuffer we're considering.
2353 */
2354 if (target->ring != ring)
2355 continue;
2356
2357 /* Would completion of this request free enough space? */
2358 space = __intel_ring_space(target->postfix, ring->tail,
2359 ring->size);
2360 if (space >= bytes)
2361 break;
2362 }
2363
2364 if (WARN_ON(&target->list == &engine->request_list))
2365 return -ENOSPC;
2366
2367 return i915_wait_request(target);
2368 }
2369
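/*
 * Reserve space in the ring for @num_dwords of commands plus the
 * request's reserved tail space, waiting for older requests to retire
 * if necessary and padding out to the start of the ring with MI_NOOPs
 * when the allocation would not fit before the end of the buffer.
 */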
2370 int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
2371 {
2372 struct intel_ringbuffer *ring = req->ring;
2373 int remain_actual = ring->size - ring->tail;
2374 int remain_usable = ring->effective_size - ring->tail;
2375 int bytes = num_dwords * sizeof(u32);
2376 int total_bytes, wait_bytes;
2377 bool need_wrap = false;
2378
2379 total_bytes = bytes + req->reserved_space;
2380
2381 if (unlikely(bytes > remain_usable)) {
2382 /*
2383 * Not enough space for the basic request. So need to flush
2384 * out the remainder and then wait for base + reserved.
2385 */
2386 wait_bytes = remain_actual + total_bytes;
2387 need_wrap = true;
2388 } else if (unlikely(total_bytes > remain_usable)) {
2389 /*
2390 * The base request will fit but the reserved space
2391 * falls off the end. So we don't need an immediate wrap
2392 * and only need to effectively wait for the reserved
2393 * size space from the start of ringbuffer.
2394 */
2395 wait_bytes = remain_actual + req->reserved_space;
2396 } else {
2397 /* No wrapping required, just waiting. */
2398 wait_bytes = total_bytes;
2399 }
2400
2401 if (wait_bytes > ring->space) {
2402 int ret = wait_for_space(req, wait_bytes);
2403 if (unlikely(ret))
2404 return ret;
2405
2406 intel_ring_update_space(ring);
2407 if (unlikely(ring->space < wait_bytes))
2408 return -EAGAIN;
2409 }
2410
2411 if (unlikely(need_wrap)) {
2412 GEM_BUG_ON(remain_actual > ring->space);
2413 GEM_BUG_ON(ring->tail + remain_actual > ring->size);
2414
2415 /* Fill the tail with MI_NOOP */
2416 memset(ring->vaddr + ring->tail, 0, remain_actual);
2417 ring->tail = 0;
2418 ring->space -= remain_actual;
2419 }
2420
2421 ring->space -= bytes;
2422 GEM_BUG_ON(ring->space < 0);
2423 return 0;
2424 }
2425
2426 /* Align the ring tail to a cacheline boundary */
2427 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
2428 {
2429 struct intel_ringbuffer *ring = req->ring;
2430 int num_dwords =
2431 (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
2432 int ret;
2433
2434 if (num_dwords == 0)
2435 return 0;
2436
2437 num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
2438 ret = intel_ring_begin(req, num_dwords);
2439 if (ret)
2440 return ret;
2441
2442 while (num_dwords--)
2443 intel_ring_emit(ring, MI_NOOP);
2444
2445 intel_ring_advance(ring);
2446
2447 return 0;
2448 }
2449
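/*
 * Force the engine's bookkeeping to a new seqno: clear the semaphore
 * sync values (registers or the gen8 semaphore page) so that stale
 * values cannot satisfy future waits, write the seqno into the status
 * page, update hangcheck and then wake any waiters.
 */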
2450 void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno)
2451 {
2452 struct drm_i915_private *dev_priv = engine->i915;
2453
2454 /* Our semaphore implementation is strictly monotonic (i.e. we proceed
2455 * so long as the semaphore value in the register/page is greater
2456 * than the sync value), so whenever we reset the seqno we must
2457 * also reset the tracking semaphore value to 0, ensuring it is
2458 * always before the next request's seqno. If we don't reset
2459 * the semaphore value, then when the seqno moves backwards all
2460 * future waits will complete instantly (causing rendering corruption).
2461 */
2462 if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
2463 I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
2464 I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
2465 if (HAS_VEBOX(dev_priv))
2466 I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
2467 }
2468 if (dev_priv->semaphore_obj) {
2469 struct drm_i915_gem_object *obj = dev_priv->semaphore_obj;
2470 struct page *page = i915_gem_object_get_dirty_page(obj, 0);
2471 void *semaphores = kmap(page);
2472 memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
2473 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
2474 kunmap(page);
2475 }
2476 memset(engine->semaphore.sync_seqno, 0,
2477 sizeof(engine->semaphore.sync_seqno));
2478
2479 intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
2480 if (engine->irq_seqno_barrier)
2481 engine->irq_seqno_barrier(engine);
2482 engine->last_submitted_seqno = seqno;
2483
2484 engine->hangcheck.seqno = seqno;
2485
2486 /* After manually advancing the seqno, fake the interrupt in case
2487 * there are any waiters for that seqno.
2488 */
2489 rcu_read_lock();
2490 intel_engine_wakeup(engine);
2491 rcu_read_unlock();
2492 }
2493
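/*
 * Gen6 BSD tail updates need a workaround: the ring has to be brought
 * out of its power-saving idle state before RING_TAIL is written, and
 * is only allowed to sleep again once the write has landed.
 */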
2494 static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
2495 u32 value)
2496 {
2497 struct drm_i915_private *dev_priv = engine->i915;
2498
2499 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
2500
2501 /* Every tail move must follow the sequence below */
2502
2503 /* Disable notification that the ring is IDLE. The GT
2504 * will then assume that it is busy and bring it out of rc6.
2505 */
2506 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
2507 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2508
2509 /* Clear the context id. Here be magic! */
2510 I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);
2511
2512 /* Wait for the ring not to be idle, i.e. for it to wake up. */
2513 if (intel_wait_for_register_fw(dev_priv,
2514 GEN6_BSD_SLEEP_PSMI_CONTROL,
2515 GEN6_BSD_SLEEP_INDICATOR,
2516 0,
2517 50))
2518 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
2519
2520 /* Now that the ring is fully powered up, update the tail */
2521 I915_WRITE_FW(RING_TAIL(engine->mmio_base), value);
2522 POSTING_READ_FW(RING_TAIL(engine->mmio_base));
2523
2524 /* Let the ring send IDLE messages to the GT again,
2525 * and so let it sleep to conserve power when idle.
2526 */
2527 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
2528 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2529
2530 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2531 }
2532
2533 static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
2534 u32 invalidate, u32 flush)
2535 {
2536 struct intel_ringbuffer *ring = req->ring;
2537 uint32_t cmd;
2538 int ret;
2539
2540 ret = intel_ring_begin(req, 4);
2541 if (ret)
2542 return ret;
2543
2544 cmd = MI_FLUSH_DW;
2545 if (INTEL_GEN(req->i915) >= 8)
2546 cmd += 1;
2547
2548 /* We always require a command barrier so that subsequent
2549 * commands, such as breadcrumb interrupts, are strictly ordered
2550 * wrt the contents of the write cache being flushed to memory
2551 * (and thus being coherent from the CPU).
2552 */
2553 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2554
2555 /*
2556 * Bspec vol 1c.5 - video engine command streamer:
2557 * "If ENABLED, all TLBs will be invalidated once the flush
2558 * operation is complete. This bit is only valid when the
2559 * Post-Sync Operation field is a value of 1h or 3h."
2560 */
2561 if (invalidate & I915_GEM_GPU_DOMAINS)
2562 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
2563
2564 intel_ring_emit(ring, cmd);
2565 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2566 if (INTEL_GEN(req->i915) >= 8) {
2567 intel_ring_emit(ring, 0); /* upper addr */
2568 intel_ring_emit(ring, 0); /* value */
2569 } else {
2570 intel_ring_emit(ring, 0);
2571 intel_ring_emit(ring, MI_NOOP);
2572 }
2573 intel_ring_advance(ring);
2574 return 0;
2575 }
2576
2577 static int
2578 gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2579 u64 offset, u32 len,
2580 unsigned dispatch_flags)
2581 {
2582 struct intel_ringbuffer *ring = req->ring;
2583 bool ppgtt = USES_PPGTT(req->i915) &&
2584 !(dispatch_flags & I915_DISPATCH_SECURE);
2585 int ret;
2586
2587 ret = intel_ring_begin(req, 4);
2588 if (ret)
2589 return ret;
2590
2591 /* FIXME(BDW): Address space and security selectors. */
2592 intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
2593 (dispatch_flags & I915_DISPATCH_RS ?
2594 MI_BATCH_RESOURCE_STREAMER : 0));
2595 intel_ring_emit(ring, lower_32_bits(offset));
2596 intel_ring_emit(ring, upper_32_bits(offset));
2597 intel_ring_emit(ring, MI_NOOP);
2598 intel_ring_advance(ring);
2599
2600 return 0;
2601 }
2602
2603 static int
2604 hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2605 u64 offset, u32 len,
2606 unsigned dispatch_flags)
2607 {
2608 struct intel_ringbuffer *ring = req->ring;
2609 int ret;
2610
2611 ret = intel_ring_begin(req, 2);
2612 if (ret)
2613 return ret;
2614
2615 intel_ring_emit(ring,
2616 MI_BATCH_BUFFER_START |
2617 (dispatch_flags & I915_DISPATCH_SECURE ?
2618 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
2619 (dispatch_flags & I915_DISPATCH_RS ?
2620 MI_BATCH_RESOURCE_STREAMER : 0));
2621 /* bit0-7 is the length on GEN6+ */
2622 intel_ring_emit(ring, offset);
2623 intel_ring_advance(ring);
2624
2625 return 0;
2626 }
2627
2628 static int
2629 gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2630 u64 offset, u32 len,
2631 unsigned dispatch_flags)
2632 {
2633 struct intel_ringbuffer *ring = req->ring;
2634 int ret;
2635
2636 ret = intel_ring_begin(req, 2);
2637 if (ret)
2638 return ret;
2639
2640 intel_ring_emit(ring,
2641 MI_BATCH_BUFFER_START |
2642 (dispatch_flags & I915_DISPATCH_SECURE ?
2643 0 : MI_BATCH_NON_SECURE_I965));
2644 /* bit0-7 is the length on GEN6+ */
2645 intel_ring_emit(ring, offset);
2646 intel_ring_advance(ring);
2647
2648 return 0;
2649 }
2650
2651 /* Blitter support (SandyBridge+) */
2652
2653 static int gen6_ring_flush(struct drm_i915_gem_request *req,
2654 u32 invalidate, u32 flush)
2655 {
2656 struct intel_ringbuffer *ring = req->ring;
2657 uint32_t cmd;
2658 int ret;
2659
2660 ret = intel_ring_begin(req, 4);
2661 if (ret)
2662 return ret;
2663
2664 cmd = MI_FLUSH_DW;
2665 if (INTEL_GEN(req->i915) >= 8)
2666 cmd += 1;
2667
2668 /* We always require a command barrier so that subsequent
2669 * commands, such as breadcrumb interrupts, are strictly ordered
2670 * wrt the contents of the write cache being flushed to memory
2671 * (and thus being coherent from the CPU).
2672 */
2673 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2674
2675 /*
2676 * Bspec vol 1c.3 - blitter engine command streamer:
2677 * "If ENABLED, all TLBs will be invalidated once the flush
2678 * operation is complete. This bit is only valid when the
2679 * Post-Sync Operation field is a value of 1h or 3h."
2680 */
2681 if (invalidate & I915_GEM_DOMAIN_RENDER)
2682 cmd |= MI_INVALIDATE_TLB;
2683 intel_ring_emit(ring, cmd);
2684 intel_ring_emit(ring,
2685 I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2686 if (INTEL_GEN(req->i915) >= 8) {
2687 intel_ring_emit(ring, 0); /* upper addr */
2688 intel_ring_emit(ring, 0); /* value */
2689 } else {
2690 intel_ring_emit(ring, 0);
2691 intel_ring_emit(ring, MI_NOOP);
2692 }
2693 intel_ring_advance(ring);
2694
2695 return 0;
2696 }
2697
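/*
 * Set up inter-engine semaphores: gen8+ signals through a shared
 * semaphore bo (allocated and pinned once per device), while gen6/7 use
 * the per-engine mailbox registers; semaphores are disabled entirely if
 * allocating or pinning that bo fails.
 */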
2698 static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
2699 struct intel_engine_cs *engine)
2700 {
2701 struct drm_i915_gem_object *obj;
2702 int ret, i;
2703
2704 if (!i915.semaphores)
2705 return;
2706
2707 if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) {
2708 obj = i915_gem_object_create(&dev_priv->drm, 4096);
2709 if (IS_ERR(obj)) {
2710 DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
2711 i915.semaphores = 0;
2712 } else {
2713 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2714 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
2715 if (ret != 0) {
2716 i915_gem_object_put(obj);
2717 DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2718 i915.semaphores = 0;
2719 } else {
2720 dev_priv->semaphore_obj = obj;
2721 }
2722 }
2723 }
2724
2725 if (!i915.semaphores)
2726 return;
2727
2728 if (INTEL_GEN(dev_priv) >= 8) {
2729 u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj);
2730
2731 engine->semaphore.sync_to = gen8_ring_sync;
2732 engine->semaphore.signal = gen8_xcs_signal;
2733
2734 for (i = 0; i < I915_NUM_ENGINES; i++) {
2735 u64 ring_offset;
2736
2737 if (i != engine->id)
2738 ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i);
2739 else
2740 ring_offset = MI_SEMAPHORE_SYNC_INVALID;
2741
2742 engine->semaphore.signal_ggtt[i] = ring_offset;
2743 }
2744 } else if (INTEL_GEN(dev_priv) >= 6) {
2745 engine->semaphore.sync_to = gen6_ring_sync;
2746 engine->semaphore.signal = gen6_signal;
2747
2748 /*
2749 * This semaphore scheme is only used on pre-gen8 platforms,
2750 * and there is no VCS2 ring on those platforms, so the
2751 * semaphore between RCS and VCS2 is initialized as INVALID.
2752 * Gen8 initializes the semaphore between VCS2 and RCS
2753 * separately.
2754 */
2755 for (i = 0; i < I915_NUM_ENGINES; i++) {
2756 static const struct {
2757 u32 wait_mbox;
2758 i915_reg_t mbox_reg;
2759 } sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = {
2760 [RCS] = {
2761 [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
2762 [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
2763 [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
2764 },
2765 [VCS] = {
2766 [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
2767 [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
2768 [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
2769 },
2770 [BCS] = {
2771 [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
2772 [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
2773 [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
2774 },
2775 [VECS] = {
2776 [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
2777 [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
2778 [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
2779 },
2780 };
2781 u32 wait_mbox;
2782 i915_reg_t mbox_reg;
2783
2784 if (i == engine->id || i == VCS2) {
2785 wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
2786 mbox_reg = GEN6_NOSYNC;
2787 } else {
2788 wait_mbox = sem_data[engine->id][i].wait_mbox;
2789 mbox_reg = sem_data[engine->id][i].mbox_reg;
2790 }
2791
2792 engine->semaphore.mbox.wait[i] = wait_mbox;
2793 engine->semaphore.mbox.signal[i] = mbox_reg;
2794 }
2795 }
2796 }
2797
2798 static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
2799 struct intel_engine_cs *engine)
2800 {
2801 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;
2802
2803 if (INTEL_GEN(dev_priv) >= 8) {
2804 engine->irq_enable = gen8_irq_enable;
2805 engine->irq_disable = gen8_irq_disable;
2806 engine->irq_seqno_barrier = gen6_seqno_barrier;
2807 } else if (INTEL_GEN(dev_priv) >= 6) {
2808 engine->irq_enable = gen6_irq_enable;
2809 engine->irq_disable = gen6_irq_disable;
2810 engine->irq_seqno_barrier = gen6_seqno_barrier;
2811 } else if (INTEL_GEN(dev_priv) >= 5) {
2812 engine->irq_enable = gen5_irq_enable;
2813 engine->irq_disable = gen5_irq_disable;
2814 engine->irq_seqno_barrier = gen5_seqno_barrier;
2815 } else if (INTEL_GEN(dev_priv) >= 3) {
2816 engine->irq_enable = i9xx_irq_enable;
2817 engine->irq_disable = i9xx_irq_disable;
2818 } else {
2819 engine->irq_enable = i8xx_irq_enable;
2820 engine->irq_disable = i8xx_irq_disable;
2821 }
2822 }
2823
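/*
 * Install the default vfuncs for this platform generation; the
 * engine-specific init functions below override individual hooks where
 * the render, BSD, blitter or vebox engines differ.
 */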
2824 static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
2825 struct intel_engine_cs *engine)
2826 {
2827 engine->init_hw = init_ring_common;
2828 engine->write_tail = ring_write_tail;
2829
2830 engine->add_request = i9xx_add_request;
2831 if (INTEL_GEN(dev_priv) >= 6)
2832 engine->add_request = gen6_add_request;
2833
2834 if (INTEL_GEN(dev_priv) >= 8)
2835 engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
2836 else if (INTEL_GEN(dev_priv) >= 6)
2837 engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2838 else if (INTEL_GEN(dev_priv) >= 4)
2839 engine->dispatch_execbuffer = i965_dispatch_execbuffer;
2840 else if (IS_I830(dev_priv) || IS_845G(dev_priv))
2841 engine->dispatch_execbuffer = i830_dispatch_execbuffer;
2842 else
2843 engine->dispatch_execbuffer = i915_dispatch_execbuffer;
2844
2845 intel_ring_init_irq(dev_priv, engine);
2846 intel_ring_init_semaphores(dev_priv, engine);
2847 }
2848
2849 int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
2850 {
2851 struct drm_i915_private *dev_priv = engine->i915;
2852 int ret;
2853
2854 intel_ring_default_vfuncs(dev_priv, engine);
2855
2856 if (HAS_L3_DPF(dev_priv))
2857 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
2858
2859 if (INTEL_GEN(dev_priv) >= 8) {
2860 engine->init_context = intel_rcs_ctx_init;
2861 engine->add_request = gen8_render_add_request;
2862 engine->flush = gen8_render_ring_flush;
2863 if (i915.semaphores)
2864 engine->semaphore.signal = gen8_rcs_signal;
2865 } else if (INTEL_GEN(dev_priv) >= 6) {
2866 engine->init_context = intel_rcs_ctx_init;
2867 engine->flush = gen7_render_ring_flush;
2868 if (IS_GEN6(dev_priv))
2869 engine->flush = gen6_render_ring_flush;
2870 } else if (IS_GEN5(dev_priv)) {
2871 engine->flush = gen4_render_ring_flush;
2872 } else {
2873 if (INTEL_GEN(dev_priv) < 4)
2874 engine->flush = gen2_render_ring_flush;
2875 else
2876 engine->flush = gen4_render_ring_flush;
2877 engine->irq_enable_mask = I915_USER_INTERRUPT;
2878 }
2879
2880 if (IS_HASWELL(dev_priv))
2881 engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
2882
2883 engine->init_hw = init_render_ring;
2884 engine->cleanup = render_ring_cleanup;
2885
2886 ret = intel_init_ring_buffer(engine);
2887 if (ret)
2888 return ret;
2889
2890 if (INTEL_GEN(dev_priv) >= 6) {
2891 ret = intel_init_pipe_control(engine, 4096);
2892 if (ret)
2893 return ret;
2894 } else if (HAS_BROKEN_CS_TLB(dev_priv)) {
2895 ret = intel_init_pipe_control(engine, I830_WA_SIZE);
2896 if (ret)
2897 return ret;
2898 }
2899
2900 return 0;
2901 }
2902
2903 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
2904 {
2905 struct drm_i915_private *dev_priv = engine->i915;
2906
2907 intel_ring_default_vfuncs(dev_priv, engine);
2908
2909 if (INTEL_GEN(dev_priv) >= 6) {
2910 /* gen6 bsd needs a special wa for tail updates */
2911 if (IS_GEN6(dev_priv))
2912 engine->write_tail = gen6_bsd_ring_write_tail;
2913 engine->flush = gen6_bsd_ring_flush;
2914 if (INTEL_GEN(dev_priv) < 8)
2915 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2916 } else {
2917 engine->mmio_base = BSD_RING_BASE;
2918 engine->flush = bsd_ring_flush;
2919 if (IS_GEN5(dev_priv))
2920 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
2921 else
2922 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
2923 }
2924
2925 return intel_init_ring_buffer(engine);
2926 }
2927
2928 /**
2929 * Initialize the second BSD ring (e.g. Broadwell GT3, Skylake GT3)
2930 */
2931 int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine)
2932 {
2933 struct drm_i915_private *dev_priv = engine->i915;
2934
2935 intel_ring_default_vfuncs(dev_priv, engine);
2936
2937 engine->flush = gen6_bsd_ring_flush;
2938
2939 return intel_init_ring_buffer(engine);
2940 }
2941
2942 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
2943 {
2944 struct drm_i915_private *dev_priv = engine->i915;
2945
2946 intel_ring_default_vfuncs(dev_priv, engine);
2947
2948 engine->flush = gen6_ring_flush;
2949 if (INTEL_GEN(dev_priv) < 8)
2950 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
2951
2952 return intel_init_ring_buffer(engine);
2953 }
2954
2955 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
2956 {
2957 struct drm_i915_private *dev_priv = engine->i915;
2958
2959 intel_ring_default_vfuncs(dev_priv, engine);
2960
2961 engine->flush = gen6_ring_flush;
2962
2963 if (INTEL_GEN(dev_priv) < 8) {
2964 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
2965 engine->irq_enable = hsw_vebox_irq_enable;
2966 engine->irq_disable = hsw_vebox_irq_disable;
2967 }
2968
2969 return intel_init_ring_buffer(engine);
2970 }
2971
2972 int
2973 intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
2974 {
2975 struct intel_engine_cs *engine = req->engine;
2976 int ret;
2977
2978 if (!engine->gpu_caches_dirty)
2979 return 0;
2980
2981 ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS);
2982 if (ret)
2983 return ret;
2984
2985 trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
2986
2987 engine->gpu_caches_dirty = false;
2988 return 0;
2989 }
2990
2991 int
2992 intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
2993 {
2994 struct intel_engine_cs *engine = req->engine;
2995 uint32_t flush_domains;
2996 int ret;
2997
2998 flush_domains = 0;
2999 if (engine->gpu_caches_dirty)
3000 flush_domains = I915_GEM_GPU_DOMAINS;
3001
3002 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3003 if (ret)
3004 return ret;
3005
3006 trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3007
3008 engine->gpu_caches_dirty = false;
3009 return 0;
3010 }
3011
3012 void
3013 intel_stop_engine(struct intel_engine_cs *engine)
3014 {
3015 int ret;
3016
3017 if (!intel_engine_initialized(engine))
3018 return;
3019
3020 ret = intel_engine_idle(engine);
3021 if (ret)
3022 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
3023 engine->name, ret);
3024
3025 stop_ring(engine);
3026 }