Merge branches 'acpi-smbus', 'acpi-ec' and 'acpi-pci'
[deliverable/linux.git] / drivers / gpu / drm / i915 / intel_ringbuffer.c
CommitLineData
62fdfeaf
EA
1/*
2 * Copyright © 2008-2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 * Zou Nan hai <nanhai.zou@intel.com>
26 * Xiang Hai hao<haihao.xiang@intel.com>
27 *
28 */
29
760285e7 30#include <drm/drmP.h>
62fdfeaf 31#include "i915_drv.h"
760285e7 32#include <drm/i915_drm.h>
62fdfeaf 33#include "i915_trace.h"
881f47b6 34#include "intel_drv.h"
62fdfeaf 35
48d82387
OM
36bool
37intel_ring_initialized(struct intel_engine_cs *ring)
38{
39 struct drm_device *dev = ring->dev;
40
41 if (!dev)
42 return false;
43
44 if (i915.enable_execlists) {
45 struct intel_context *dctx = ring->default_context;
46 struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf;
47
48 return ringbuf->obj;
49 } else
50 return ring->buffer && ring->buffer->obj;
51}
18393f63 52
82e104cc 53int __intel_ring_space(int head, int tail, int size)
c7dca47b 54{
4f54741e
DG
55 int space = head - tail;
56 if (space <= 0)
1cf0ba14 57 space += size;
4f54741e 58 return space - I915_RING_FREE_SPACE;
c7dca47b
CW
59}
60
ebd0fd4b
DG
61void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
62{
63 if (ringbuf->last_retired_head != -1) {
64 ringbuf->head = ringbuf->last_retired_head;
65 ringbuf->last_retired_head = -1;
66 }
67
68 ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
69 ringbuf->tail, ringbuf->size);
70}
71
82e104cc 72int intel_ring_space(struct intel_ringbuffer *ringbuf)
1cf0ba14 73{
ebd0fd4b
DG
74 intel_ring_update_space(ringbuf);
75 return ringbuf->space;
1cf0ba14
CW
76}
77
82e104cc 78bool intel_ring_stopped(struct intel_engine_cs *ring)
09246732
CW
79{
80 struct drm_i915_private *dev_priv = ring->dev->dev_private;
88b4aa87
MK
81 return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
82}
09246732 83
6258fbe2 84static void __intel_ring_advance(struct intel_engine_cs *ring)
88b4aa87 85{
93b0a4e0
OM
86 struct intel_ringbuffer *ringbuf = ring->buffer;
87 ringbuf->tail &= ringbuf->size - 1;
88b4aa87 88 if (intel_ring_stopped(ring))
09246732 89 return;
93b0a4e0 90 ring->write_tail(ring, ringbuf->tail);
09246732
CW
91}
92
b72f3acb 93static int
a84c3ae1 94gen2_render_ring_flush(struct drm_i915_gem_request *req,
46f0f8d1
CW
95 u32 invalidate_domains,
96 u32 flush_domains)
97{
a84c3ae1 98 struct intel_engine_cs *ring = req->ring;
46f0f8d1
CW
99 u32 cmd;
100 int ret;
101
102 cmd = MI_FLUSH;
31b14c9f 103 if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
46f0f8d1
CW
104 cmd |= MI_NO_WRITE_FLUSH;
105
106 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
107 cmd |= MI_READ_FLUSH;
108
5fb9de1a 109 ret = intel_ring_begin(req, 2);
46f0f8d1
CW
110 if (ret)
111 return ret;
112
113 intel_ring_emit(ring, cmd);
114 intel_ring_emit(ring, MI_NOOP);
115 intel_ring_advance(ring);
116
117 return 0;
118}
119
120static int
a84c3ae1 121gen4_render_ring_flush(struct drm_i915_gem_request *req,
46f0f8d1
CW
122 u32 invalidate_domains,
123 u32 flush_domains)
62fdfeaf 124{
a84c3ae1 125 struct intel_engine_cs *ring = req->ring;
78501eac 126 struct drm_device *dev = ring->dev;
6f392d54 127 u32 cmd;
b72f3acb 128 int ret;
6f392d54 129
36d527de
CW
130 /*
131 * read/write caches:
132 *
133 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
134 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
135 * also flushed at 2d versus 3d pipeline switches.
136 *
137 * read-only caches:
138 *
139 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
140 * MI_READ_FLUSH is set, and is always flushed on 965.
141 *
142 * I915_GEM_DOMAIN_COMMAND may not exist?
143 *
144 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
145 * invalidated when MI_EXE_FLUSH is set.
146 *
147 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
148 * invalidated with every MI_FLUSH.
149 *
150 * TLBs:
151 *
152 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
153 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
154 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
155 * are flushed at any MI_FLUSH.
156 */
157
158 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
46f0f8d1 159 if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
36d527de 160 cmd &= ~MI_NO_WRITE_FLUSH;
36d527de
CW
161 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
162 cmd |= MI_EXE_FLUSH;
62fdfeaf 163
36d527de
CW
164 if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
165 (IS_G4X(dev) || IS_GEN5(dev)))
166 cmd |= MI_INVALIDATE_ISP;
70eac33e 167
5fb9de1a 168 ret = intel_ring_begin(req, 2);
36d527de
CW
169 if (ret)
170 return ret;
b72f3acb 171
36d527de
CW
172 intel_ring_emit(ring, cmd);
173 intel_ring_emit(ring, MI_NOOP);
174 intel_ring_advance(ring);
b72f3acb
CW
175
176 return 0;
8187a2b7
ZN
177}
178
8d315287
JB
179/**
180 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
181 * implementing two workarounds on gen6. From section 1.4.7.1
182 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
183 *
184 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
185 * produced by non-pipelined state commands), software needs to first
186 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
187 * 0.
188 *
189 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
190 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
191 *
192 * And the workaround for these two requires this workaround first:
193 *
194 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
195 * BEFORE the pipe-control with a post-sync op and no write-cache
196 * flushes.
197 *
198 * And this last workaround is tricky because of the requirements on
199 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
200 * volume 2 part 1:
201 *
202 * "1 of the following must also be set:
203 * - Render Target Cache Flush Enable ([12] of DW1)
204 * - Depth Cache Flush Enable ([0] of DW1)
205 * - Stall at Pixel Scoreboard ([1] of DW1)
206 * - Depth Stall ([13] of DW1)
207 * - Post-Sync Operation ([13] of DW1)
208 * - Notify Enable ([8] of DW1)"
209 *
210 * The cache flushes require the workaround flush that triggered this
211 * one, so we can't use it. Depth stall would trigger the same.
212 * Post-sync nonzero is what triggered this second workaround, so we
213 * can't use that one either. Notify enable is IRQs, which aren't
214 * really our business. That leaves only stall at scoreboard.
215 */
216static int
f2cf1fcc 217intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
8d315287 218{
f2cf1fcc 219 struct intel_engine_cs *ring = req->ring;
18393f63 220 u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
8d315287
JB
221 int ret;
222
5fb9de1a 223 ret = intel_ring_begin(req, 6);
8d315287
JB
224 if (ret)
225 return ret;
226
227 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
228 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
229 PIPE_CONTROL_STALL_AT_SCOREBOARD);
230 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
231 intel_ring_emit(ring, 0); /* low dword */
232 intel_ring_emit(ring, 0); /* high dword */
233 intel_ring_emit(ring, MI_NOOP);
234 intel_ring_advance(ring);
235
5fb9de1a 236 ret = intel_ring_begin(req, 6);
8d315287
JB
237 if (ret)
238 return ret;
239
240 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
241 intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
242 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
243 intel_ring_emit(ring, 0);
244 intel_ring_emit(ring, 0);
245 intel_ring_emit(ring, MI_NOOP);
246 intel_ring_advance(ring);
247
248 return 0;
249}
250
251static int
a84c3ae1
JH
252gen6_render_ring_flush(struct drm_i915_gem_request *req,
253 u32 invalidate_domains, u32 flush_domains)
8d315287 254{
a84c3ae1 255 struct intel_engine_cs *ring = req->ring;
8d315287 256 u32 flags = 0;
18393f63 257 u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
8d315287
JB
258 int ret;
259
b3111509 260 /* Force SNB workarounds for PIPE_CONTROL flushes */
f2cf1fcc 261 ret = intel_emit_post_sync_nonzero_flush(req);
b3111509
PZ
262 if (ret)
263 return ret;
264
8d315287
JB
265 /* Just flush everything. Experiments have shown that reducing the
266 * number of bits based on the write domains has little performance
267 * impact.
268 */
7d54a904
CW
269 if (flush_domains) {
270 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
271 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
272 /*
273 * Ensure that any following seqno writes only happen
274 * when the render cache is indeed flushed.
275 */
97f209bc 276 flags |= PIPE_CONTROL_CS_STALL;
7d54a904
CW
277 }
278 if (invalidate_domains) {
279 flags |= PIPE_CONTROL_TLB_INVALIDATE;
280 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
281 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
282 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
283 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
284 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
285 /*
286 * TLB invalidate requires a post-sync write.
287 */
3ac78313 288 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
7d54a904 289 }
8d315287 290
5fb9de1a 291 ret = intel_ring_begin(req, 4);
8d315287
JB
292 if (ret)
293 return ret;
294
6c6cf5aa 295 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
8d315287
JB
296 intel_ring_emit(ring, flags);
297 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
6c6cf5aa 298 intel_ring_emit(ring, 0);
8d315287
JB
299 intel_ring_advance(ring);
300
301 return 0;
302}
303
f3987631 304static int
f2cf1fcc 305gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
f3987631 306{
f2cf1fcc 307 struct intel_engine_cs *ring = req->ring;
f3987631
PZ
308 int ret;
309
5fb9de1a 310 ret = intel_ring_begin(req, 4);
f3987631
PZ
311 if (ret)
312 return ret;
313
314 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
315 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
316 PIPE_CONTROL_STALL_AT_SCOREBOARD);
317 intel_ring_emit(ring, 0);
318 intel_ring_emit(ring, 0);
319 intel_ring_advance(ring);
320
321 return 0;
322}
323
4772eaeb 324static int
a84c3ae1 325gen7_render_ring_flush(struct drm_i915_gem_request *req,
4772eaeb
PZ
326 u32 invalidate_domains, u32 flush_domains)
327{
a84c3ae1 328 struct intel_engine_cs *ring = req->ring;
4772eaeb 329 u32 flags = 0;
18393f63 330 u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
4772eaeb
PZ
331 int ret;
332
f3987631
PZ
333 /*
334 * Ensure that any following seqno writes only happen when the render
335 * cache is indeed flushed.
336 *
337 * Workaround: 4th PIPE_CONTROL command (except the ones with only
338 * read-cache invalidate bits set) must have the CS_STALL bit set. We
339 * don't try to be clever and just set it unconditionally.
340 */
341 flags |= PIPE_CONTROL_CS_STALL;
342
4772eaeb
PZ
343 /* Just flush everything. Experiments have shown that reducing the
344 * number of bits based on the write domains has little performance
345 * impact.
346 */
347 if (flush_domains) {
348 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
349 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
40a24488 350 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4772eaeb
PZ
351 }
352 if (invalidate_domains) {
353 flags |= PIPE_CONTROL_TLB_INVALIDATE;
354 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
355 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
356 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
357 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
358 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
148b83d0 359 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
4772eaeb
PZ
360 /*
361 * TLB invalidate requires a post-sync write.
362 */
363 flags |= PIPE_CONTROL_QW_WRITE;
b9e1faa7 364 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
f3987631 365
add284a3
CW
366 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
367
f3987631
PZ
368 /* Workaround: we must issue a pipe_control with CS-stall bit
369 * set before a pipe_control command that has the state cache
370 * invalidate bit set. */
f2cf1fcc 371 gen7_render_ring_cs_stall_wa(req);
4772eaeb
PZ
372 }
373
5fb9de1a 374 ret = intel_ring_begin(req, 4);
4772eaeb
PZ
375 if (ret)
376 return ret;
377
378 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
379 intel_ring_emit(ring, flags);
b9e1faa7 380 intel_ring_emit(ring, scratch_addr);
4772eaeb
PZ
381 intel_ring_emit(ring, 0);
382 intel_ring_advance(ring);
383
384 return 0;
385}
386
884ceace 387static int
f2cf1fcc 388gen8_emit_pipe_control(struct drm_i915_gem_request *req,
884ceace
KG
389 u32 flags, u32 scratch_addr)
390{
f2cf1fcc 391 struct intel_engine_cs *ring = req->ring;
884ceace
KG
392 int ret;
393
5fb9de1a 394 ret = intel_ring_begin(req, 6);
884ceace
KG
395 if (ret)
396 return ret;
397
398 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
399 intel_ring_emit(ring, flags);
400 intel_ring_emit(ring, scratch_addr);
401 intel_ring_emit(ring, 0);
402 intel_ring_emit(ring, 0);
403 intel_ring_emit(ring, 0);
404 intel_ring_advance(ring);
405
406 return 0;
407}
408
a5f3d68e 409static int
a84c3ae1 410gen8_render_ring_flush(struct drm_i915_gem_request *req,
a5f3d68e
BW
411 u32 invalidate_domains, u32 flush_domains)
412{
413 u32 flags = 0;
f2cf1fcc 414 u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
02c9f7e3 415 int ret;
a5f3d68e
BW
416
417 flags |= PIPE_CONTROL_CS_STALL;
418
419 if (flush_domains) {
420 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
421 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
40a24488 422 flags |= PIPE_CONTROL_FLUSH_ENABLE;
a5f3d68e
BW
423 }
424 if (invalidate_domains) {
425 flags |= PIPE_CONTROL_TLB_INVALIDATE;
426 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
427 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
428 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
429 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
430 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
431 flags |= PIPE_CONTROL_QW_WRITE;
432 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
02c9f7e3
KG
433
434 /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
f2cf1fcc 435 ret = gen8_emit_pipe_control(req,
02c9f7e3
KG
436 PIPE_CONTROL_CS_STALL |
437 PIPE_CONTROL_STALL_AT_SCOREBOARD,
438 0);
439 if (ret)
440 return ret;
a5f3d68e
BW
441 }
442
f2cf1fcc 443 return gen8_emit_pipe_control(req, flags, scratch_addr);
a5f3d68e
BW
444}
445
a4872ba6 446static void ring_write_tail(struct intel_engine_cs *ring,
297b0c5b 447 u32 value)
d46eefa2 448{
4640c4ff 449 struct drm_i915_private *dev_priv = ring->dev->dev_private;
297b0c5b 450 I915_WRITE_TAIL(ring, value);
d46eefa2
XH
451}
452
a4872ba6 453u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
8187a2b7 454{
4640c4ff 455 struct drm_i915_private *dev_priv = ring->dev->dev_private;
50877445 456 u64 acthd;
8187a2b7 457
50877445
CW
458 if (INTEL_INFO(ring->dev)->gen >= 8)
459 acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
460 RING_ACTHD_UDW(ring->mmio_base));
461 else if (INTEL_INFO(ring->dev)->gen >= 4)
462 acthd = I915_READ(RING_ACTHD(ring->mmio_base));
463 else
464 acthd = I915_READ(ACTHD);
465
466 return acthd;
8187a2b7
ZN
467}
468
a4872ba6 469static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
035dc1e0
DV
470{
471 struct drm_i915_private *dev_priv = ring->dev->dev_private;
472 u32 addr;
473
474 addr = dev_priv->status_page_dmah->busaddr;
475 if (INTEL_INFO(ring->dev)->gen >= 4)
476 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
477 I915_WRITE(HWS_PGA, addr);
478}
479
af75f269
DL
480static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
481{
482 struct drm_device *dev = ring->dev;
483 struct drm_i915_private *dev_priv = ring->dev->dev_private;
484 u32 mmio = 0;
485
486 /* The ring status page addresses are no longer next to the rest of
487 * the ring registers as of gen7.
488 */
489 if (IS_GEN7(dev)) {
490 switch (ring->id) {
491 case RCS:
492 mmio = RENDER_HWS_PGA_GEN7;
493 break;
494 case BCS:
495 mmio = BLT_HWS_PGA_GEN7;
496 break;
497 /*
498 * VCS2 actually doesn't exist on Gen7. Only shut up
499 * gcc switch check warning
500 */
501 case VCS2:
502 case VCS:
503 mmio = BSD_HWS_PGA_GEN7;
504 break;
505 case VECS:
506 mmio = VEBOX_HWS_PGA_GEN7;
507 break;
508 }
509 } else if (IS_GEN6(ring->dev)) {
510 mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
511 } else {
512 /* XXX: gen8 returns to sanity */
513 mmio = RING_HWS_PGA(ring->mmio_base);
514 }
515
516 I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
517 POSTING_READ(mmio);
518
519 /*
520 * Flush the TLB for this page
521 *
522 * FIXME: These two bits have disappeared on gen8, so a question
523 * arises: do we still need this and if so how should we go about
524 * invalidating the TLB?
525 */
526 if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
527 u32 reg = RING_INSTPM(ring->mmio_base);
528
529 /* ring should be idle before issuing a sync flush*/
530 WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
531
532 I915_WRITE(reg,
533 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
534 INSTPM_SYNC_FLUSH));
535 if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
536 1000))
537 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
538 ring->name);
539 }
540}
541
a4872ba6 542static bool stop_ring(struct intel_engine_cs *ring)
8187a2b7 543{
9991ae78 544 struct drm_i915_private *dev_priv = to_i915(ring->dev);
8187a2b7 545
9991ae78
CW
546 if (!IS_GEN2(ring->dev)) {
547 I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
403bdd10
DV
548 if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
549 DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
9bec9b13
CW
550 /* Sometimes we observe that the idle flag is not
551 * set even though the ring is empty. So double
552 * check before giving up.
553 */
554 if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
555 return false;
9991ae78
CW
556 }
557 }
b7884eb4 558
7f2ab699 559 I915_WRITE_CTL(ring, 0);
570ef608 560 I915_WRITE_HEAD(ring, 0);
78501eac 561 ring->write_tail(ring, 0);
8187a2b7 562
9991ae78
CW
563 if (!IS_GEN2(ring->dev)) {
564 (void)I915_READ_CTL(ring);
565 I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
566 }
a51435a3 567
9991ae78
CW
568 return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
569}
8187a2b7 570
a4872ba6 571static int init_ring_common(struct intel_engine_cs *ring)
9991ae78
CW
572{
573 struct drm_device *dev = ring->dev;
574 struct drm_i915_private *dev_priv = dev->dev_private;
93b0a4e0
OM
575 struct intel_ringbuffer *ringbuf = ring->buffer;
576 struct drm_i915_gem_object *obj = ringbuf->obj;
9991ae78
CW
577 int ret = 0;
578
59bad947 579 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
9991ae78
CW
580
581 if (!stop_ring(ring)) {
582 /* G45 ring initialization often fails to reset head to zero */
6fd0d56e
CW
583 DRM_DEBUG_KMS("%s head not reset to zero "
584 "ctl %08x head %08x tail %08x start %08x\n",
585 ring->name,
586 I915_READ_CTL(ring),
587 I915_READ_HEAD(ring),
588 I915_READ_TAIL(ring),
589 I915_READ_START(ring));
8187a2b7 590
9991ae78 591 if (!stop_ring(ring)) {
6fd0d56e
CW
592 DRM_ERROR("failed to set %s head to zero "
593 "ctl %08x head %08x tail %08x start %08x\n",
594 ring->name,
595 I915_READ_CTL(ring),
596 I915_READ_HEAD(ring),
597 I915_READ_TAIL(ring),
598 I915_READ_START(ring));
9991ae78
CW
599 ret = -EIO;
600 goto out;
6fd0d56e 601 }
8187a2b7
ZN
602 }
603
9991ae78
CW
604 if (I915_NEED_GFX_HWS(dev))
605 intel_ring_setup_status_page(ring);
606 else
607 ring_setup_phys_status_page(ring);
608
ece4a17d
JK
609 /* Enforce ordering by reading HEAD register back */
610 I915_READ_HEAD(ring);
611
0d8957c8
DV
612 /* Initialize the ring. This must happen _after_ we've cleared the ring
613 * registers with the above sequence (the readback of the HEAD registers
614 * also enforces ordering), otherwise the hw might lose the new ring
615 * register values. */
f343c5f6 616 I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
95468892
CW
617
618 /* WaClearRingBufHeadRegAtInit:ctg,elk */
619 if (I915_READ_HEAD(ring))
620 DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
621 ring->name, I915_READ_HEAD(ring));
622 I915_WRITE_HEAD(ring, 0);
623 (void)I915_READ_HEAD(ring);
624
7f2ab699 625 I915_WRITE_CTL(ring,
93b0a4e0 626 ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
5d031e5b 627 | RING_VALID);
8187a2b7 628
8187a2b7 629 /* If the head is still not zero, the ring is dead */
f01db988 630 if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
f343c5f6 631 I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
f01db988 632 (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
e74cfed5 633 DRM_ERROR("%s initialization failed "
48e48a0b
CW
634 "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
635 ring->name,
636 I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
637 I915_READ_HEAD(ring), I915_READ_TAIL(ring),
638 I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
b7884eb4
DV
639 ret = -EIO;
640 goto out;
8187a2b7
ZN
641 }
642
ebd0fd4b 643 ringbuf->last_retired_head = -1;
5c6c6003
CW
644 ringbuf->head = I915_READ_HEAD(ring);
645 ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
ebd0fd4b 646 intel_ring_update_space(ringbuf);
1ec14ad3 647
50f018df
CW
648 memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
649
b7884eb4 650out:
59bad947 651 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
b7884eb4
DV
652
653 return ret;
8187a2b7
ZN
654}
655
9b1136d5
OM
656void
657intel_fini_pipe_control(struct intel_engine_cs *ring)
658{
659 struct drm_device *dev = ring->dev;
660
661 if (ring->scratch.obj == NULL)
662 return;
663
664 if (INTEL_INFO(dev)->gen >= 5) {
665 kunmap(sg_page(ring->scratch.obj->pages->sgl));
666 i915_gem_object_ggtt_unpin(ring->scratch.obj);
667 }
668
669 drm_gem_object_unreference(&ring->scratch.obj->base);
670 ring->scratch.obj = NULL;
671}
672
673int
674intel_init_pipe_control(struct intel_engine_cs *ring)
c6df541c 675{
c6df541c
CW
676 int ret;
677
bfc882b4 678 WARN_ON(ring->scratch.obj);
c6df541c 679
0d1aacac
CW
680 ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
681 if (ring->scratch.obj == NULL) {
c6df541c
CW
682 DRM_ERROR("Failed to allocate seqno page\n");
683 ret = -ENOMEM;
684 goto err;
685 }
e4ffd173 686
a9cc726c
DV
687 ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
688 if (ret)
689 goto err_unref;
c6df541c 690
1ec9e26d 691 ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
c6df541c
CW
692 if (ret)
693 goto err_unref;
694
0d1aacac
CW
695 ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
696 ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
697 if (ring->scratch.cpu_page == NULL) {
56b085a0 698 ret = -ENOMEM;
c6df541c 699 goto err_unpin;
56b085a0 700 }
c6df541c 701
2b1086cc 702 DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
0d1aacac 703 ring->name, ring->scratch.gtt_offset);
c6df541c
CW
704 return 0;
705
706err_unpin:
d7f46fc4 707 i915_gem_object_ggtt_unpin(ring->scratch.obj);
c6df541c 708err_unref:
0d1aacac 709 drm_gem_object_unreference(&ring->scratch.obj->base);
c6df541c 710err:
c6df541c
CW
711 return ret;
712}
713
e2be4faf 714static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
86d7f238 715{
7225342a 716 int ret, i;
e2be4faf 717 struct intel_engine_cs *ring = req->ring;
888b5995
AS
718 struct drm_device *dev = ring->dev;
719 struct drm_i915_private *dev_priv = dev->dev_private;
7225342a 720 struct i915_workarounds *w = &dev_priv->workarounds;
888b5995 721
02235808 722 if (w->count == 0)
7225342a 723 return 0;
888b5995 724
7225342a 725 ring->gpu_caches_dirty = true;
4866d729 726 ret = intel_ring_flush_all_caches(req);
7225342a
MK
727 if (ret)
728 return ret;
888b5995 729
5fb9de1a 730 ret = intel_ring_begin(req, (w->count * 2 + 2));
7225342a
MK
731 if (ret)
732 return ret;
733
22a916aa 734 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
7225342a 735 for (i = 0; i < w->count; i++) {
7225342a
MK
736 intel_ring_emit(ring, w->reg[i].addr);
737 intel_ring_emit(ring, w->reg[i].value);
738 }
22a916aa 739 intel_ring_emit(ring, MI_NOOP);
7225342a
MK
740
741 intel_ring_advance(ring);
742
743 ring->gpu_caches_dirty = true;
4866d729 744 ret = intel_ring_flush_all_caches(req);
7225342a
MK
745 if (ret)
746 return ret;
888b5995 747
7225342a 748 DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
888b5995 749
7225342a 750 return 0;
86d7f238
AS
751}
752
/*
 * intel_rcs_ctx_init - emit workarounds, then the render state
 * @req: request to emit into
 *
 * A workaround emission failure is returned silently; a render state
 * initialisation failure is logged before being returned.
 */
static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
{
	int err = intel_ring_workarounds_emit(req);

	if (err != 0)
		return err;

	err = i915_gem_render_state_init(req);
	if (err)
		DRM_ERROR("init render state: %d\n", err);

	return err;
}
767
7225342a 768static int wa_add(struct drm_i915_private *dev_priv,
cf4b0de6 769 const u32 addr, const u32 mask, const u32 val)
7225342a
MK
770{
771 const u32 idx = dev_priv->workarounds.count;
772
773 if (WARN_ON(idx >= I915_MAX_WA_REGS))
774 return -ENOSPC;
775
776 dev_priv->workarounds.reg[idx].addr = addr;
777 dev_priv->workarounds.reg[idx].value = val;
778 dev_priv->workarounds.reg[idx].mask = mask;
779
780 dev_priv->workarounds.count++;
781
782 return 0;
86d7f238
AS
783}
784
/*
 * Workaround-list helpers.
 *
 * Each helper records one (register, mask, value) entry through wa_add().
 * If wa_add() fails, the helper makes the *enclosing function* return that
 * error code, so these may only be used inside functions returning int.
 * They also expect a local variable named dev_priv to be in scope.
 *
 * The temporary inside WA_REG() carries a deliberately unusual name so that
 * it cannot capture a variable mentioned in one of the macro arguments.
 */
#define WA_REG(addr, mask, val) do { \
		const int wa_ret_ = wa_add(dev_priv, (addr), (mask), (val)); \
		if (wa_ret_) \
			return wa_ret_; \
	} while (0)

/* Masked-register variants: the value is built by the _MASKED_* helpers. */
#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, (mask), _MASKED_FIELD(mask, value))

/*
 * Plain read-modify variants: the register is read with I915_READ() at the
 * time the entry is recorded and the modified value is what gets stored.
 */
#define WA_SET_BIT(addr, mask) WA_REG(addr, (mask), I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, (mask), I915_READ(addr) & ~(mask))

/* Record a full 32-bit write. */
#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
7225342a 804
e9a64ada 805static int gen8_init_workarounds(struct intel_engine_cs *ring)
86d7f238 806{
888b5995
AS
807 struct drm_device *dev = ring->dev;
808 struct drm_i915_private *dev_priv = dev->dev_private;
86d7f238 809
9cc83020
VS
810 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
811
717d84d6 812 /* WaDisableAsyncFlipPerfMode:bdw,chv */
2441f877
VS
813 WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
814
d0581194 815 /* WaDisablePartialInstShootdown:bdw,chv */
7225342a 816 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
d0581194 817 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
86d7f238
AS
818
819 /* Use Force Non-Coherent whenever executing a 3D context. This is a
820 * workaround for for a possible hang in the unlikely event a TLB
821 * invalidation occurs during a PSD flush.
822 */
a340af58 823 /* WaForceEnableNonCoherent:bdw,chv */
120f5d28 824 /* WaHdcDisableFetchWhenMasked:bdw,chv */
7225342a 825 WA_SET_BIT_MASKED(HDC_CHICKEN0,
f3f32360 826 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
a340af58 827 HDC_FORCE_NON_COHERENT);
86d7f238 828
2701fc43
KG
829 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
830 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
831 * polygons in the same 8x4 pixel/sample area to be processed without
832 * stalling waiting for the earlier ones to write to Hierarchical Z
833 * buffer."
834 *
6def8fdd 835 * This optimization is off by default for BDW and CHV; turn it on.
2701fc43
KG
836 */
837 WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
838
48404636
AS
839 /* Wa4x4STCOptimizationDisable:bdw,chv */
840 WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
86d7f238
AS
841
842 /*
843 * BSpec recommends 8x4 when MSAA is used,
844 * however in practice 16x4 seems fastest.
845 *
846 * Note that PS/WM thread counts depend on the WIZ hashing
847 * disable bit, which we don't touch here, but it's good
848 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
849 */
98533251
DL
850 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
851 GEN6_WIZ_HASHING_MASK,
852 GEN6_WIZ_HASHING_16x4);
888b5995 853
86d7f238
AS
854 return 0;
855}
856
00e1e623 857static int bdw_init_workarounds(struct intel_engine_cs *ring)
00e1e623 858{
e9a64ada 859 int ret;
00e1e623
VS
860 struct drm_device *dev = ring->dev;
861 struct drm_i915_private *dev_priv = dev->dev_private;
862
e9a64ada
AS
863 ret = gen8_init_workarounds(ring);
864 if (ret)
865 return ret;
9cc83020 866
101b376d 867 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
d0581194 868 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
2441f877 869
101b376d 870 /* WaDisableDopClockGating:bdw */
7225342a
MK
871 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
872 DOP_CLOCK_GATING_DISABLE);
86d7f238 873
7225342a
MK
874 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
875 GEN8_SAMPLER_POWER_BYPASS_DIS);
00e1e623 876
95289009 877 WA_SET_BIT_MASKED(HDC_CHICKEN0,
35cb6f3b
DL
878 /* WaForceContextSaveRestoreNonCoherent:bdw */
879 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
35cb6f3b 880 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
7225342a 881 (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
95289009 882
86d7f238
AS
883 return 0;
884}
973a5b06 885
00e1e623
VS
886static int chv_init_workarounds(struct intel_engine_cs *ring)
887{
e9a64ada 888 int ret;
00e1e623
VS
889 struct drm_device *dev = ring->dev;
890 struct drm_i915_private *dev_priv = dev->dev_private;
891
e9a64ada
AS
892 ret = gen8_init_workarounds(ring);
893 if (ret)
894 return ret;
895
00e1e623 896 /* WaDisableThreadStallDopClockGating:chv */
d0581194 897 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
14bc16e3 898
d60de81d
KG
899 /* Improve HiZ throughput on CHV. */
900 WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
901
7225342a
MK
902 return 0;
903}
904
3b106531
HN
905static int gen9_init_workarounds(struct intel_engine_cs *ring)
906{
ab0dfafe
HN
907 struct drm_device *dev = ring->dev;
908 struct drm_i915_private *dev_priv = dev->dev_private;
8ea6f892 909 uint32_t tmp;
ab0dfafe 910
9c4cbf82
MK
911 /* WaEnableLbsSlaRetryTimerDecrement:skl */
912 I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
913 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
914
915 /* WaDisableKillLogic:bxt,skl */
916 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
917 ECOCHK_DIS_TLB);
918
b0e6f6d4 919 /* WaDisablePartialInstShootdown:skl,bxt */
ab0dfafe
HN
920 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
921 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
922
a119a6e6 923 /* Syncing dependencies between camera and graphics:skl,bxt */
8424171e
NH
924 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
925 GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
926
d2a31dbd
NH
927 if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
928 INTEL_REVID(dev) == SKL_REVID_B0)) ||
929 (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
930 /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
a86eb582
DL
931 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
932 GEN9_DG_MIRROR_FIX_ENABLE);
1de4582f
NH
933 }
934
a13d215f
NH
935 if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
936 (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
937 /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
183c6dac
DL
938 WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
939 GEN9_RHWO_OPTIMIZATION_DISABLE);
9b01435d
AS
940 /*
941 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
942 * but we do that in per ctx batchbuffer as there is an issue
943 * with this register not getting restored on ctx restore
944 */
183c6dac
DL
945 }
946
27a1b688
NH
947 if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
948 IS_BROXTON(dev)) {
949 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
cac23df4
NH
950 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
951 GEN9_ENABLE_YV12_BUGFIX);
952 }
953
5068368c 954 /* Wa4x4STCOptimizationDisable:skl,bxt */
27160c96 955 /* WaDisablePartialResolveInVc:skl,bxt */
60294683
AS
956 WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
957 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
9370cd98 958
16be17af 959 /* WaCcsTlbPrefetchDisable:skl,bxt */
e2db7071
DL
960 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
961 GEN9_CCS_TLB_PREFETCH_ENABLE);
962
5a2ae95e
ID
963 /* WaDisableMaskBasedCammingInRCC:skl,bxt */
964 if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
965 (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
38a39a7b
BW
966 WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
967 PIXEL_MASK_CAMMING_DISABLE);
968
8ea6f892
ID
969 /* WaForceContextSaveRestoreNonCoherent:skl,bxt */
970 tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
971 if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
972 (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
973 tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
974 WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
975
8c761609
AS
976 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
977 if (IS_SKYLAKE(dev) ||
978 (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_B0)) {
979 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
980 GEN8_SAMPLER_POWER_BYPASS_DIS);
981 }
982
6b6d5626
RB
983 /* WaDisableSTUnitPowerOptimization:skl,bxt */
984 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
985
3b106531
HN
986 return 0;
987}
988
b7668791
DL
989static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
990{
991 struct drm_device *dev = ring->dev;
992 struct drm_i915_private *dev_priv = dev->dev_private;
993 u8 vals[3] = { 0, 0, 0 };
994 unsigned int i;
995
996 for (i = 0; i < 3; i++) {
997 u8 ss;
998
999 /*
1000 * Only consider slices where one, and only one, subslice has 7
1001 * EUs
1002 */
1003 if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
1004 continue;
1005
1006 /*
1007 * subslice_7eu[i] != 0 (because of the check above) and
1008 * ss_max == 4 (maximum number of subslices possible per slice)
1009 *
1010 * -> 0 <= ss <= 3;
1011 */
1012 ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
1013 vals[i] = 3 - ss;
1014 }
1015
1016 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1017 return 0;
1018
1019 /* Tune IZ hashing. See intel_device_info_runtime_init() */
1020 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1021 GEN9_IZ_HASHING_MASK(2) |
1022 GEN9_IZ_HASHING_MASK(1) |
1023 GEN9_IZ_HASHING_MASK(0),
1024 GEN9_IZ_HASHING(2, vals[2]) |
1025 GEN9_IZ_HASHING(1, vals[1]) |
1026 GEN9_IZ_HASHING(0, vals[0]));
1027
1028 return 0;
1029}
1030
8d205494
DL
1031static int skl_init_workarounds(struct intel_engine_cs *ring)
1032{
aa0011a8 1033 int ret;
d0bbbc4f
DL
1034 struct drm_device *dev = ring->dev;
1035 struct drm_i915_private *dev_priv = dev->dev_private;
1036
aa0011a8
AS
1037 ret = gen9_init_workarounds(ring);
1038 if (ret)
1039 return ret;
8d205494 1040
9c4cbf82
MK
1041 if (INTEL_REVID(dev) <= SKL_REVID_D0) {
1042 /* WaDisableHDCInvalidation:skl */
1043 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
1044 BDW_DISABLE_HDC_INVALIDATION);
1045
1046 /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
1047 I915_WRITE(FF_SLICE_CS_CHICKEN2,
1048 _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
1049 }
1050
1051 /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
1052 * involving this register should also be added to WA batch as required.
1053 */
1054 if (INTEL_REVID(dev) <= SKL_REVID_E0)
1055 /* WaDisableLSQCROPERFforOCL:skl */
1056 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1057 GEN8_LQSC_RO_PERF_DIS);
1058
1059 /* WaEnableGapsTsvCreditFix:skl */
1060 if (IS_SKYLAKE(dev) && (INTEL_REVID(dev) >= SKL_REVID_C0)) {
1061 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1062 GEN9_GAPS_TSV_CREDIT_DISABLE));
1063 }
8d205494 1064
d0bbbc4f
DL
1065 /* WaDisablePowerCompilerClockGating:skl */
1066 if (INTEL_REVID(dev) == SKL_REVID_B0)
1067 WA_SET_BIT_MASKED(HIZ_CHICKEN,
1068 BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
1069
b62adbd1
NH
1070 if (INTEL_REVID(dev) <= SKL_REVID_D0) {
1071 /*
1072 *Use Force Non-Coherent whenever executing a 3D context. This
1073 * is a workaround for a possible hang in the unlikely event
1074 * a TLB invalidation occurs during a PSD flush.
1075 */
1076 /* WaForceEnableNonCoherent:skl */
1077 WA_SET_BIT_MASKED(HDC_CHICKEN0,
1078 HDC_FORCE_NON_COHERENT);
1079 }
1080
5b6fd12a
VS
1081 if (INTEL_REVID(dev) == SKL_REVID_C0 ||
1082 INTEL_REVID(dev) == SKL_REVID_D0)
1083 /* WaBarrierPerformanceFixDisable:skl */
1084 WA_SET_BIT_MASKED(HDC_CHICKEN0,
1085 HDC_FENCE_DEST_SLM_DISABLE |
1086 HDC_BARRIER_PERFORMANCE_DISABLE);
1087
9bd9dfb4
MK
1088 /* WaDisableSbeCacheDispatchPortSharing:skl */
1089 if (INTEL_REVID(dev) <= SKL_REVID_F0) {
1090 WA_SET_BIT_MASKED(
1091 GEN7_HALF_SLICE_CHICKEN1,
1092 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1093 }
1094
b7668791 1095 return skl_tune_iz_hashing(ring);
7225342a
MK
1096}
1097
cae0437f
NH
1098static int bxt_init_workarounds(struct intel_engine_cs *ring)
1099{
aa0011a8 1100 int ret;
dfb601e6
NH
1101 struct drm_device *dev = ring->dev;
1102 struct drm_i915_private *dev_priv = dev->dev_private;
1103
aa0011a8
AS
1104 ret = gen9_init_workarounds(ring);
1105 if (ret)
1106 return ret;
cae0437f 1107
9c4cbf82
MK
1108 /* WaStoreMultiplePTEenable:bxt */
1109 /* This is a requirement according to Hardware specification */
1110 if (INTEL_REVID(dev) == BXT_REVID_A0)
1111 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1112
1113 /* WaSetClckGatingDisableMedia:bxt */
1114 if (INTEL_REVID(dev) == BXT_REVID_A0) {
1115 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1116 ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1117 }
cae0437f 1118
dfb601e6
NH
1119 /* WaDisableThreadStallDopClockGating:bxt */
1120 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1121 STALL_DOP_GATING_DISABLE);
1122
983b4b9d
NH
1123 /* WaDisableSbeCacheDispatchPortSharing:bxt */
1124 if (INTEL_REVID(dev) <= BXT_REVID_B0) {
1125 WA_SET_BIT_MASKED(
1126 GEN7_HALF_SLICE_CHICKEN1,
1127 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1128 }
1129
cae0437f
NH
1130 return 0;
1131}
1132
771b9a53 1133int init_workarounds_ring(struct intel_engine_cs *ring)
7225342a
MK
1134{
1135 struct drm_device *dev = ring->dev;
1136 struct drm_i915_private *dev_priv = dev->dev_private;
1137
1138 WARN_ON(ring->id != RCS);
1139
1140 dev_priv->workarounds.count = 0;
1141
1142 if (IS_BROADWELL(dev))
1143 return bdw_init_workarounds(ring);
1144
1145 if (IS_CHERRYVIEW(dev))
1146 return chv_init_workarounds(ring);
00e1e623 1147
8d205494
DL
1148 if (IS_SKYLAKE(dev))
1149 return skl_init_workarounds(ring);
cae0437f
NH
1150
1151 if (IS_BROXTON(dev))
1152 return bxt_init_workarounds(ring);
3b106531 1153
00e1e623
VS
1154 return 0;
1155}
1156
a4872ba6 1157static int init_render_ring(struct intel_engine_cs *ring)
8187a2b7 1158{
78501eac 1159 struct drm_device *dev = ring->dev;
1ec14ad3 1160 struct drm_i915_private *dev_priv = dev->dev_private;
78501eac 1161 int ret = init_ring_common(ring);
9c33baa6
KZ
1162 if (ret)
1163 return ret;
a69ffdbf 1164
61a563a2
AG
1165 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1166 if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
6b26c86d 1167 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
1c8c38c5
CW
1168
1169 /* We need to disable the AsyncFlip performance optimisations in order
1170 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1171 * programmed to '1' on all products.
8693a824 1172 *
2441f877 1173 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1c8c38c5 1174 */
2441f877 1175 if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
1c8c38c5
CW
1176 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1177
f05bb0c7 1178 /* Required for the hardware to program scanline values for waiting */
01fa0302 1179 /* WaEnableFlushTlbInvalidationMode:snb */
f05bb0c7
CW
1180 if (INTEL_INFO(dev)->gen == 6)
1181 I915_WRITE(GFX_MODE,
aa83e30d 1182 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
f05bb0c7 1183
01fa0302 1184 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1c8c38c5
CW
1185 if (IS_GEN7(dev))
1186 I915_WRITE(GFX_MODE_GEN7,
01fa0302 1187 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
1c8c38c5 1188 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
78501eac 1189
5e13a0c5 1190 if (IS_GEN6(dev)) {
3a69ddd6
KG
1191 /* From the Sandybridge PRM, volume 1 part 3, page 24:
1192 * "If this bit is set, STCunit will have LRA as replacement
1193 * policy. [...] This bit must be reset. LRA replacement
1194 * policy is not supported."
1195 */
1196 I915_WRITE(CACHE_MODE_0,
5e13a0c5 1197 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
84f9f938
BW
1198 }
1199
9cc83020 1200 if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
6b26c86d 1201 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
84f9f938 1202
040d2baa 1203 if (HAS_L3_DPF(dev))
35a85ac6 1204 I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
15b9f80e 1205
7225342a 1206 return init_workarounds_ring(ring);
8187a2b7
ZN
1207}
1208
a4872ba6 1209static void render_ring_cleanup(struct intel_engine_cs *ring)
c6df541c 1210{
b45305fc 1211 struct drm_device *dev = ring->dev;
3e78998a
BW
1212 struct drm_i915_private *dev_priv = dev->dev_private;
1213
1214 if (dev_priv->semaphore_obj) {
1215 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
1216 drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
1217 dev_priv->semaphore_obj = NULL;
1218 }
b45305fc 1219
9b1136d5 1220 intel_fini_pipe_control(ring);
c6df541c
CW
1221}
1222
f7169687 1223static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
3e78998a
BW
1224 unsigned int num_dwords)
1225{
1226#define MBOX_UPDATE_DWORDS 8
f7169687 1227 struct intel_engine_cs *signaller = signaller_req->ring;
3e78998a
BW
1228 struct drm_device *dev = signaller->dev;
1229 struct drm_i915_private *dev_priv = dev->dev_private;
1230 struct intel_engine_cs *waiter;
1231 int i, ret, num_rings;
1232
1233 num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1234 num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1235#undef MBOX_UPDATE_DWORDS
1236
5fb9de1a 1237 ret = intel_ring_begin(signaller_req, num_dwords);
3e78998a
BW
1238 if (ret)
1239 return ret;
1240
1241 for_each_ring(waiter, dev_priv, i) {
6259cead 1242 u32 seqno;
3e78998a
BW
1243 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1244 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1245 continue;
1246
f7169687 1247 seqno = i915_gem_request_get_seqno(signaller_req);
3e78998a
BW
1248 intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
1249 intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
1250 PIPE_CONTROL_QW_WRITE |
1251 PIPE_CONTROL_FLUSH_ENABLE);
1252 intel_ring_emit(signaller, lower_32_bits(gtt_offset));
1253 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
6259cead 1254 intel_ring_emit(signaller, seqno);
3e78998a
BW
1255 intel_ring_emit(signaller, 0);
1256 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1257 MI_SEMAPHORE_TARGET(waiter->id));
1258 intel_ring_emit(signaller, 0);
1259 }
1260
1261 return 0;
1262}
1263
f7169687 1264static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
3e78998a
BW
1265 unsigned int num_dwords)
1266{
1267#define MBOX_UPDATE_DWORDS 6
f7169687 1268 struct intel_engine_cs *signaller = signaller_req->ring;
3e78998a
BW
1269 struct drm_device *dev = signaller->dev;
1270 struct drm_i915_private *dev_priv = dev->dev_private;
1271 struct intel_engine_cs *waiter;
1272 int i, ret, num_rings;
1273
1274 num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1275 num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1276#undef MBOX_UPDATE_DWORDS
1277
5fb9de1a 1278 ret = intel_ring_begin(signaller_req, num_dwords);
3e78998a
BW
1279 if (ret)
1280 return ret;
1281
1282 for_each_ring(waiter, dev_priv, i) {
6259cead 1283 u32 seqno;
3e78998a
BW
1284 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1285 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1286 continue;
1287
f7169687 1288 seqno = i915_gem_request_get_seqno(signaller_req);
3e78998a
BW
1289 intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
1290 MI_FLUSH_DW_OP_STOREDW);
1291 intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
1292 MI_FLUSH_DW_USE_GTT);
1293 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
6259cead 1294 intel_ring_emit(signaller, seqno);
3e78998a
BW
1295 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1296 MI_SEMAPHORE_TARGET(waiter->id));
1297 intel_ring_emit(signaller, 0);
1298 }
1299
1300 return 0;
1301}
1302
f7169687 1303static int gen6_signal(struct drm_i915_gem_request *signaller_req,
024a43e1 1304 unsigned int num_dwords)
1ec14ad3 1305{
f7169687 1306 struct intel_engine_cs *signaller = signaller_req->ring;
024a43e1
BW
1307 struct drm_device *dev = signaller->dev;
1308 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 1309 struct intel_engine_cs *useless;
a1444b79 1310 int i, ret, num_rings;
78325f2d 1311
a1444b79
BW
1312#define MBOX_UPDATE_DWORDS 3
1313 num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1314 num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
1315#undef MBOX_UPDATE_DWORDS
024a43e1 1316
5fb9de1a 1317 ret = intel_ring_begin(signaller_req, num_dwords);
024a43e1
BW
1318 if (ret)
1319 return ret;
024a43e1 1320
78325f2d
BW
1321 for_each_ring(useless, dev_priv, i) {
1322 u32 mbox_reg = signaller->semaphore.mbox.signal[i];
1323 if (mbox_reg != GEN6_NOSYNC) {
f7169687 1324 u32 seqno = i915_gem_request_get_seqno(signaller_req);
78325f2d
BW
1325 intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
1326 intel_ring_emit(signaller, mbox_reg);
6259cead 1327 intel_ring_emit(signaller, seqno);
78325f2d
BW
1328 }
1329 }
024a43e1 1330
a1444b79
BW
1331 /* If num_dwords was rounded, make sure the tail pointer is correct */
1332 if (num_rings % 2 == 0)
1333 intel_ring_emit(signaller, MI_NOOP);
1334
024a43e1 1335 return 0;
1ec14ad3
CW
1336}
1337
c8c99b0f
BW
1338/**
1339 * gen6_add_request - Update the semaphore mailbox registers
ee044a88
JH
1340 *
1341 * @request - request to write to the ring
c8c99b0f
BW
1342 *
1343 * Update the mailbox registers in the *other* rings with the current seqno.
1344 * This acts like a signal in the canonical semaphore.
1345 */
1ec14ad3 1346static int
ee044a88 1347gen6_add_request(struct drm_i915_gem_request *req)
1ec14ad3 1348{
ee044a88 1349 struct intel_engine_cs *ring = req->ring;
024a43e1 1350 int ret;
52ed2325 1351
707d9cf9 1352 if (ring->semaphore.signal)
f7169687 1353 ret = ring->semaphore.signal(req, 4);
707d9cf9 1354 else
5fb9de1a 1355 ret = intel_ring_begin(req, 4);
707d9cf9 1356
1ec14ad3
CW
1357 if (ret)
1358 return ret;
1359
1ec14ad3
CW
1360 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1361 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
ee044a88 1362 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1ec14ad3 1363 intel_ring_emit(ring, MI_USER_INTERRUPT);
09246732 1364 __intel_ring_advance(ring);
1ec14ad3 1365
1ec14ad3
CW
1366 return 0;
1367}
1368
f72b3435
MK
1369static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
1370 u32 seqno)
1371{
1372 struct drm_i915_private *dev_priv = dev->dev_private;
1373 return dev_priv->last_seqno < seqno;
1374}
1375
c8c99b0f
BW
1376/**
1377 * intel_ring_sync - sync the waiter to the signaller on seqno
1378 *
1379 * @waiter - ring that is waiting
1380 * @signaller - ring which has, or will signal
1381 * @seqno - seqno which the waiter will block on
1382 */
5ee426ca
BW
1383
1384static int
599d924c 1385gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
5ee426ca
BW
1386 struct intel_engine_cs *signaller,
1387 u32 seqno)
1388{
599d924c 1389 struct intel_engine_cs *waiter = waiter_req->ring;
5ee426ca
BW
1390 struct drm_i915_private *dev_priv = waiter->dev->dev_private;
1391 int ret;
1392
5fb9de1a 1393 ret = intel_ring_begin(waiter_req, 4);
5ee426ca
BW
1394 if (ret)
1395 return ret;
1396
1397 intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
1398 MI_SEMAPHORE_GLOBAL_GTT |
bae4fcd2 1399 MI_SEMAPHORE_POLL |
5ee426ca
BW
1400 MI_SEMAPHORE_SAD_GTE_SDD);
1401 intel_ring_emit(waiter, seqno);
1402 intel_ring_emit(waiter,
1403 lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1404 intel_ring_emit(waiter,
1405 upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1406 intel_ring_advance(waiter);
1407 return 0;
1408}
1409
c8c99b0f 1410static int
599d924c 1411gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
a4872ba6 1412 struct intel_engine_cs *signaller,
686cb5f9 1413 u32 seqno)
1ec14ad3 1414{
599d924c 1415 struct intel_engine_cs *waiter = waiter_req->ring;
c8c99b0f
BW
1416 u32 dw1 = MI_SEMAPHORE_MBOX |
1417 MI_SEMAPHORE_COMPARE |
1418 MI_SEMAPHORE_REGISTER;
ebc348b2
BW
1419 u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
1420 int ret;
1ec14ad3 1421
1500f7ea
BW
1422 /* Throughout all of the GEM code, seqno passed implies our current
1423 * seqno is >= the last seqno executed. However for hardware the
1424 * comparison is strictly greater than.
1425 */
1426 seqno -= 1;
1427
ebc348b2 1428 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
686cb5f9 1429
5fb9de1a 1430 ret = intel_ring_begin(waiter_req, 4);
1ec14ad3
CW
1431 if (ret)
1432 return ret;
1433
f72b3435
MK
1434 /* If seqno wrap happened, omit the wait with no-ops */
1435 if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
ebc348b2 1436 intel_ring_emit(waiter, dw1 | wait_mbox);
f72b3435
MK
1437 intel_ring_emit(waiter, seqno);
1438 intel_ring_emit(waiter, 0);
1439 intel_ring_emit(waiter, MI_NOOP);
1440 } else {
1441 intel_ring_emit(waiter, MI_NOOP);
1442 intel_ring_emit(waiter, MI_NOOP);
1443 intel_ring_emit(waiter, MI_NOOP);
1444 intel_ring_emit(waiter, MI_NOOP);
1445 }
c8c99b0f 1446 intel_ring_advance(waiter);
1ec14ad3
CW
1447
1448 return 0;
1449}
1450
c6df541c
CW
/*
 * Emit a 4-dword PIPE_CONTROL (qword write + depth stall) on ring__ that
 * targets the global-GTT address addr__.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
do {										\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |	\
		 PIPE_CONTROL_DEPTH_STALL);					\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);		\
	intel_ring_emit(ring__, 0);						\
	intel_ring_emit(ring__, 0);						\
} while (0)
1459
1460static int
ee044a88 1461pc_render_add_request(struct drm_i915_gem_request *req)
c6df541c 1462{
ee044a88 1463 struct intel_engine_cs *ring = req->ring;
18393f63 1464 u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
c6df541c
CW
1465 int ret;
1466
1467 /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
1468 * incoherent with writes to memory, i.e. completely fubar,
1469 * so we need to use PIPE_NOTIFY instead.
1470 *
1471 * However, we also need to workaround the qword write
1472 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
1473 * memory before requesting an interrupt.
1474 */
5fb9de1a 1475 ret = intel_ring_begin(req, 32);
c6df541c
CW
1476 if (ret)
1477 return ret;
1478
fcbc34e4 1479 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
9d971b37
KG
1480 PIPE_CONTROL_WRITE_FLUSH |
1481 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
0d1aacac 1482 intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
ee044a88 1483 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
c6df541c
CW
1484 intel_ring_emit(ring, 0);
1485 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1486 scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
c6df541c 1487 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1488 scratch_addr += 2 * CACHELINE_BYTES;
c6df541c 1489 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1490 scratch_addr += 2 * CACHELINE_BYTES;
c6df541c 1491 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1492 scratch_addr += 2 * CACHELINE_BYTES;
c6df541c 1493 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1494 scratch_addr += 2 * CACHELINE_BYTES;
c6df541c 1495 PIPE_CONTROL_FLUSH(ring, scratch_addr);
a71d8d94 1496
fcbc34e4 1497 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
9d971b37
KG
1498 PIPE_CONTROL_WRITE_FLUSH |
1499 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
c6df541c 1500 PIPE_CONTROL_NOTIFY);
0d1aacac 1501 intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
ee044a88 1502 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
c6df541c 1503 intel_ring_emit(ring, 0);
09246732 1504 __intel_ring_advance(ring);
c6df541c 1505
c6df541c
CW
1506 return 0;
1507}
1508
4cd53c0c 1509static u32
a4872ba6 1510gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
4cd53c0c 1511{
4cd53c0c
DV
1512 /* Workaround to force correct ordering between irq and seqno writes on
1513 * ivb (and maybe also on snb) by reading from a CS register (like
1514 * ACTHD) before reading the status page. */
50877445
CW
1515 if (!lazy_coherency) {
1516 struct drm_i915_private *dev_priv = ring->dev->dev_private;
1517 POSTING_READ(RING_ACTHD(ring->mmio_base));
1518 }
1519
4cd53c0c
DV
1520 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1521}
1522
8187a2b7 1523static u32
a4872ba6 1524ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
8187a2b7 1525{
1ec14ad3
CW
1526 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1527}
1528
b70ec5bf 1529static void
a4872ba6 1530ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
b70ec5bf
MK
1531{
1532 intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1533}
1534
c6df541c 1535static u32
a4872ba6 1536pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
c6df541c 1537{
0d1aacac 1538 return ring->scratch.cpu_page[0];
c6df541c
CW
1539}
1540
b70ec5bf 1541static void
a4872ba6 1542pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
b70ec5bf 1543{
0d1aacac 1544 ring->scratch.cpu_page[0] = seqno;
b70ec5bf
MK
1545}
1546
e48d8634 1547static bool
a4872ba6 1548gen5_ring_get_irq(struct intel_engine_cs *ring)
e48d8634
DV
1549{
1550 struct drm_device *dev = ring->dev;
4640c4ff 1551 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1552 unsigned long flags;
e48d8634 1553
7cd512f1 1554 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
e48d8634
DV
1555 return false;
1556
7338aefa 1557 spin_lock_irqsave(&dev_priv->irq_lock, flags);
43eaea13 1558 if (ring->irq_refcount++ == 0)
480c8033 1559 gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
7338aefa 1560 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
e48d8634
DV
1561
1562 return true;
1563}
1564
1565static void
a4872ba6 1566gen5_ring_put_irq(struct intel_engine_cs *ring)
e48d8634
DV
1567{
1568 struct drm_device *dev = ring->dev;
4640c4ff 1569 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1570 unsigned long flags;
e48d8634 1571
7338aefa 1572 spin_lock_irqsave(&dev_priv->irq_lock, flags);
43eaea13 1573 if (--ring->irq_refcount == 0)
480c8033 1574 gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
7338aefa 1575 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
e48d8634
DV
1576}
1577
b13c2b96 1578static bool
a4872ba6 1579i9xx_ring_get_irq(struct intel_engine_cs *ring)
62fdfeaf 1580{
78501eac 1581 struct drm_device *dev = ring->dev;
4640c4ff 1582 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1583 unsigned long flags;
62fdfeaf 1584
7cd512f1 1585 if (!intel_irqs_enabled(dev_priv))
b13c2b96
CW
1586 return false;
1587
7338aefa 1588 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1589 if (ring->irq_refcount++ == 0) {
f637fde4
DV
1590 dev_priv->irq_mask &= ~ring->irq_enable_mask;
1591 I915_WRITE(IMR, dev_priv->irq_mask);
1592 POSTING_READ(IMR);
1593 }
7338aefa 1594 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
b13c2b96
CW
1595
1596 return true;
62fdfeaf
EA
1597}
1598
8187a2b7 1599static void
a4872ba6 1600i9xx_ring_put_irq(struct intel_engine_cs *ring)
62fdfeaf 1601{
78501eac 1602 struct drm_device *dev = ring->dev;
4640c4ff 1603 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1604 unsigned long flags;
62fdfeaf 1605
7338aefa 1606 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1607 if (--ring->irq_refcount == 0) {
f637fde4
DV
1608 dev_priv->irq_mask |= ring->irq_enable_mask;
1609 I915_WRITE(IMR, dev_priv->irq_mask);
1610 POSTING_READ(IMR);
1611 }
7338aefa 1612 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
62fdfeaf
EA
1613}
1614
c2798b19 1615static bool
a4872ba6 1616i8xx_ring_get_irq(struct intel_engine_cs *ring)
c2798b19
CW
1617{
1618 struct drm_device *dev = ring->dev;
4640c4ff 1619 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1620 unsigned long flags;
c2798b19 1621
7cd512f1 1622 if (!intel_irqs_enabled(dev_priv))
c2798b19
CW
1623 return false;
1624
7338aefa 1625 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1626 if (ring->irq_refcount++ == 0) {
c2798b19
CW
1627 dev_priv->irq_mask &= ~ring->irq_enable_mask;
1628 I915_WRITE16(IMR, dev_priv->irq_mask);
1629 POSTING_READ16(IMR);
1630 }
7338aefa 1631 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
c2798b19
CW
1632
1633 return true;
1634}
1635
1636static void
a4872ba6 1637i8xx_ring_put_irq(struct intel_engine_cs *ring)
c2798b19
CW
1638{
1639 struct drm_device *dev = ring->dev;
4640c4ff 1640 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1641 unsigned long flags;
c2798b19 1642
7338aefa 1643 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1644 if (--ring->irq_refcount == 0) {
c2798b19
CW
1645 dev_priv->irq_mask |= ring->irq_enable_mask;
1646 I915_WRITE16(IMR, dev_priv->irq_mask);
1647 POSTING_READ16(IMR);
1648 }
7338aefa 1649 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
c2798b19
CW
1650}
1651
b72f3acb 1652static int
a84c3ae1 1653bsd_ring_flush(struct drm_i915_gem_request *req,
78501eac
CW
1654 u32 invalidate_domains,
1655 u32 flush_domains)
d1b851fc 1656{
a84c3ae1 1657 struct intel_engine_cs *ring = req->ring;
b72f3acb
CW
1658 int ret;
1659
5fb9de1a 1660 ret = intel_ring_begin(req, 2);
b72f3acb
CW
1661 if (ret)
1662 return ret;
1663
1664 intel_ring_emit(ring, MI_FLUSH);
1665 intel_ring_emit(ring, MI_NOOP);
1666 intel_ring_advance(ring);
1667 return 0;
d1b851fc
ZN
1668}
1669
3cce469c 1670static int
ee044a88 1671i9xx_add_request(struct drm_i915_gem_request *req)
d1b851fc 1672{
ee044a88 1673 struct intel_engine_cs *ring = req->ring;
3cce469c
CW
1674 int ret;
1675
5fb9de1a 1676 ret = intel_ring_begin(req, 4);
3cce469c
CW
1677 if (ret)
1678 return ret;
6f392d54 1679
3cce469c
CW
1680 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1681 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
ee044a88 1682 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
3cce469c 1683 intel_ring_emit(ring, MI_USER_INTERRUPT);
09246732 1684 __intel_ring_advance(ring);
d1b851fc 1685
3cce469c 1686 return 0;
d1b851fc
ZN
1687}
1688
0f46832f 1689static bool
a4872ba6 1690gen6_ring_get_irq(struct intel_engine_cs *ring)
0f46832f
CW
1691{
1692 struct drm_device *dev = ring->dev;
4640c4ff 1693 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1694 unsigned long flags;
0f46832f 1695
7cd512f1
DV
1696 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1697 return false;
0f46832f 1698
7338aefa 1699 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1700 if (ring->irq_refcount++ == 0) {
040d2baa 1701 if (HAS_L3_DPF(dev) && ring->id == RCS)
cc609d5d
BW
1702 I915_WRITE_IMR(ring,
1703 ~(ring->irq_enable_mask |
35a85ac6 1704 GT_PARITY_ERROR(dev)));
15b9f80e
BW
1705 else
1706 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
480c8033 1707 gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
0f46832f 1708 }
7338aefa 1709 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
0f46832f
CW
1710
1711 return true;
1712}
1713
1714static void
a4872ba6 1715gen6_ring_put_irq(struct intel_engine_cs *ring)
0f46832f
CW
1716{
1717 struct drm_device *dev = ring->dev;
4640c4ff 1718 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1719 unsigned long flags;
0f46832f 1720
7338aefa 1721 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1722 if (--ring->irq_refcount == 0) {
040d2baa 1723 if (HAS_L3_DPF(dev) && ring->id == RCS)
35a85ac6 1724 I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
15b9f80e
BW
1725 else
1726 I915_WRITE_IMR(ring, ~0);
480c8033 1727 gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
1ec14ad3 1728 }
7338aefa 1729 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
d1b851fc
ZN
1730}
1731
a19d2933 1732static bool
a4872ba6 1733hsw_vebox_get_irq(struct intel_engine_cs *ring)
a19d2933
BW
1734{
1735 struct drm_device *dev = ring->dev;
1736 struct drm_i915_private *dev_priv = dev->dev_private;
1737 unsigned long flags;
1738
7cd512f1 1739 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
a19d2933
BW
1740 return false;
1741
59cdb63d 1742 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1743 if (ring->irq_refcount++ == 0) {
a19d2933 1744 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
480c8033 1745 gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
a19d2933 1746 }
59cdb63d 1747 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
a19d2933
BW
1748
1749 return true;
1750}
1751
1752static void
a4872ba6 1753hsw_vebox_put_irq(struct intel_engine_cs *ring)
a19d2933
BW
1754{
1755 struct drm_device *dev = ring->dev;
1756 struct drm_i915_private *dev_priv = dev->dev_private;
1757 unsigned long flags;
1758
59cdb63d 1759 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1760 if (--ring->irq_refcount == 0) {
a19d2933 1761 I915_WRITE_IMR(ring, ~0);
480c8033 1762 gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
a19d2933 1763 }
59cdb63d 1764 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
a19d2933
BW
1765}
1766
abd58f01 1767static bool
a4872ba6 1768gen8_ring_get_irq(struct intel_engine_cs *ring)
abd58f01
BW
1769{
1770 struct drm_device *dev = ring->dev;
1771 struct drm_i915_private *dev_priv = dev->dev_private;
1772 unsigned long flags;
1773
7cd512f1 1774 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
abd58f01
BW
1775 return false;
1776
1777 spin_lock_irqsave(&dev_priv->irq_lock, flags);
1778 if (ring->irq_refcount++ == 0) {
1779 if (HAS_L3_DPF(dev) && ring->id == RCS) {
1780 I915_WRITE_IMR(ring,
1781 ~(ring->irq_enable_mask |
1782 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
1783 } else {
1784 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1785 }
1786 POSTING_READ(RING_IMR(ring->mmio_base));
1787 }
1788 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1789
1790 return true;
1791}
1792
1793static void
a4872ba6 1794gen8_ring_put_irq(struct intel_engine_cs *ring)
abd58f01
BW
1795{
1796 struct drm_device *dev = ring->dev;
1797 struct drm_i915_private *dev_priv = dev->dev_private;
1798 unsigned long flags;
1799
1800 spin_lock_irqsave(&dev_priv->irq_lock, flags);
1801 if (--ring->irq_refcount == 0) {
1802 if (HAS_L3_DPF(dev) && ring->id == RCS) {
1803 I915_WRITE_IMR(ring,
1804 ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
1805 } else {
1806 I915_WRITE_IMR(ring, ~0);
1807 }
1808 POSTING_READ(RING_IMR(ring->mmio_base));
1809 }
1810 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1811}
1812
d1b851fc 1813static int
53fddaf7 1814i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
9bcb144c 1815 u64 offset, u32 length,
8e004efc 1816 unsigned dispatch_flags)
d1b851fc 1817{
53fddaf7 1818 struct intel_engine_cs *ring = req->ring;
e1f99ce6 1819 int ret;
78501eac 1820
5fb9de1a 1821 ret = intel_ring_begin(req, 2);
e1f99ce6
CW
1822 if (ret)
1823 return ret;
1824
78501eac 1825 intel_ring_emit(ring,
65f56876
CW
1826 MI_BATCH_BUFFER_START |
1827 MI_BATCH_GTT |
8e004efc
JH
1828 (dispatch_flags & I915_DISPATCH_SECURE ?
1829 0 : MI_BATCH_NON_SECURE_I965));
c4e7a414 1830 intel_ring_emit(ring, offset);
78501eac
CW
1831 intel_ring_advance(ring);
1832
d1b851fc
ZN
1833 return 0;
1834}
1835
b45305fc
DV
1836/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
1837#define I830_BATCH_LIMIT (256*1024)
c4d69da1
CW
1838#define I830_TLB_ENTRIES (2)
1839#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
8187a2b7 1840static int
53fddaf7 1841i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
8e004efc
JH
1842 u64 offset, u32 len,
1843 unsigned dispatch_flags)
62fdfeaf 1844{
53fddaf7 1845 struct intel_engine_cs *ring = req->ring;
c4d69da1 1846 u32 cs_offset = ring->scratch.gtt_offset;
c4e7a414 1847 int ret;
62fdfeaf 1848
5fb9de1a 1849 ret = intel_ring_begin(req, 6);
c4d69da1
CW
1850 if (ret)
1851 return ret;
62fdfeaf 1852
c4d69da1
CW
1853 /* Evict the invalid PTE TLBs */
1854 intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1855 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1856 intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1857 intel_ring_emit(ring, cs_offset);
1858 intel_ring_emit(ring, 0xdeadbeef);
1859 intel_ring_emit(ring, MI_NOOP);
1860 intel_ring_advance(ring);
b45305fc 1861
8e004efc 1862 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
b45305fc
DV
1863 if (len > I830_BATCH_LIMIT)
1864 return -ENOSPC;
1865
5fb9de1a 1866 ret = intel_ring_begin(req, 6 + 2);
b45305fc
DV
1867 if (ret)
1868 return ret;
c4d69da1
CW
1869
1870 /* Blit the batch (which has now all relocs applied) to the
1871 * stable batch scratch bo area (so that the CS never
1872 * stumbles over its tlb invalidation bug) ...
1873 */
1874 intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1875 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
611a7a4f 1876 intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
b45305fc 1877 intel_ring_emit(ring, cs_offset);
b45305fc
DV
1878 intel_ring_emit(ring, 4096);
1879 intel_ring_emit(ring, offset);
c4d69da1 1880
b45305fc 1881 intel_ring_emit(ring, MI_FLUSH);
c4d69da1
CW
1882 intel_ring_emit(ring, MI_NOOP);
1883 intel_ring_advance(ring);
b45305fc
DV
1884
1885 /* ... and execute it. */
c4d69da1 1886 offset = cs_offset;
b45305fc 1887 }
e1f99ce6 1888
5fb9de1a 1889 ret = intel_ring_begin(req, 4);
c4d69da1
CW
1890 if (ret)
1891 return ret;
1892
1893 intel_ring_emit(ring, MI_BATCH_BUFFER);
8e004efc
JH
1894 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1895 0 : MI_BATCH_NON_SECURE));
c4d69da1
CW
1896 intel_ring_emit(ring, offset + len - 8);
1897 intel_ring_emit(ring, MI_NOOP);
1898 intel_ring_advance(ring);
1899
fb3256da
DV
1900 return 0;
1901}
1902
1903static int
53fddaf7 1904i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
9bcb144c 1905 u64 offset, u32 len,
8e004efc 1906 unsigned dispatch_flags)
fb3256da 1907{
53fddaf7 1908 struct intel_engine_cs *ring = req->ring;
fb3256da
DV
1909 int ret;
1910
5fb9de1a 1911 ret = intel_ring_begin(req, 2);
fb3256da
DV
1912 if (ret)
1913 return ret;
1914
65f56876 1915 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
8e004efc
JH
1916 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1917 0 : MI_BATCH_NON_SECURE));
c4e7a414 1918 intel_ring_advance(ring);
62fdfeaf 1919
62fdfeaf
EA
1920 return 0;
1921}
1922
a4872ba6 1923static void cleanup_status_page(struct intel_engine_cs *ring)
62fdfeaf 1924{
05394f39 1925 struct drm_i915_gem_object *obj;
62fdfeaf 1926
8187a2b7
ZN
1927 obj = ring->status_page.obj;
1928 if (obj == NULL)
62fdfeaf 1929 return;
62fdfeaf 1930
9da3da66 1931 kunmap(sg_page(obj->pages->sgl));
d7f46fc4 1932 i915_gem_object_ggtt_unpin(obj);
05394f39 1933 drm_gem_object_unreference(&obj->base);
8187a2b7 1934 ring->status_page.obj = NULL;
62fdfeaf
EA
1935}
1936
a4872ba6 1937static int init_status_page(struct intel_engine_cs *ring)
62fdfeaf 1938{
05394f39 1939 struct drm_i915_gem_object *obj;
62fdfeaf 1940
e3efda49 1941 if ((obj = ring->status_page.obj) == NULL) {
1f767e02 1942 unsigned flags;
e3efda49 1943 int ret;
e4ffd173 1944
e3efda49
CW
1945 obj = i915_gem_alloc_object(ring->dev, 4096);
1946 if (obj == NULL) {
1947 DRM_ERROR("Failed to allocate status page\n");
1948 return -ENOMEM;
1949 }
62fdfeaf 1950
e3efda49
CW
1951 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1952 if (ret)
1953 goto err_unref;
1954
1f767e02
CW
1955 flags = 0;
1956 if (!HAS_LLC(ring->dev))
1957 /* On g33, we cannot place HWS above 256MiB, so
1958 * restrict its pinning to the low mappable arena.
1959 * Though this restriction is not documented for
1960 * gen4, gen5, or byt, they also behave similarly
1961 * and hang if the HWS is placed at the top of the
1962 * GTT. To generalise, it appears that all !llc
1963 * platforms have issues with us placing the HWS
1964 * above the mappable region (even though we never
1965 * actualy map it).
1966 */
1967 flags |= PIN_MAPPABLE;
1968 ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
e3efda49
CW
1969 if (ret) {
1970err_unref:
1971 drm_gem_object_unreference(&obj->base);
1972 return ret;
1973 }
1974
1975 ring->status_page.obj = obj;
1976 }
62fdfeaf 1977
f343c5f6 1978 ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
9da3da66 1979 ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
8187a2b7 1980 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
62fdfeaf 1981
8187a2b7
ZN
1982 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1983 ring->name, ring->status_page.gfx_addr);
62fdfeaf
EA
1984
1985 return 0;
62fdfeaf
EA
1986}
1987
a4872ba6 1988static int init_phys_status_page(struct intel_engine_cs *ring)
6b8294a4
CW
1989{
1990 struct drm_i915_private *dev_priv = ring->dev->dev_private;
6b8294a4
CW
1991
1992 if (!dev_priv->status_page_dmah) {
1993 dev_priv->status_page_dmah =
1994 drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
1995 if (!dev_priv->status_page_dmah)
1996 return -ENOMEM;
1997 }
1998
6b8294a4
CW
1999 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
2000 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
2001
2002 return 0;
2003}
2004
7ba717cf 2005void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2919d291 2006{
2919d291 2007 iounmap(ringbuf->virtual_start);
7ba717cf 2008 ringbuf->virtual_start = NULL;
2919d291 2009 i915_gem_object_ggtt_unpin(ringbuf->obj);
7ba717cf
TD
2010}
2011
2012int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
2013 struct intel_ringbuffer *ringbuf)
2014{
2015 struct drm_i915_private *dev_priv = to_i915(dev);
2016 struct drm_i915_gem_object *obj = ringbuf->obj;
2017 int ret;
2018
2019 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
2020 if (ret)
2021 return ret;
2022
2023 ret = i915_gem_object_set_to_gtt_domain(obj, true);
2024 if (ret) {
2025 i915_gem_object_ggtt_unpin(obj);
2026 return ret;
2027 }
2028
2029 ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
2030 i915_gem_obj_ggtt_offset(obj), ringbuf->size);
2031 if (ringbuf->virtual_start == NULL) {
2032 i915_gem_object_ggtt_unpin(obj);
2033 return -EINVAL;
2034 }
2035
2036 return 0;
2037}
2038
01101fa7 2039static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
7ba717cf 2040{
2919d291
OM
2041 drm_gem_object_unreference(&ringbuf->obj->base);
2042 ringbuf->obj = NULL;
2043}
2044
01101fa7
CW
2045static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2046 struct intel_ringbuffer *ringbuf)
62fdfeaf 2047{
05394f39 2048 struct drm_i915_gem_object *obj;
62fdfeaf 2049
ebc052e0
CW
2050 obj = NULL;
2051 if (!HAS_LLC(dev))
93b0a4e0 2052 obj = i915_gem_object_create_stolen(dev, ringbuf->size);
ebc052e0 2053 if (obj == NULL)
93b0a4e0 2054 obj = i915_gem_alloc_object(dev, ringbuf->size);
e3efda49
CW
2055 if (obj == NULL)
2056 return -ENOMEM;
8187a2b7 2057
24f3a8cf
AG
2058 /* mark ring buffers as read-only from GPU side by default */
2059 obj->gt_ro = 1;
2060
93b0a4e0 2061 ringbuf->obj = obj;
e3efda49 2062
7ba717cf 2063 return 0;
e3efda49
CW
2064}
2065
01101fa7
CW
2066struct intel_ringbuffer *
2067intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
2068{
2069 struct intel_ringbuffer *ring;
2070 int ret;
2071
2072 ring = kzalloc(sizeof(*ring), GFP_KERNEL);
2073 if (ring == NULL)
2074 return ERR_PTR(-ENOMEM);
2075
2076 ring->ring = engine;
2077
2078 ring->size = size;
2079 /* Workaround an erratum on the i830 which causes a hang if
2080 * the TAIL pointer points to within the last 2 cachelines
2081 * of the buffer.
2082 */
2083 ring->effective_size = size;
2084 if (IS_I830(engine->dev) || IS_845G(engine->dev))
2085 ring->effective_size -= 2 * CACHELINE_BYTES;
2086
2087 ring->last_retired_head = -1;
2088 intel_ring_update_space(ring);
2089
2090 ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
2091 if (ret) {
2092 DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
2093 engine->name, ret);
2094 kfree(ring);
2095 return ERR_PTR(ret);
2096 }
2097
2098 return ring;
2099}
2100
/* Release the backing object of @ring and free the ringbuffer itself. */
void
intel_ringbuffer_free(struct intel_ringbuffer *ring)
{
	struct intel_ringbuffer *ringbuf = ring;

	intel_destroy_ringbuffer_obj(ringbuf);
	kfree(ringbuf);
}
2107
e3efda49 2108static int intel_init_ring_buffer(struct drm_device *dev,
a4872ba6 2109 struct intel_engine_cs *ring)
e3efda49 2110{
bfc882b4 2111 struct intel_ringbuffer *ringbuf;
e3efda49
CW
2112 int ret;
2113
bfc882b4
DV
2114 WARN_ON(ring->buffer);
2115
e3efda49
CW
2116 ring->dev = dev;
2117 INIT_LIST_HEAD(&ring->active_list);
2118 INIT_LIST_HEAD(&ring->request_list);
cc9130be 2119 INIT_LIST_HEAD(&ring->execlist_queue);
06fbca71 2120 i915_gem_batch_pool_init(dev, &ring->batch_pool);
ebc348b2 2121 memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
e3efda49
CW
2122
2123 init_waitqueue_head(&ring->irq_queue);
2124
01101fa7
CW
2125 ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
2126 if (IS_ERR(ringbuf))
2127 return PTR_ERR(ringbuf);
2128 ring->buffer = ringbuf;
2129
e3efda49
CW
2130 if (I915_NEED_GFX_HWS(dev)) {
2131 ret = init_status_page(ring);
2132 if (ret)
8ee14975 2133 goto error;
e3efda49
CW
2134 } else {
2135 BUG_ON(ring->id != RCS);
2136 ret = init_phys_status_page(ring);
2137 if (ret)
8ee14975 2138 goto error;
e3efda49
CW
2139 }
2140
bfc882b4
DV
2141 ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
2142 if (ret) {
2143 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2144 ring->name, ret);
2145 intel_destroy_ringbuffer_obj(ringbuf);
2146 goto error;
e3efda49 2147 }
62fdfeaf 2148
44e895a8
BV
2149 ret = i915_cmd_parser_init_ring(ring);
2150 if (ret)
8ee14975
OM
2151 goto error;
2152
8ee14975 2153 return 0;
351e3db2 2154
8ee14975 2155error:
01101fa7 2156 intel_ringbuffer_free(ringbuf);
8ee14975
OM
2157 ring->buffer = NULL;
2158 return ret;
62fdfeaf
EA
2159}
2160
a4872ba6 2161void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
62fdfeaf 2162{
6402c330 2163 struct drm_i915_private *dev_priv;
33626e6a 2164
93b0a4e0 2165 if (!intel_ring_initialized(ring))
62fdfeaf
EA
2166 return;
2167
6402c330 2168 dev_priv = to_i915(ring->dev);
6402c330 2169
e3efda49 2170 intel_stop_ring_buffer(ring);
de8f0a50 2171 WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
33626e6a 2172
01101fa7
CW
2173 intel_unpin_ringbuffer_obj(ring->buffer);
2174 intel_ringbuffer_free(ring->buffer);
2175 ring->buffer = NULL;
78501eac 2176
8d19215b
ZN
2177 if (ring->cleanup)
2178 ring->cleanup(ring);
2179
78501eac 2180 cleanup_status_page(ring);
44e895a8
BV
2181
2182 i915_cmd_parser_fini_ring(ring);
06fbca71 2183 i915_gem_batch_pool_fini(&ring->batch_pool);
62fdfeaf
EA
2184}
2185
595e1eeb 2186static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
a71d8d94 2187{
93b0a4e0 2188 struct intel_ringbuffer *ringbuf = ring->buffer;
a71d8d94 2189 struct drm_i915_gem_request *request;
b4716185
CW
2190 unsigned space;
2191 int ret;
a71d8d94 2192
ebd0fd4b
DG
2193 if (intel_ring_space(ringbuf) >= n)
2194 return 0;
a71d8d94 2195
79bbcc29
JH
2196 /* The whole point of reserving space is to not wait! */
2197 WARN_ON(ringbuf->reserved_in_use);
2198
a71d8d94 2199 list_for_each_entry(request, &ring->request_list, list) {
b4716185
CW
2200 space = __intel_ring_space(request->postfix, ringbuf->tail,
2201 ringbuf->size);
2202 if (space >= n)
a71d8d94 2203 break;
a71d8d94
CW
2204 }
2205
595e1eeb 2206 if (WARN_ON(&request->list == &ring->request_list))
a71d8d94
CW
2207 return -ENOSPC;
2208
a4b3a571 2209 ret = i915_wait_request(request);
a71d8d94
CW
2210 if (ret)
2211 return ret;
2212
b4716185 2213 ringbuf->space = space;
a71d8d94
CW
2214 return 0;
2215}
2216
79bbcc29 2217static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
3e960501
CW
2218{
2219 uint32_t __iomem *virt;
93b0a4e0 2220 int rem = ringbuf->size - ringbuf->tail;
3e960501 2221
93b0a4e0 2222 virt = ringbuf->virtual_start + ringbuf->tail;
3e960501
CW
2223 rem /= 4;
2224 while (rem--)
2225 iowrite32(MI_NOOP, virt++);
2226
93b0a4e0 2227 ringbuf->tail = 0;
ebd0fd4b 2228 intel_ring_update_space(ringbuf);
3e960501
CW
2229}
2230
a4872ba6 2231int intel_ring_idle(struct intel_engine_cs *ring)
3e960501 2232{
a4b3a571 2233 struct drm_i915_gem_request *req;
3e960501 2234
3e960501
CW
2235 /* Wait upon the last request to be completed */
2236 if (list_empty(&ring->request_list))
2237 return 0;
2238
a4b3a571 2239 req = list_entry(ring->request_list.prev,
b4716185
CW
2240 struct drm_i915_gem_request,
2241 list);
2242
2243 /* Make sure we do not trigger any retires */
2244 return __i915_wait_request(req,
2245 atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
2246 to_i915(ring->dev)->mm.interruptible,
2247 NULL, NULL);
3e960501
CW
2248}
2249
6689cb2b 2250int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
9d773091 2251{
6689cb2b 2252 request->ringbuf = request->ring->buffer;
9eba5d4a 2253 return 0;
9d773091
CW
2254}
2255
ccd98fe4
JH
2256int intel_ring_reserve_space(struct drm_i915_gem_request *request)
2257{
2258 /*
2259 * The first call merely notes the reserve request and is common for
2260 * all back ends. The subsequent localised _begin() call actually
2261 * ensures that the reservation is available. Without the begin, if
2262 * the request creator immediately submitted the request without
2263 * adding any commands to it then there might not actually be
2264 * sufficient room for the submission commands.
2265 */
2266 intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
2267
2268 return intel_ring_begin(request, 0);
2269}
2270
29b1b415
JH
2271void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
2272{
ccd98fe4 2273 WARN_ON(ringbuf->reserved_size);
29b1b415
JH
2274 WARN_ON(ringbuf->reserved_in_use);
2275
2276 ringbuf->reserved_size = size;
29b1b415
JH
2277}
2278
2279void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
2280{
2281 WARN_ON(ringbuf->reserved_in_use);
2282
2283 ringbuf->reserved_size = 0;
2284 ringbuf->reserved_in_use = false;
2285}
2286
2287void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
2288{
2289 WARN_ON(ringbuf->reserved_in_use);
2290
2291 ringbuf->reserved_in_use = true;
2292 ringbuf->reserved_tail = ringbuf->tail;
2293}
2294
2295void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
2296{
2297 WARN_ON(!ringbuf->reserved_in_use);
79bbcc29
JH
2298 if (ringbuf->tail > ringbuf->reserved_tail) {
2299 WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
2300 "request reserved size too small: %d vs %d!\n",
2301 ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
2302 } else {
2303 /*
2304 * The ring was wrapped while the reserved space was in use.
2305 * That means that some unknown amount of the ring tail was
2306 * no-op filled and skipped. Thus simply adding the ring size
2307 * to the tail and doing the above space check will not work.
2308 * Rather than attempt to track how much tail was skipped,
2309 * it is much simpler to say that also skipping the sanity
2310 * check every once in a while is not a big issue.
2311 */
2312 }
29b1b415
JH
2313
2314 ringbuf->reserved_size = 0;
2315 ringbuf->reserved_in_use = false;
2316}
2317
2318static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
cbcc80df 2319{
93b0a4e0 2320 struct intel_ringbuffer *ringbuf = ring->buffer;
79bbcc29
JH
2321 int remain_usable = ringbuf->effective_size - ringbuf->tail;
2322 int remain_actual = ringbuf->size - ringbuf->tail;
2323 int ret, total_bytes, wait_bytes = 0;
2324 bool need_wrap = false;
29b1b415 2325
79bbcc29
JH
2326 if (ringbuf->reserved_in_use)
2327 total_bytes = bytes;
2328 else
2329 total_bytes = bytes + ringbuf->reserved_size;
29b1b415 2330
79bbcc29
JH
2331 if (unlikely(bytes > remain_usable)) {
2332 /*
2333 * Not enough space for the basic request. So need to flush
2334 * out the remainder and then wait for base + reserved.
2335 */
2336 wait_bytes = remain_actual + total_bytes;
2337 need_wrap = true;
2338 } else {
2339 if (unlikely(total_bytes > remain_usable)) {
2340 /*
2341 * The base request will fit but the reserved space
2342 * falls off the end. So only need to to wait for the
2343 * reserved size after flushing out the remainder.
2344 */
2345 wait_bytes = remain_actual + ringbuf->reserved_size;
2346 need_wrap = true;
2347 } else if (total_bytes > ringbuf->space) {
2348 /* No wrapping required, just waiting. */
2349 wait_bytes = total_bytes;
29b1b415 2350 }
cbcc80df
MK
2351 }
2352
79bbcc29
JH
2353 if (wait_bytes) {
2354 ret = ring_wait_for_space(ring, wait_bytes);
cbcc80df
MK
2355 if (unlikely(ret))
2356 return ret;
79bbcc29
JH
2357
2358 if (need_wrap)
2359 __wrap_ring_buffer(ringbuf);
cbcc80df
MK
2360 }
2361
cbcc80df
MK
2362 return 0;
2363}
2364
5fb9de1a 2365int intel_ring_begin(struct drm_i915_gem_request *req,
e1f99ce6 2366 int num_dwords)
8187a2b7 2367{
5fb9de1a
JH
2368 struct intel_engine_cs *ring;
2369 struct drm_i915_private *dev_priv;
e1f99ce6 2370 int ret;
78501eac 2371
5fb9de1a
JH
2372 WARN_ON(req == NULL);
2373 ring = req->ring;
2374 dev_priv = ring->dev->dev_private;
2375
33196ded
DV
2376 ret = i915_gem_check_wedge(&dev_priv->gpu_error,
2377 dev_priv->mm.interruptible);
de2b9985
DV
2378 if (ret)
2379 return ret;
21dd3734 2380
304d695c
CW
2381 ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
2382 if (ret)
2383 return ret;
2384
ee1b1e5e 2385 ring->buffer->space -= num_dwords * sizeof(uint32_t);
304d695c 2386 return 0;
8187a2b7 2387}
78501eac 2388
753b1ad4 2389/* Align the ring tail to a cacheline boundary */
bba09b12 2390int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
753b1ad4 2391{
bba09b12 2392 struct intel_engine_cs *ring = req->ring;
ee1b1e5e 2393 int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
753b1ad4
VS
2394 int ret;
2395
2396 if (num_dwords == 0)
2397 return 0;
2398
18393f63 2399 num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
5fb9de1a 2400 ret = intel_ring_begin(req, num_dwords);
753b1ad4
VS
2401 if (ret)
2402 return ret;
2403
2404 while (num_dwords--)
2405 intel_ring_emit(ring, MI_NOOP);
2406
2407 intel_ring_advance(ring);
2408
2409 return 0;
2410}
2411
a4872ba6 2412void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
498d2ac1 2413{
3b2cc8ab
OM
2414 struct drm_device *dev = ring->dev;
2415 struct drm_i915_private *dev_priv = dev->dev_private;
498d2ac1 2416
3b2cc8ab 2417 if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
f7e98ad4
MK
2418 I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
2419 I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
3b2cc8ab 2420 if (HAS_VEBOX(dev))
5020150b 2421 I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
e1f99ce6 2422 }
d97ed339 2423
f7e98ad4 2424 ring->set_seqno(ring, seqno);
92cab734 2425 ring->hangcheck.seqno = seqno;
8187a2b7 2426}
62fdfeaf 2427
a4872ba6 2428static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
297b0c5b 2429 u32 value)
881f47b6 2430{
4640c4ff 2431 struct drm_i915_private *dev_priv = ring->dev->dev_private;
881f47b6
XH
2432
2433 /* Every tail move must follow the sequence below */
12f55818
CW
2434
2435 /* Disable notification that the ring is IDLE. The GT
2436 * will then assume that it is busy and bring it out of rc6.
2437 */
0206e353 2438 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
12f55818
CW
2439 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2440
2441 /* Clear the context id. Here be magic! */
2442 I915_WRITE64(GEN6_BSD_RNCID, 0x0);
0206e353 2443
12f55818 2444 /* Wait for the ring not to be idle, i.e. for it to wake up. */
0206e353 2445 if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
12f55818
CW
2446 GEN6_BSD_SLEEP_INDICATOR) == 0,
2447 50))
2448 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
0206e353 2449
12f55818 2450 /* Now that the ring is fully powered up, update the tail */
0206e353 2451 I915_WRITE_TAIL(ring, value);
12f55818
CW
2452 POSTING_READ(RING_TAIL(ring->mmio_base));
2453
2454 /* Let the ring send IDLE messages to the GT again,
2455 * and so let it sleep to conserve power when idle.
2456 */
0206e353 2457 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
12f55818 2458 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
881f47b6
XH
2459}
2460
a84c3ae1 2461static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
ea251324 2462 u32 invalidate, u32 flush)
881f47b6 2463{
a84c3ae1 2464 struct intel_engine_cs *ring = req->ring;
71a77e07 2465 uint32_t cmd;
b72f3acb
CW
2466 int ret;
2467
5fb9de1a 2468 ret = intel_ring_begin(req, 4);
b72f3acb
CW
2469 if (ret)
2470 return ret;
2471
71a77e07 2472 cmd = MI_FLUSH_DW;
075b3bba
BW
2473 if (INTEL_INFO(ring->dev)->gen >= 8)
2474 cmd += 1;
f0a1fb10
CW
2475
2476 /* We always require a command barrier so that subsequent
2477 * commands, such as breadcrumb interrupts, are strictly ordered
2478 * wrt the contents of the write cache being flushed to memory
2479 * (and thus being coherent from the CPU).
2480 */
2481 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2482
9a289771
JB
2483 /*
2484 * Bspec vol 1c.5 - video engine command streamer:
2485 * "If ENABLED, all TLBs will be invalidated once the flush
2486 * operation is complete. This bit is only valid when the
2487 * Post-Sync Operation field is a value of 1h or 3h."
2488 */
71a77e07 2489 if (invalidate & I915_GEM_GPU_DOMAINS)
f0a1fb10
CW
2490 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
2491
71a77e07 2492 intel_ring_emit(ring, cmd);
9a289771 2493 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
075b3bba
BW
2494 if (INTEL_INFO(ring->dev)->gen >= 8) {
2495 intel_ring_emit(ring, 0); /* upper addr */
2496 intel_ring_emit(ring, 0); /* value */
2497 } else {
2498 intel_ring_emit(ring, 0);
2499 intel_ring_emit(ring, MI_NOOP);
2500 }
b72f3acb
CW
2501 intel_ring_advance(ring);
2502 return 0;
881f47b6
XH
2503}
2504
1c7a0623 2505static int
53fddaf7 2506gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
9bcb144c 2507 u64 offset, u32 len,
8e004efc 2508 unsigned dispatch_flags)
1c7a0623 2509{
53fddaf7 2510 struct intel_engine_cs *ring = req->ring;
8e004efc
JH
2511 bool ppgtt = USES_PPGTT(ring->dev) &&
2512 !(dispatch_flags & I915_DISPATCH_SECURE);
1c7a0623
BW
2513 int ret;
2514
5fb9de1a 2515 ret = intel_ring_begin(req, 4);
1c7a0623
BW
2516 if (ret)
2517 return ret;
2518
2519 /* FIXME(BDW): Address space and security selectors. */
919032ec
AJ
2520 intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
2521 (dispatch_flags & I915_DISPATCH_RS ?
2522 MI_BATCH_RESOURCE_STREAMER : 0));
9bcb144c
BW
2523 intel_ring_emit(ring, lower_32_bits(offset));
2524 intel_ring_emit(ring, upper_32_bits(offset));
1c7a0623
BW
2525 intel_ring_emit(ring, MI_NOOP);
2526 intel_ring_advance(ring);
2527
2528 return 0;
2529}
2530
d7d4eedd 2531static int
53fddaf7 2532hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
8e004efc
JH
2533 u64 offset, u32 len,
2534 unsigned dispatch_flags)
d7d4eedd 2535{
53fddaf7 2536 struct intel_engine_cs *ring = req->ring;
d7d4eedd
CW
2537 int ret;
2538
5fb9de1a 2539 ret = intel_ring_begin(req, 2);
d7d4eedd
CW
2540 if (ret)
2541 return ret;
2542
2543 intel_ring_emit(ring,
77072258 2544 MI_BATCH_BUFFER_START |
8e004efc 2545 (dispatch_flags & I915_DISPATCH_SECURE ?
919032ec
AJ
2546 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
2547 (dispatch_flags & I915_DISPATCH_RS ?
2548 MI_BATCH_RESOURCE_STREAMER : 0));
d7d4eedd
CW
2549 /* bit0-7 is the length on GEN6+ */
2550 intel_ring_emit(ring, offset);
2551 intel_ring_advance(ring);
2552
2553 return 0;
2554}
2555
881f47b6 2556static int
53fddaf7 2557gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
9bcb144c 2558 u64 offset, u32 len,
8e004efc 2559 unsigned dispatch_flags)
881f47b6 2560{
53fddaf7 2561 struct intel_engine_cs *ring = req->ring;
0206e353 2562 int ret;
ab6f8e32 2563
5fb9de1a 2564 ret = intel_ring_begin(req, 2);
0206e353
AJ
2565 if (ret)
2566 return ret;
e1f99ce6 2567
d7d4eedd
CW
2568 intel_ring_emit(ring,
2569 MI_BATCH_BUFFER_START |
8e004efc
JH
2570 (dispatch_flags & I915_DISPATCH_SECURE ?
2571 0 : MI_BATCH_NON_SECURE_I965));
0206e353
AJ
2572 /* bit0-7 is the length on GEN6+ */
2573 intel_ring_emit(ring, offset);
2574 intel_ring_advance(ring);
ab6f8e32 2575
0206e353 2576 return 0;
881f47b6
XH
2577}
2578
549f7365
CW
2579/* Blitter support (SandyBridge+) */
2580
a84c3ae1 2581static int gen6_ring_flush(struct drm_i915_gem_request *req,
ea251324 2582 u32 invalidate, u32 flush)
8d19215b 2583{
a84c3ae1 2584 struct intel_engine_cs *ring = req->ring;
fd3da6c9 2585 struct drm_device *dev = ring->dev;
71a77e07 2586 uint32_t cmd;
b72f3acb
CW
2587 int ret;
2588
5fb9de1a 2589 ret = intel_ring_begin(req, 4);
b72f3acb
CW
2590 if (ret)
2591 return ret;
2592
71a77e07 2593 cmd = MI_FLUSH_DW;
dbef0f15 2594 if (INTEL_INFO(dev)->gen >= 8)
075b3bba 2595 cmd += 1;
f0a1fb10
CW
2596
2597 /* We always require a command barrier so that subsequent
2598 * commands, such as breadcrumb interrupts, are strictly ordered
2599 * wrt the contents of the write cache being flushed to memory
2600 * (and thus being coherent from the CPU).
2601 */
2602 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2603
9a289771
JB
2604 /*
2605 * Bspec vol 1c.3 - blitter engine command streamer:
2606 * "If ENABLED, all TLBs will be invalidated once the flush
2607 * operation is complete. This bit is only valid when the
2608 * Post-Sync Operation field is a value of 1h or 3h."
2609 */
71a77e07 2610 if (invalidate & I915_GEM_DOMAIN_RENDER)
f0a1fb10 2611 cmd |= MI_INVALIDATE_TLB;
71a77e07 2612 intel_ring_emit(ring, cmd);
9a289771 2613 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
dbef0f15 2614 if (INTEL_INFO(dev)->gen >= 8) {
075b3bba
BW
2615 intel_ring_emit(ring, 0); /* upper addr */
2616 intel_ring_emit(ring, 0); /* value */
2617 } else {
2618 intel_ring_emit(ring, 0);
2619 intel_ring_emit(ring, MI_NOOP);
2620 }
b72f3acb 2621 intel_ring_advance(ring);
fd3da6c9 2622
b72f3acb 2623 return 0;
8d19215b
ZN
2624}
2625
5c1143bb
XH
2626int intel_init_render_ring_buffer(struct drm_device *dev)
2627{
4640c4ff 2628 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2629 struct intel_engine_cs *ring = &dev_priv->ring[RCS];
3e78998a
BW
2630 struct drm_i915_gem_object *obj;
2631 int ret;
5c1143bb 2632
59465b5f
DV
2633 ring->name = "render ring";
2634 ring->id = RCS;
2635 ring->mmio_base = RENDER_RING_BASE;
2636
707d9cf9 2637 if (INTEL_INFO(dev)->gen >= 8) {
3e78998a
BW
2638 if (i915_semaphore_is_enabled(dev)) {
2639 obj = i915_gem_alloc_object(dev, 4096);
2640 if (obj == NULL) {
2641 DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
2642 i915.semaphores = 0;
2643 } else {
2644 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2645 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
2646 if (ret != 0) {
2647 drm_gem_object_unreference(&obj->base);
2648 DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2649 i915.semaphores = 0;
2650 } else
2651 dev_priv->semaphore_obj = obj;
2652 }
2653 }
7225342a 2654
8f0e2b9d 2655 ring->init_context = intel_rcs_ctx_init;
707d9cf9
BW
2656 ring->add_request = gen6_add_request;
2657 ring->flush = gen8_render_ring_flush;
2658 ring->irq_get = gen8_ring_get_irq;
2659 ring->irq_put = gen8_ring_put_irq;
2660 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2661 ring->get_seqno = gen6_ring_get_seqno;
2662 ring->set_seqno = ring_set_seqno;
2663 if (i915_semaphore_is_enabled(dev)) {
3e78998a 2664 WARN_ON(!dev_priv->semaphore_obj);
5ee426ca 2665 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2666 ring->semaphore.signal = gen8_rcs_signal;
2667 GEN8_RING_SEMAPHORE_INIT;
707d9cf9
BW
2668 }
2669 } else if (INTEL_INFO(dev)->gen >= 6) {
4f91fc6d 2670 ring->init_context = intel_rcs_ctx_init;
1ec14ad3 2671 ring->add_request = gen6_add_request;
4772eaeb 2672 ring->flush = gen7_render_ring_flush;
6c6cf5aa 2673 if (INTEL_INFO(dev)->gen == 6)
b3111509 2674 ring->flush = gen6_render_ring_flush;
707d9cf9
BW
2675 ring->irq_get = gen6_ring_get_irq;
2676 ring->irq_put = gen6_ring_put_irq;
cc609d5d 2677 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
4cd53c0c 2678 ring->get_seqno = gen6_ring_get_seqno;
b70ec5bf 2679 ring->set_seqno = ring_set_seqno;
707d9cf9
BW
2680 if (i915_semaphore_is_enabled(dev)) {
2681 ring->semaphore.sync_to = gen6_ring_sync;
2682 ring->semaphore.signal = gen6_signal;
2683 /*
2684 * The current semaphore is only applied on pre-gen8
2685 * platform. And there is no VCS2 ring on the pre-gen8
2686 * platform. So the semaphore between RCS and VCS2 is
2687 * initialized as INVALID. Gen8 will initialize the
2688 * sema between VCS2 and RCS later.
2689 */
2690 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
2691 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
2692 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
2693 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
2694 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2695 ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
2696 ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
2697 ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
2698 ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
2699 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2700 }
c6df541c
CW
2701 } else if (IS_GEN5(dev)) {
2702 ring->add_request = pc_render_add_request;
46f0f8d1 2703 ring->flush = gen4_render_ring_flush;
c6df541c 2704 ring->get_seqno = pc_render_get_seqno;
b70ec5bf 2705 ring->set_seqno = pc_render_set_seqno;
e48d8634
DV
2706 ring->irq_get = gen5_ring_get_irq;
2707 ring->irq_put = gen5_ring_put_irq;
cc609d5d
BW
2708 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
2709 GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
59465b5f 2710 } else {
8620a3a9 2711 ring->add_request = i9xx_add_request;
46f0f8d1
CW
2712 if (INTEL_INFO(dev)->gen < 4)
2713 ring->flush = gen2_render_ring_flush;
2714 else
2715 ring->flush = gen4_render_ring_flush;
59465b5f 2716 ring->get_seqno = ring_get_seqno;
b70ec5bf 2717 ring->set_seqno = ring_set_seqno;
c2798b19
CW
2718 if (IS_GEN2(dev)) {
2719 ring->irq_get = i8xx_ring_get_irq;
2720 ring->irq_put = i8xx_ring_put_irq;
2721 } else {
2722 ring->irq_get = i9xx_ring_get_irq;
2723 ring->irq_put = i9xx_ring_put_irq;
2724 }
e3670319 2725 ring->irq_enable_mask = I915_USER_INTERRUPT;
1ec14ad3 2726 }
59465b5f 2727 ring->write_tail = ring_write_tail;
707d9cf9 2728
d7d4eedd
CW
2729 if (IS_HASWELL(dev))
2730 ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1c7a0623
BW
2731 else if (IS_GEN8(dev))
2732 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
d7d4eedd 2733 else if (INTEL_INFO(dev)->gen >= 6)
fb3256da
DV
2734 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2735 else if (INTEL_INFO(dev)->gen >= 4)
2736 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2737 else if (IS_I830(dev) || IS_845G(dev))
2738 ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2739 else
2740 ring->dispatch_execbuffer = i915_dispatch_execbuffer;
ecfe00d8 2741 ring->init_hw = init_render_ring;
59465b5f
DV
2742 ring->cleanup = render_ring_cleanup;
2743
b45305fc
DV
2744 /* Workaround batchbuffer to combat CS tlb bug. */
2745 if (HAS_BROKEN_CS_TLB(dev)) {
c4d69da1 2746 obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
b45305fc
DV
2747 if (obj == NULL) {
2748 DRM_ERROR("Failed to allocate batch bo\n");
2749 return -ENOMEM;
2750 }
2751
be1fa129 2752 ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
b45305fc
DV
2753 if (ret != 0) {
2754 drm_gem_object_unreference(&obj->base);
2755 DRM_ERROR("Failed to ping batch bo\n");
2756 return ret;
2757 }
2758
0d1aacac
CW
2759 ring->scratch.obj = obj;
2760 ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
b45305fc
DV
2761 }
2762
99be1dfe
DV
2763 ret = intel_init_ring_buffer(dev, ring);
2764 if (ret)
2765 return ret;
2766
2767 if (INTEL_INFO(dev)->gen >= 5) {
2768 ret = intel_init_pipe_control(ring);
2769 if (ret)
2770 return ret;
2771 }
2772
2773 return 0;
5c1143bb
XH
2774}
2775
2776int intel_init_bsd_ring_buffer(struct drm_device *dev)
2777{
4640c4ff 2778 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2779 struct intel_engine_cs *ring = &dev_priv->ring[VCS];
5c1143bb 2780
58fa3835
DV
2781 ring->name = "bsd ring";
2782 ring->id = VCS;
2783
0fd2c201 2784 ring->write_tail = ring_write_tail;
780f18c8 2785 if (INTEL_INFO(dev)->gen >= 6) {
58fa3835 2786 ring->mmio_base = GEN6_BSD_RING_BASE;
0fd2c201
DV
2787 /* gen6 bsd needs a special wa for tail updates */
2788 if (IS_GEN6(dev))
2789 ring->write_tail = gen6_bsd_ring_write_tail;
ea251324 2790 ring->flush = gen6_bsd_ring_flush;
58fa3835
DV
2791 ring->add_request = gen6_add_request;
2792 ring->get_seqno = gen6_ring_get_seqno;
b70ec5bf 2793 ring->set_seqno = ring_set_seqno;
abd58f01
BW
2794 if (INTEL_INFO(dev)->gen >= 8) {
2795 ring->irq_enable_mask =
2796 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2797 ring->irq_get = gen8_ring_get_irq;
2798 ring->irq_put = gen8_ring_put_irq;
1c7a0623
BW
2799 ring->dispatch_execbuffer =
2800 gen8_ring_dispatch_execbuffer;
707d9cf9 2801 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2802 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2803 ring->semaphore.signal = gen8_xcs_signal;
2804 GEN8_RING_SEMAPHORE_INIT;
707d9cf9 2805 }
abd58f01
BW
2806 } else {
2807 ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2808 ring->irq_get = gen6_ring_get_irq;
2809 ring->irq_put = gen6_ring_put_irq;
1c7a0623
BW
2810 ring->dispatch_execbuffer =
2811 gen6_ring_dispatch_execbuffer;
707d9cf9
BW
2812 if (i915_semaphore_is_enabled(dev)) {
2813 ring->semaphore.sync_to = gen6_ring_sync;
2814 ring->semaphore.signal = gen6_signal;
2815 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
2816 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
2817 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
2818 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
2819 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2820 ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
2821 ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
2822 ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
2823 ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
2824 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2825 }
abd58f01 2826 }
58fa3835
DV
2827 } else {
2828 ring->mmio_base = BSD_RING_BASE;
58fa3835 2829 ring->flush = bsd_ring_flush;
8620a3a9 2830 ring->add_request = i9xx_add_request;
58fa3835 2831 ring->get_seqno = ring_get_seqno;
b70ec5bf 2832 ring->set_seqno = ring_set_seqno;
e48d8634 2833 if (IS_GEN5(dev)) {
cc609d5d 2834 ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
e48d8634
DV
2835 ring->irq_get = gen5_ring_get_irq;
2836 ring->irq_put = gen5_ring_put_irq;
2837 } else {
e3670319 2838 ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
e48d8634
DV
2839 ring->irq_get = i9xx_ring_get_irq;
2840 ring->irq_put = i9xx_ring_put_irq;
2841 }
fb3256da 2842 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
58fa3835 2843 }
ecfe00d8 2844 ring->init_hw = init_ring_common;
58fa3835 2845
1ec14ad3 2846 return intel_init_ring_buffer(dev, ring);
5c1143bb 2847}
549f7365 2848
845f74a7 2849/**
62659920 2850 * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
845f74a7
ZY
2851 */
2852int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2853{
2854 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2855 struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
845f74a7 2856
f7b64236 2857 ring->name = "bsd2 ring";
845f74a7
ZY
2858 ring->id = VCS2;
2859
2860 ring->write_tail = ring_write_tail;
2861 ring->mmio_base = GEN8_BSD2_RING_BASE;
2862 ring->flush = gen6_bsd_ring_flush;
2863 ring->add_request = gen6_add_request;
2864 ring->get_seqno = gen6_ring_get_seqno;
2865 ring->set_seqno = ring_set_seqno;
2866 ring->irq_enable_mask =
2867 GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2868 ring->irq_get = gen8_ring_get_irq;
2869 ring->irq_put = gen8_ring_put_irq;
2870 ring->dispatch_execbuffer =
2871 gen8_ring_dispatch_execbuffer;
3e78998a 2872 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2873 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2874 ring->semaphore.signal = gen8_xcs_signal;
2875 GEN8_RING_SEMAPHORE_INIT;
2876 }
ecfe00d8 2877 ring->init_hw = init_ring_common;
845f74a7
ZY
2878
2879 return intel_init_ring_buffer(dev, ring);
2880}
2881
549f7365
CW
2882int intel_init_blt_ring_buffer(struct drm_device *dev)
2883{
4640c4ff 2884 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2885 struct intel_engine_cs *ring = &dev_priv->ring[BCS];
549f7365 2886
3535d9dd
DV
2887 ring->name = "blitter ring";
2888 ring->id = BCS;
2889
2890 ring->mmio_base = BLT_RING_BASE;
2891 ring->write_tail = ring_write_tail;
ea251324 2892 ring->flush = gen6_ring_flush;
3535d9dd
DV
2893 ring->add_request = gen6_add_request;
2894 ring->get_seqno = gen6_ring_get_seqno;
b70ec5bf 2895 ring->set_seqno = ring_set_seqno;
abd58f01
BW
2896 if (INTEL_INFO(dev)->gen >= 8) {
2897 ring->irq_enable_mask =
2898 GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2899 ring->irq_get = gen8_ring_get_irq;
2900 ring->irq_put = gen8_ring_put_irq;
1c7a0623 2901 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
707d9cf9 2902 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2903 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2904 ring->semaphore.signal = gen8_xcs_signal;
2905 GEN8_RING_SEMAPHORE_INIT;
707d9cf9 2906 }
abd58f01
BW
2907 } else {
2908 ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
2909 ring->irq_get = gen6_ring_get_irq;
2910 ring->irq_put = gen6_ring_put_irq;
1c7a0623 2911 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
707d9cf9
BW
2912 if (i915_semaphore_is_enabled(dev)) {
2913 ring->semaphore.signal = gen6_signal;
2914 ring->semaphore.sync_to = gen6_ring_sync;
2915 /*
2916 * The current semaphore is only applied on pre-gen8
2917 * platform. And there is no VCS2 ring on the pre-gen8
2918 * platform. So the semaphore between BCS and VCS2 is
2919 * initialized as INVALID. Gen8 will initialize the
2920 * sema between BCS and VCS2 later.
2921 */
2922 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
2923 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
2924 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
2925 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
2926 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2927 ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
2928 ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
2929 ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
2930 ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
2931 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2932 }
abd58f01 2933 }
ecfe00d8 2934 ring->init_hw = init_ring_common;
549f7365 2935
1ec14ad3 2936 return intel_init_ring_buffer(dev, ring);
549f7365 2937}
a7b9761d 2938
9a8a2213
BW
2939int intel_init_vebox_ring_buffer(struct drm_device *dev)
2940{
4640c4ff 2941 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2942 struct intel_engine_cs *ring = &dev_priv->ring[VECS];
9a8a2213
BW
2943
2944 ring->name = "video enhancement ring";
2945 ring->id = VECS;
2946
2947 ring->mmio_base = VEBOX_RING_BASE;
2948 ring->write_tail = ring_write_tail;
2949 ring->flush = gen6_ring_flush;
2950 ring->add_request = gen6_add_request;
2951 ring->get_seqno = gen6_ring_get_seqno;
2952 ring->set_seqno = ring_set_seqno;
abd58f01
BW
2953
2954 if (INTEL_INFO(dev)->gen >= 8) {
2955 ring->irq_enable_mask =
40c499f9 2956 GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
abd58f01
BW
2957 ring->irq_get = gen8_ring_get_irq;
2958 ring->irq_put = gen8_ring_put_irq;
1c7a0623 2959 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
707d9cf9 2960 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2961 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2962 ring->semaphore.signal = gen8_xcs_signal;
2963 GEN8_RING_SEMAPHORE_INIT;
707d9cf9 2964 }
abd58f01
BW
2965 } else {
2966 ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
2967 ring->irq_get = hsw_vebox_get_irq;
2968 ring->irq_put = hsw_vebox_put_irq;
1c7a0623 2969 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
707d9cf9
BW
2970 if (i915_semaphore_is_enabled(dev)) {
2971 ring->semaphore.sync_to = gen6_ring_sync;
2972 ring->semaphore.signal = gen6_signal;
2973 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
2974 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
2975 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
2976 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
2977 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2978 ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
2979 ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
2980 ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
2981 ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
2982 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2983 }
abd58f01 2984 }
ecfe00d8 2985 ring->init_hw = init_ring_common;
9a8a2213
BW
2986
2987 return intel_init_ring_buffer(dev, ring);
2988}
2989
a7b9761d 2990int
4866d729 2991intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
a7b9761d 2992{
4866d729 2993 struct intel_engine_cs *ring = req->ring;
a7b9761d
CW
2994 int ret;
2995
2996 if (!ring->gpu_caches_dirty)
2997 return 0;
2998
a84c3ae1 2999 ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
a7b9761d
CW
3000 if (ret)
3001 return ret;
3002
a84c3ae1 3003 trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
a7b9761d
CW
3004
3005 ring->gpu_caches_dirty = false;
3006 return 0;
3007}
3008
3009int
2f20055d 3010intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
a7b9761d 3011{
2f20055d 3012 struct intel_engine_cs *ring = req->ring;
a7b9761d
CW
3013 uint32_t flush_domains;
3014 int ret;
3015
3016 flush_domains = 0;
3017 if (ring->gpu_caches_dirty)
3018 flush_domains = I915_GEM_GPU_DOMAINS;
3019
a84c3ae1 3020 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
a7b9761d
CW
3021 if (ret)
3022 return ret;
3023
a84c3ae1 3024 trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
a7b9761d
CW
3025
3026 ring->gpu_caches_dirty = false;
3027 return 0;
3028}
e3efda49
CW
3029
3030void
a4872ba6 3031intel_stop_ring_buffer(struct intel_engine_cs *ring)
e3efda49
CW
3032{
3033 int ret;
3034
3035 if (!intel_ring_initialized(ring))
3036 return;
3037
3038 ret = intel_ring_idle(ring);
3039 if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
3040 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
3041 ring->name, ret);
3042
3043 stop_ring(ring);
3044}
This page took 0.639728 seconds and 5 git commands to generate.