drm/i915/skl: Add GEN8_L3SQCREG4 to HW whitelist
[deliverable/linux.git] / drivers / gpu / drm / i915 / intel_ringbuffer.c
CommitLineData
62fdfeaf
EA
1/*
2 * Copyright © 2008-2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 * Zou Nan hai <nanhai.zou@intel.com>
26 * Xiang Hai hao<haihao.xiang@intel.com>
27 *
28 */
29
a4d8a0fe 30#include <linux/log2.h>
760285e7 31#include <drm/drmP.h>
62fdfeaf 32#include "i915_drv.h"
760285e7 33#include <drm/i915_drm.h>
62fdfeaf 34#include "i915_trace.h"
881f47b6 35#include "intel_drv.h"
62fdfeaf 36
82e104cc 37int __intel_ring_space(int head, int tail, int size)
c7dca47b 38{
4f54741e
DG
39 int space = head - tail;
40 if (space <= 0)
1cf0ba14 41 space += size;
4f54741e 42 return space - I915_RING_FREE_SPACE;
c7dca47b
CW
43}
44
ebd0fd4b
DG
45void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
46{
47 if (ringbuf->last_retired_head != -1) {
48 ringbuf->head = ringbuf->last_retired_head;
49 ringbuf->last_retired_head = -1;
50 }
51
52 ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
53 ringbuf->tail, ringbuf->size);
54}
55
82e104cc 56int intel_ring_space(struct intel_ringbuffer *ringbuf)
1cf0ba14 57{
ebd0fd4b
DG
58 intel_ring_update_space(ringbuf);
59 return ringbuf->space;
1cf0ba14
CW
60}
61
82e104cc 62bool intel_ring_stopped(struct intel_engine_cs *ring)
09246732
CW
63{
64 struct drm_i915_private *dev_priv = ring->dev->dev_private;
88b4aa87
MK
65 return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
66}
09246732 67
6258fbe2 68static void __intel_ring_advance(struct intel_engine_cs *ring)
88b4aa87 69{
93b0a4e0
OM
70 struct intel_ringbuffer *ringbuf = ring->buffer;
71 ringbuf->tail &= ringbuf->size - 1;
88b4aa87 72 if (intel_ring_stopped(ring))
09246732 73 return;
93b0a4e0 74 ring->write_tail(ring, ringbuf->tail);
09246732
CW
75}
76
b72f3acb 77static int
a84c3ae1 78gen2_render_ring_flush(struct drm_i915_gem_request *req,
46f0f8d1
CW
79 u32 invalidate_domains,
80 u32 flush_domains)
81{
a84c3ae1 82 struct intel_engine_cs *ring = req->ring;
46f0f8d1
CW
83 u32 cmd;
84 int ret;
85
86 cmd = MI_FLUSH;
31b14c9f 87 if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
46f0f8d1
CW
88 cmd |= MI_NO_WRITE_FLUSH;
89
90 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
91 cmd |= MI_READ_FLUSH;
92
5fb9de1a 93 ret = intel_ring_begin(req, 2);
46f0f8d1
CW
94 if (ret)
95 return ret;
96
97 intel_ring_emit(ring, cmd);
98 intel_ring_emit(ring, MI_NOOP);
99 intel_ring_advance(ring);
100
101 return 0;
102}
103
104static int
a84c3ae1 105gen4_render_ring_flush(struct drm_i915_gem_request *req,
46f0f8d1
CW
106 u32 invalidate_domains,
107 u32 flush_domains)
62fdfeaf 108{
a84c3ae1 109 struct intel_engine_cs *ring = req->ring;
78501eac 110 struct drm_device *dev = ring->dev;
6f392d54 111 u32 cmd;
b72f3acb 112 int ret;
6f392d54 113
36d527de
CW
114 /*
115 * read/write caches:
116 *
117 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
118 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
119 * also flushed at 2d versus 3d pipeline switches.
120 *
121 * read-only caches:
122 *
123 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
124 * MI_READ_FLUSH is set, and is always flushed on 965.
125 *
126 * I915_GEM_DOMAIN_COMMAND may not exist?
127 *
128 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
129 * invalidated when MI_EXE_FLUSH is set.
130 *
131 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
132 * invalidated with every MI_FLUSH.
133 *
134 * TLBs:
135 *
136 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
137 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
138 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
139 * are flushed at any MI_FLUSH.
140 */
141
142 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
46f0f8d1 143 if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
36d527de 144 cmd &= ~MI_NO_WRITE_FLUSH;
36d527de
CW
145 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
146 cmd |= MI_EXE_FLUSH;
62fdfeaf 147
36d527de
CW
148 if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
149 (IS_G4X(dev) || IS_GEN5(dev)))
150 cmd |= MI_INVALIDATE_ISP;
70eac33e 151
5fb9de1a 152 ret = intel_ring_begin(req, 2);
36d527de
CW
153 if (ret)
154 return ret;
b72f3acb 155
36d527de
CW
156 intel_ring_emit(ring, cmd);
157 intel_ring_emit(ring, MI_NOOP);
158 intel_ring_advance(ring);
b72f3acb
CW
159
160 return 0;
8187a2b7
ZN
161}
162
8d315287
JB
163/**
164 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
165 * implementing two workarounds on gen6. From section 1.4.7.1
166 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
167 *
168 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
169 * produced by non-pipelined state commands), software needs to first
170 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
171 * 0.
172 *
173 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
174 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
175 *
176 * And the workaround for these two requires this workaround first:
177 *
178 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
179 * BEFORE the pipe-control with a post-sync op and no write-cache
180 * flushes.
181 *
182 * And this last workaround is tricky because of the requirements on
183 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
184 * volume 2 part 1:
185 *
186 * "1 of the following must also be set:
187 * - Render Target Cache Flush Enable ([12] of DW1)
188 * - Depth Cache Flush Enable ([0] of DW1)
189 * - Stall at Pixel Scoreboard ([1] of DW1)
190 * - Depth Stall ([13] of DW1)
191 * - Post-Sync Operation ([13] of DW1)
192 * - Notify Enable ([8] of DW1)"
193 *
194 * The cache flushes require the workaround flush that triggered this
195 * one, so we can't use it. Depth stall would trigger the same.
196 * Post-sync nonzero is what triggered this second workaround, so we
197 * can't use that one either. Notify enable is IRQs, which aren't
198 * really our business. That leaves only stall at scoreboard.
199 */
200static int
f2cf1fcc 201intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
8d315287 202{
f2cf1fcc 203 struct intel_engine_cs *ring = req->ring;
18393f63 204 u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
8d315287
JB
205 int ret;
206
5fb9de1a 207 ret = intel_ring_begin(req, 6);
8d315287
JB
208 if (ret)
209 return ret;
210
211 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
212 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
213 PIPE_CONTROL_STALL_AT_SCOREBOARD);
214 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
215 intel_ring_emit(ring, 0); /* low dword */
216 intel_ring_emit(ring, 0); /* high dword */
217 intel_ring_emit(ring, MI_NOOP);
218 intel_ring_advance(ring);
219
5fb9de1a 220 ret = intel_ring_begin(req, 6);
8d315287
JB
221 if (ret)
222 return ret;
223
224 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
225 intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
226 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
227 intel_ring_emit(ring, 0);
228 intel_ring_emit(ring, 0);
229 intel_ring_emit(ring, MI_NOOP);
230 intel_ring_advance(ring);
231
232 return 0;
233}
234
235static int
a84c3ae1
JH
236gen6_render_ring_flush(struct drm_i915_gem_request *req,
237 u32 invalidate_domains, u32 flush_domains)
8d315287 238{
a84c3ae1 239 struct intel_engine_cs *ring = req->ring;
8d315287 240 u32 flags = 0;
18393f63 241 u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
8d315287
JB
242 int ret;
243
b3111509 244 /* Force SNB workarounds for PIPE_CONTROL flushes */
f2cf1fcc 245 ret = intel_emit_post_sync_nonzero_flush(req);
b3111509
PZ
246 if (ret)
247 return ret;
248
8d315287
JB
249 /* Just flush everything. Experiments have shown that reducing the
250 * number of bits based on the write domains has little performance
251 * impact.
252 */
7d54a904
CW
253 if (flush_domains) {
254 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
255 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
256 /*
257 * Ensure that any following seqno writes only happen
258 * when the render cache is indeed flushed.
259 */
97f209bc 260 flags |= PIPE_CONTROL_CS_STALL;
7d54a904
CW
261 }
262 if (invalidate_domains) {
263 flags |= PIPE_CONTROL_TLB_INVALIDATE;
264 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
265 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
266 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
267 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
268 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
269 /*
270 * TLB invalidate requires a post-sync write.
271 */
3ac78313 272 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
7d54a904 273 }
8d315287 274
5fb9de1a 275 ret = intel_ring_begin(req, 4);
8d315287
JB
276 if (ret)
277 return ret;
278
6c6cf5aa 279 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
8d315287
JB
280 intel_ring_emit(ring, flags);
281 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
6c6cf5aa 282 intel_ring_emit(ring, 0);
8d315287
JB
283 intel_ring_advance(ring);
284
285 return 0;
286}
287
f3987631 288static int
f2cf1fcc 289gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
f3987631 290{
f2cf1fcc 291 struct intel_engine_cs *ring = req->ring;
f3987631
PZ
292 int ret;
293
5fb9de1a 294 ret = intel_ring_begin(req, 4);
f3987631
PZ
295 if (ret)
296 return ret;
297
298 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
299 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
300 PIPE_CONTROL_STALL_AT_SCOREBOARD);
301 intel_ring_emit(ring, 0);
302 intel_ring_emit(ring, 0);
303 intel_ring_advance(ring);
304
305 return 0;
306}
307
4772eaeb 308static int
a84c3ae1 309gen7_render_ring_flush(struct drm_i915_gem_request *req,
4772eaeb
PZ
310 u32 invalidate_domains, u32 flush_domains)
311{
a84c3ae1 312 struct intel_engine_cs *ring = req->ring;
4772eaeb 313 u32 flags = 0;
18393f63 314 u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
4772eaeb
PZ
315 int ret;
316
f3987631
PZ
317 /*
318 * Ensure that any following seqno writes only happen when the render
319 * cache is indeed flushed.
320 *
321 * Workaround: 4th PIPE_CONTROL command (except the ones with only
322 * read-cache invalidate bits set) must have the CS_STALL bit set. We
323 * don't try to be clever and just set it unconditionally.
324 */
325 flags |= PIPE_CONTROL_CS_STALL;
326
4772eaeb
PZ
327 /* Just flush everything. Experiments have shown that reducing the
328 * number of bits based on the write domains has little performance
329 * impact.
330 */
331 if (flush_domains) {
332 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
333 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
965fd602 334 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
40a24488 335 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4772eaeb
PZ
336 }
337 if (invalidate_domains) {
338 flags |= PIPE_CONTROL_TLB_INVALIDATE;
339 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
340 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
341 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
342 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
343 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
148b83d0 344 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
4772eaeb
PZ
345 /*
346 * TLB invalidate requires a post-sync write.
347 */
348 flags |= PIPE_CONTROL_QW_WRITE;
b9e1faa7 349 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
f3987631 350
add284a3
CW
351 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
352
f3987631
PZ
353 /* Workaround: we must issue a pipe_control with CS-stall bit
354 * set before a pipe_control command that has the state cache
355 * invalidate bit set. */
f2cf1fcc 356 gen7_render_ring_cs_stall_wa(req);
4772eaeb
PZ
357 }
358
5fb9de1a 359 ret = intel_ring_begin(req, 4);
4772eaeb
PZ
360 if (ret)
361 return ret;
362
363 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
364 intel_ring_emit(ring, flags);
b9e1faa7 365 intel_ring_emit(ring, scratch_addr);
4772eaeb
PZ
366 intel_ring_emit(ring, 0);
367 intel_ring_advance(ring);
368
369 return 0;
370}
371
884ceace 372static int
f2cf1fcc 373gen8_emit_pipe_control(struct drm_i915_gem_request *req,
884ceace
KG
374 u32 flags, u32 scratch_addr)
375{
f2cf1fcc 376 struct intel_engine_cs *ring = req->ring;
884ceace
KG
377 int ret;
378
5fb9de1a 379 ret = intel_ring_begin(req, 6);
884ceace
KG
380 if (ret)
381 return ret;
382
383 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
384 intel_ring_emit(ring, flags);
385 intel_ring_emit(ring, scratch_addr);
386 intel_ring_emit(ring, 0);
387 intel_ring_emit(ring, 0);
388 intel_ring_emit(ring, 0);
389 intel_ring_advance(ring);
390
391 return 0;
392}
393
a5f3d68e 394static int
a84c3ae1 395gen8_render_ring_flush(struct drm_i915_gem_request *req,
a5f3d68e
BW
396 u32 invalidate_domains, u32 flush_domains)
397{
398 u32 flags = 0;
f2cf1fcc 399 u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
02c9f7e3 400 int ret;
a5f3d68e
BW
401
402 flags |= PIPE_CONTROL_CS_STALL;
403
404 if (flush_domains) {
405 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
406 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
965fd602 407 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
40a24488 408 flags |= PIPE_CONTROL_FLUSH_ENABLE;
a5f3d68e
BW
409 }
410 if (invalidate_domains) {
411 flags |= PIPE_CONTROL_TLB_INVALIDATE;
412 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
413 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
414 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
415 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
416 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
417 flags |= PIPE_CONTROL_QW_WRITE;
418 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
02c9f7e3
KG
419
420 /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
f2cf1fcc 421 ret = gen8_emit_pipe_control(req,
02c9f7e3
KG
422 PIPE_CONTROL_CS_STALL |
423 PIPE_CONTROL_STALL_AT_SCOREBOARD,
424 0);
425 if (ret)
426 return ret;
a5f3d68e
BW
427 }
428
f2cf1fcc 429 return gen8_emit_pipe_control(req, flags, scratch_addr);
a5f3d68e
BW
430}
431
a4872ba6 432static void ring_write_tail(struct intel_engine_cs *ring,
297b0c5b 433 u32 value)
d46eefa2 434{
4640c4ff 435 struct drm_i915_private *dev_priv = ring->dev->dev_private;
297b0c5b 436 I915_WRITE_TAIL(ring, value);
d46eefa2
XH
437}
438
a4872ba6 439u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
8187a2b7 440{
4640c4ff 441 struct drm_i915_private *dev_priv = ring->dev->dev_private;
50877445 442 u64 acthd;
8187a2b7 443
50877445
CW
444 if (INTEL_INFO(ring->dev)->gen >= 8)
445 acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
446 RING_ACTHD_UDW(ring->mmio_base));
447 else if (INTEL_INFO(ring->dev)->gen >= 4)
448 acthd = I915_READ(RING_ACTHD(ring->mmio_base));
449 else
450 acthd = I915_READ(ACTHD);
451
452 return acthd;
8187a2b7
ZN
453}
454
a4872ba6 455static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
035dc1e0
DV
456{
457 struct drm_i915_private *dev_priv = ring->dev->dev_private;
458 u32 addr;
459
460 addr = dev_priv->status_page_dmah->busaddr;
461 if (INTEL_INFO(ring->dev)->gen >= 4)
462 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
463 I915_WRITE(HWS_PGA, addr);
464}
465
af75f269
DL
466static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
467{
468 struct drm_device *dev = ring->dev;
469 struct drm_i915_private *dev_priv = ring->dev->dev_private;
f0f59a00 470 i915_reg_t mmio;
af75f269
DL
471
472 /* The ring status page addresses are no longer next to the rest of
473 * the ring registers as of gen7.
474 */
475 if (IS_GEN7(dev)) {
476 switch (ring->id) {
477 case RCS:
478 mmio = RENDER_HWS_PGA_GEN7;
479 break;
480 case BCS:
481 mmio = BLT_HWS_PGA_GEN7;
482 break;
483 /*
484 * VCS2 actually doesn't exist on Gen7. Only shut up
485 * gcc switch check warning
486 */
487 case VCS2:
488 case VCS:
489 mmio = BSD_HWS_PGA_GEN7;
490 break;
491 case VECS:
492 mmio = VEBOX_HWS_PGA_GEN7;
493 break;
494 }
495 } else if (IS_GEN6(ring->dev)) {
496 mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
497 } else {
498 /* XXX: gen8 returns to sanity */
499 mmio = RING_HWS_PGA(ring->mmio_base);
500 }
501
502 I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
503 POSTING_READ(mmio);
504
505 /*
506 * Flush the TLB for this page
507 *
508 * FIXME: These two bits have disappeared on gen8, so a question
509 * arises: do we still need this and if so how should we go about
510 * invalidating the TLB?
511 */
512 if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
f0f59a00 513 i915_reg_t reg = RING_INSTPM(ring->mmio_base);
af75f269
DL
514
515 /* ring should be idle before issuing a sync flush*/
516 WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
517
518 I915_WRITE(reg,
519 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
520 INSTPM_SYNC_FLUSH));
521 if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
522 1000))
523 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
524 ring->name);
525 }
526}
527
a4872ba6 528static bool stop_ring(struct intel_engine_cs *ring)
8187a2b7 529{
9991ae78 530 struct drm_i915_private *dev_priv = to_i915(ring->dev);
8187a2b7 531
9991ae78
CW
532 if (!IS_GEN2(ring->dev)) {
533 I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
403bdd10
DV
534 if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
535 DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
9bec9b13
CW
536 /* Sometimes we observe that the idle flag is not
537 * set even though the ring is empty. So double
538 * check before giving up.
539 */
540 if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
541 return false;
9991ae78
CW
542 }
543 }
b7884eb4 544
7f2ab699 545 I915_WRITE_CTL(ring, 0);
570ef608 546 I915_WRITE_HEAD(ring, 0);
78501eac 547 ring->write_tail(ring, 0);
8187a2b7 548
9991ae78
CW
549 if (!IS_GEN2(ring->dev)) {
550 (void)I915_READ_CTL(ring);
551 I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
552 }
a51435a3 553
9991ae78
CW
554 return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
555}
8187a2b7 556
a4872ba6 557static int init_ring_common(struct intel_engine_cs *ring)
9991ae78
CW
558{
559 struct drm_device *dev = ring->dev;
560 struct drm_i915_private *dev_priv = dev->dev_private;
93b0a4e0
OM
561 struct intel_ringbuffer *ringbuf = ring->buffer;
562 struct drm_i915_gem_object *obj = ringbuf->obj;
9991ae78
CW
563 int ret = 0;
564
59bad947 565 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
9991ae78
CW
566
567 if (!stop_ring(ring)) {
568 /* G45 ring initialization often fails to reset head to zero */
6fd0d56e
CW
569 DRM_DEBUG_KMS("%s head not reset to zero "
570 "ctl %08x head %08x tail %08x start %08x\n",
571 ring->name,
572 I915_READ_CTL(ring),
573 I915_READ_HEAD(ring),
574 I915_READ_TAIL(ring),
575 I915_READ_START(ring));
8187a2b7 576
9991ae78 577 if (!stop_ring(ring)) {
6fd0d56e
CW
578 DRM_ERROR("failed to set %s head to zero "
579 "ctl %08x head %08x tail %08x start %08x\n",
580 ring->name,
581 I915_READ_CTL(ring),
582 I915_READ_HEAD(ring),
583 I915_READ_TAIL(ring),
584 I915_READ_START(ring));
9991ae78
CW
585 ret = -EIO;
586 goto out;
6fd0d56e 587 }
8187a2b7
ZN
588 }
589
9991ae78
CW
590 if (I915_NEED_GFX_HWS(dev))
591 intel_ring_setup_status_page(ring);
592 else
593 ring_setup_phys_status_page(ring);
594
ece4a17d
JK
595 /* Enforce ordering by reading HEAD register back */
596 I915_READ_HEAD(ring);
597
0d8957c8
DV
598 /* Initialize the ring. This must happen _after_ we've cleared the ring
599 * registers with the above sequence (the readback of the HEAD registers
600 * also enforces ordering), otherwise the hw might lose the new ring
601 * register values. */
f343c5f6 602 I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
95468892
CW
603
604 /* WaClearRingBufHeadRegAtInit:ctg,elk */
605 if (I915_READ_HEAD(ring))
606 DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
607 ring->name, I915_READ_HEAD(ring));
608 I915_WRITE_HEAD(ring, 0);
609 (void)I915_READ_HEAD(ring);
610
7f2ab699 611 I915_WRITE_CTL(ring,
93b0a4e0 612 ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
5d031e5b 613 | RING_VALID);
8187a2b7 614
8187a2b7 615 /* If the head is still not zero, the ring is dead */
f01db988 616 if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
f343c5f6 617 I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
f01db988 618 (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
e74cfed5 619 DRM_ERROR("%s initialization failed "
48e48a0b
CW
620 "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
621 ring->name,
622 I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
623 I915_READ_HEAD(ring), I915_READ_TAIL(ring),
624 I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
b7884eb4
DV
625 ret = -EIO;
626 goto out;
8187a2b7
ZN
627 }
628
ebd0fd4b 629 ringbuf->last_retired_head = -1;
5c6c6003
CW
630 ringbuf->head = I915_READ_HEAD(ring);
631 ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
ebd0fd4b 632 intel_ring_update_space(ringbuf);
1ec14ad3 633
50f018df
CW
634 memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
635
b7884eb4 636out:
59bad947 637 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
b7884eb4
DV
638
639 return ret;
8187a2b7
ZN
640}
641
9b1136d5
OM
642void
643intel_fini_pipe_control(struct intel_engine_cs *ring)
644{
645 struct drm_device *dev = ring->dev;
646
647 if (ring->scratch.obj == NULL)
648 return;
649
650 if (INTEL_INFO(dev)->gen >= 5) {
651 kunmap(sg_page(ring->scratch.obj->pages->sgl));
652 i915_gem_object_ggtt_unpin(ring->scratch.obj);
653 }
654
655 drm_gem_object_unreference(&ring->scratch.obj->base);
656 ring->scratch.obj = NULL;
657}
658
659int
660intel_init_pipe_control(struct intel_engine_cs *ring)
c6df541c 661{
c6df541c
CW
662 int ret;
663
bfc882b4 664 WARN_ON(ring->scratch.obj);
c6df541c 665
0d1aacac
CW
666 ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
667 if (ring->scratch.obj == NULL) {
c6df541c
CW
668 DRM_ERROR("Failed to allocate seqno page\n");
669 ret = -ENOMEM;
670 goto err;
671 }
e4ffd173 672
a9cc726c
DV
673 ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
674 if (ret)
675 goto err_unref;
c6df541c 676
1ec9e26d 677 ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
c6df541c
CW
678 if (ret)
679 goto err_unref;
680
0d1aacac
CW
681 ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
682 ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
683 if (ring->scratch.cpu_page == NULL) {
56b085a0 684 ret = -ENOMEM;
c6df541c 685 goto err_unpin;
56b085a0 686 }
c6df541c 687
2b1086cc 688 DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
0d1aacac 689 ring->name, ring->scratch.gtt_offset);
c6df541c
CW
690 return 0;
691
692err_unpin:
d7f46fc4 693 i915_gem_object_ggtt_unpin(ring->scratch.obj);
c6df541c 694err_unref:
0d1aacac 695 drm_gem_object_unreference(&ring->scratch.obj->base);
c6df541c 696err:
c6df541c
CW
697 return ret;
698}
699
e2be4faf 700static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
86d7f238 701{
7225342a 702 int ret, i;
e2be4faf 703 struct intel_engine_cs *ring = req->ring;
888b5995
AS
704 struct drm_device *dev = ring->dev;
705 struct drm_i915_private *dev_priv = dev->dev_private;
7225342a 706 struct i915_workarounds *w = &dev_priv->workarounds;
888b5995 707
02235808 708 if (w->count == 0)
7225342a 709 return 0;
888b5995 710
7225342a 711 ring->gpu_caches_dirty = true;
4866d729 712 ret = intel_ring_flush_all_caches(req);
7225342a
MK
713 if (ret)
714 return ret;
888b5995 715
5fb9de1a 716 ret = intel_ring_begin(req, (w->count * 2 + 2));
7225342a
MK
717 if (ret)
718 return ret;
719
22a916aa 720 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
7225342a 721 for (i = 0; i < w->count; i++) {
f92a9162 722 intel_ring_emit_reg(ring, w->reg[i].addr);
7225342a
MK
723 intel_ring_emit(ring, w->reg[i].value);
724 }
22a916aa 725 intel_ring_emit(ring, MI_NOOP);
7225342a
MK
726
727 intel_ring_advance(ring);
728
729 ring->gpu_caches_dirty = true;
4866d729 730 ret = intel_ring_flush_all_caches(req);
7225342a
MK
731 if (ret)
732 return ret;
888b5995 733
7225342a 734 DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
888b5995 735
7225342a 736 return 0;
86d7f238
AS
737}
738
8753181e 739static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
8f0e2b9d
DV
740{
741 int ret;
742
e2be4faf 743 ret = intel_ring_workarounds_emit(req);
8f0e2b9d
DV
744 if (ret != 0)
745 return ret;
746
be01363f 747 ret = i915_gem_render_state_init(req);
8f0e2b9d
DV
748 if (ret)
749 DRM_ERROR("init render state: %d\n", ret);
750
751 return ret;
752}
753
7225342a 754static int wa_add(struct drm_i915_private *dev_priv,
f0f59a00
VS
755 i915_reg_t addr,
756 const u32 mask, const u32 val)
7225342a
MK
757{
758 const u32 idx = dev_priv->workarounds.count;
759
760 if (WARN_ON(idx >= I915_MAX_WA_REGS))
761 return -ENOSPC;
762
763 dev_priv->workarounds.reg[idx].addr = addr;
764 dev_priv->workarounds.reg[idx].value = val;
765 dev_priv->workarounds.reg[idx].mask = mask;
766
767 dev_priv->workarounds.count++;
768
769 return 0;
86d7f238
AS
770}
771
ca5a0fbd 772#define WA_REG(addr, mask, val) do { \
cf4b0de6 773 const int r = wa_add(dev_priv, (addr), (mask), (val)); \
7225342a
MK
774 if (r) \
775 return r; \
ca5a0fbd 776 } while (0)
7225342a
MK
777
778#define WA_SET_BIT_MASKED(addr, mask) \
26459343 779 WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
7225342a
MK
780
781#define WA_CLR_BIT_MASKED(addr, mask) \
26459343 782 WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
7225342a 783
98533251 784#define WA_SET_FIELD_MASKED(addr, mask, value) \
cf4b0de6 785 WA_REG(addr, mask, _MASKED_FIELD(mask, value))
7225342a 786
cf4b0de6
DL
787#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
788#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
7225342a 789
cf4b0de6 790#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
7225342a 791
33136b06
AS
792static int wa_ring_whitelist_reg(struct intel_engine_cs *ring, i915_reg_t reg)
793{
794 struct drm_i915_private *dev_priv = ring->dev->dev_private;
795 struct i915_workarounds *wa = &dev_priv->workarounds;
796 const uint32_t index = wa->hw_whitelist_count[ring->id];
797
798 if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
799 return -EINVAL;
800
801 WA_WRITE(RING_FORCE_TO_NONPRIV(ring->mmio_base, index),
802 i915_mmio_reg_offset(reg));
803 wa->hw_whitelist_count[ring->id]++;
804
805 return 0;
806}
807
e9a64ada
AS
808static int gen8_init_workarounds(struct intel_engine_cs *ring)
809{
68c6198b
AS
810 struct drm_device *dev = ring->dev;
811 struct drm_i915_private *dev_priv = dev->dev_private;
812
813 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
e9a64ada 814
717d84d6
AS
815 /* WaDisableAsyncFlipPerfMode:bdw,chv */
816 WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
817
d0581194
AS
818 /* WaDisablePartialInstShootdown:bdw,chv */
819 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
820 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
821
a340af58
AS
822 /* Use Force Non-Coherent whenever executing a 3D context. This is a
823 * workaround for for a possible hang in the unlikely event a TLB
824 * invalidation occurs during a PSD flush.
825 */
826 /* WaForceEnableNonCoherent:bdw,chv */
120f5d28 827 /* WaHdcDisableFetchWhenMasked:bdw,chv */
a340af58 828 WA_SET_BIT_MASKED(HDC_CHICKEN0,
120f5d28 829 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
a340af58
AS
830 HDC_FORCE_NON_COHERENT);
831
6def8fdd
AS
832 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
833 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
834 * polygons in the same 8x4 pixel/sample area to be processed without
835 * stalling waiting for the earlier ones to write to Hierarchical Z
836 * buffer."
837 *
838 * This optimization is off by default for BDW and CHV; turn it on.
839 */
840 WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
841
48404636
AS
842 /* Wa4x4STCOptimizationDisable:bdw,chv */
843 WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
844
7eebcde6
AS
845 /*
846 * BSpec recommends 8x4 when MSAA is used,
847 * however in practice 16x4 seems fastest.
848 *
849 * Note that PS/WM thread counts depend on the WIZ hashing
850 * disable bit, which we don't touch here, but it's good
851 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
852 */
853 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
854 GEN6_WIZ_HASHING_MASK,
855 GEN6_WIZ_HASHING_16x4);
856
e9a64ada
AS
857 return 0;
858}
859
00e1e623 860static int bdw_init_workarounds(struct intel_engine_cs *ring)
86d7f238 861{
e9a64ada 862 int ret;
888b5995
AS
863 struct drm_device *dev = ring->dev;
864 struct drm_i915_private *dev_priv = dev->dev_private;
86d7f238 865
e9a64ada
AS
866 ret = gen8_init_workarounds(ring);
867 if (ret)
868 return ret;
869
101b376d 870 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
d0581194 871 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
86d7f238 872
101b376d 873 /* WaDisableDopClockGating:bdw */
7225342a
MK
874 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
875 DOP_CLOCK_GATING_DISABLE);
86d7f238 876
7225342a
MK
877 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
878 GEN8_SAMPLER_POWER_BYPASS_DIS);
86d7f238 879
7225342a 880 WA_SET_BIT_MASKED(HDC_CHICKEN0,
35cb6f3b
DL
881 /* WaForceContextSaveRestoreNonCoherent:bdw */
882 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
35cb6f3b 883 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
7225342a 884 (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
86d7f238 885
86d7f238
AS
886 return 0;
887}
888
00e1e623
VS
889static int chv_init_workarounds(struct intel_engine_cs *ring)
890{
e9a64ada 891 int ret;
00e1e623
VS
892 struct drm_device *dev = ring->dev;
893 struct drm_i915_private *dev_priv = dev->dev_private;
894
e9a64ada
AS
895 ret = gen8_init_workarounds(ring);
896 if (ret)
897 return ret;
898
00e1e623 899 /* WaDisableThreadStallDopClockGating:chv */
d0581194 900 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
00e1e623 901
d60de81d
KG
902 /* Improve HiZ throughput on CHV. */
903 WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
904
7225342a
MK
905 return 0;
906}
907
3b106531
HN
908static int gen9_init_workarounds(struct intel_engine_cs *ring)
909{
ab0dfafe
HN
910 struct drm_device *dev = ring->dev;
911 struct drm_i915_private *dev_priv = dev->dev_private;
8ea6f892 912 uint32_t tmp;
e0f3fa09 913 int ret;
ab0dfafe 914
9c4cbf82
MK
915 /* WaEnableLbsSlaRetryTimerDecrement:skl */
916 I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
917 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
918
919 /* WaDisableKillLogic:bxt,skl */
920 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
921 ECOCHK_DIS_TLB);
922
b0e6f6d4 923 /* WaDisablePartialInstShootdown:skl,bxt */
ab0dfafe
HN
924 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
925 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
926
a119a6e6 927 /* Syncing dependencies between camera and graphics:skl,bxt */
8424171e
NH
928 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
929 GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
930
e87a005d
JN
931 /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
932 if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
933 IS_BXT_REVID(dev, 0, BXT_REVID_A1))
a86eb582
DL
934 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
935 GEN9_DG_MIRROR_FIX_ENABLE);
1de4582f 936
e87a005d
JN
937 /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
938 if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
939 IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
183c6dac
DL
940 WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
941 GEN9_RHWO_OPTIMIZATION_DISABLE);
9b01435d
AS
942 /*
943 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
944 * but we do that in per ctx batchbuffer as there is an issue
945 * with this register not getting restored on ctx restore
946 */
183c6dac
DL
947 }
948
e87a005d
JN
949 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
950 if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER) || IS_BROXTON(dev))
cac23df4
NH
951 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
952 GEN9_ENABLE_YV12_BUGFIX);
cac23df4 953
5068368c 954 /* Wa4x4STCOptimizationDisable:skl,bxt */
27160c96 955 /* WaDisablePartialResolveInVc:skl,bxt */
60294683
AS
956 WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
957 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
9370cd98 958
16be17af 959 /* WaCcsTlbPrefetchDisable:skl,bxt */
e2db7071
DL
960 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
961 GEN9_CCS_TLB_PREFETCH_ENABLE);
962
5a2ae95e 963 /* WaDisableMaskBasedCammingInRCC:skl,bxt */
e87a005d
JN
964 if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) ||
965 IS_BXT_REVID(dev, 0, BXT_REVID_A1))
38a39a7b
BW
966 WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
967 PIXEL_MASK_CAMMING_DISABLE);
968
8ea6f892
ID
969 /* WaForceContextSaveRestoreNonCoherent:skl,bxt */
970 tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
e87a005d
JN
971 if (IS_SKL_REVID(dev, SKL_REVID_F0, SKL_REVID_F0) ||
972 IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
8ea6f892
ID
973 tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
974 WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
975
8c761609 976 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
e87a005d 977 if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
8c761609
AS
978 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
979 GEN8_SAMPLER_POWER_BYPASS_DIS);
8c761609 980
6b6d5626
RB
981 /* WaDisableSTUnitPowerOptimization:skl,bxt */
982 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
983
e0f3fa09
AS
984 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
985 ret= wa_ring_whitelist_reg(ring, GEN8_CS_CHICKEN1);
986 if (ret)
987 return ret;
988
3669ab61
AS
989 /* WaAllowUMDToModifyHDCChicken1:skl,bxt */
990 ret = wa_ring_whitelist_reg(ring, GEN8_HDC_CHICKEN1);
991 if (ret)
992 return ret;
993
3b106531
HN
994 return 0;
995}
996
b7668791
DL
997static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
998{
999 struct drm_device *dev = ring->dev;
1000 struct drm_i915_private *dev_priv = dev->dev_private;
1001 u8 vals[3] = { 0, 0, 0 };
1002 unsigned int i;
1003
1004 for (i = 0; i < 3; i++) {
1005 u8 ss;
1006
1007 /*
1008 * Only consider slices where one, and only one, subslice has 7
1009 * EUs
1010 */
a4d8a0fe 1011 if (!is_power_of_2(dev_priv->info.subslice_7eu[i]))
b7668791
DL
1012 continue;
1013
1014 /*
1015 * subslice_7eu[i] != 0 (because of the check above) and
1016 * ss_max == 4 (maximum number of subslices possible per slice)
1017 *
1018 * -> 0 <= ss <= 3;
1019 */
1020 ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
1021 vals[i] = 3 - ss;
1022 }
1023
1024 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1025 return 0;
1026
1027 /* Tune IZ hashing. See intel_device_info_runtime_init() */
1028 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1029 GEN9_IZ_HASHING_MASK(2) |
1030 GEN9_IZ_HASHING_MASK(1) |
1031 GEN9_IZ_HASHING_MASK(0),
1032 GEN9_IZ_HASHING(2, vals[2]) |
1033 GEN9_IZ_HASHING(1, vals[1]) |
1034 GEN9_IZ_HASHING(0, vals[0]));
1035
1036 return 0;
1037}
1038
8d205494
DL
1039static int skl_init_workarounds(struct intel_engine_cs *ring)
1040{
aa0011a8 1041 int ret;
d0bbbc4f
DL
1042 struct drm_device *dev = ring->dev;
1043 struct drm_i915_private *dev_priv = dev->dev_private;
1044
aa0011a8
AS
1045 ret = gen9_init_workarounds(ring);
1046 if (ret)
1047 return ret;
8d205494 1048
e87a005d 1049 if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
9c4cbf82
MK
1050 /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
1051 I915_WRITE(FF_SLICE_CS_CHICKEN2,
1052 _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
1053 }
1054
1055 /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
1056 * involving this register should also be added to WA batch as required.
1057 */
e87a005d 1058 if (IS_SKL_REVID(dev, 0, SKL_REVID_E0))
9c4cbf82
MK
1059 /* WaDisableLSQCROPERFforOCL:skl */
1060 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1061 GEN8_LQSC_RO_PERF_DIS);
1062
1063 /* WaEnableGapsTsvCreditFix:skl */
e87a005d 1064 if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) {
9c4cbf82
MK
1065 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1066 GEN9_GAPS_TSV_CREDIT_DISABLE));
1067 }
1068
d0bbbc4f 1069 /* WaDisablePowerCompilerClockGating:skl */
e87a005d 1070 if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0))
d0bbbc4f
DL
1071 WA_SET_BIT_MASKED(HIZ_CHICKEN,
1072 BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
1073
e238659d 1074 if (IS_SKL_REVID(dev, 0, SKL_REVID_F0)) {
b62adbd1
NH
1075 /*
1076 *Use Force Non-Coherent whenever executing a 3D context. This
1077 * is a workaround for a possible hang in the unlikely event
1078 * a TLB invalidation occurs during a PSD flush.
1079 */
1080 /* WaForceEnableNonCoherent:skl */
1081 WA_SET_BIT_MASKED(HDC_CHICKEN0,
1082 HDC_FORCE_NON_COHERENT);
e238659d
MK
1083
1084 /* WaDisableHDCInvalidation:skl */
1085 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
1086 BDW_DISABLE_HDC_INVALIDATION);
b62adbd1
NH
1087 }
1088
e87a005d
JN
1089 /* WaBarrierPerformanceFixDisable:skl */
1090 if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
5b6fd12a
VS
1091 WA_SET_BIT_MASKED(HDC_CHICKEN0,
1092 HDC_FENCE_DEST_SLM_DISABLE |
1093 HDC_BARRIER_PERFORMANCE_DISABLE);
1094
9bd9dfb4 1095 /* WaDisableSbeCacheDispatchPortSharing:skl */
e87a005d 1096 if (IS_SKL_REVID(dev, 0, SKL_REVID_F0))
9bd9dfb4
MK
1097 WA_SET_BIT_MASKED(
1098 GEN7_HALF_SLICE_CHICKEN1,
1099 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
9bd9dfb4 1100
6107497e
AS
1101 /* WaDisableLSQCROPERFforOCL:skl */
1102 ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
1103 if (ret)
1104 return ret;
1105
b7668791 1106 return skl_tune_iz_hashing(ring);
7225342a
MK
1107}
1108
cae0437f
NH
1109static int bxt_init_workarounds(struct intel_engine_cs *ring)
1110{
aa0011a8 1111 int ret;
dfb601e6
NH
1112 struct drm_device *dev = ring->dev;
1113 struct drm_i915_private *dev_priv = dev->dev_private;
1114
aa0011a8
AS
1115 ret = gen9_init_workarounds(ring);
1116 if (ret)
1117 return ret;
cae0437f 1118
9c4cbf82
MK
1119 /* WaStoreMultiplePTEenable:bxt */
1120 /* This is a requirement according to Hardware specification */
cbdc12a9 1121 if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
9c4cbf82
MK
1122 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1123
1124 /* WaSetClckGatingDisableMedia:bxt */
cbdc12a9 1125 if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
9c4cbf82
MK
1126 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1127 ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1128 }
1129
dfb601e6
NH
1130 /* WaDisableThreadStallDopClockGating:bxt */
1131 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1132 STALL_DOP_GATING_DISABLE);
1133
983b4b9d 1134 /* WaDisableSbeCacheDispatchPortSharing:bxt */
e87a005d 1135 if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
983b4b9d
NH
1136 WA_SET_BIT_MASKED(
1137 GEN7_HALF_SLICE_CHICKEN1,
1138 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1139 }
1140
2c8580e4
AS
1141 /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
1142 /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
1143 /* WaDisableObjectLevelPreemtionForInstanceId:bxt */
a786d53a 1144 /* WaDisableLSQCROPERFforOCL:bxt */
2c8580e4
AS
1145 if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
1146 ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1);
1147 if (ret)
1148 return ret;
a786d53a
AS
1149
1150 ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
1151 if (ret)
1152 return ret;
2c8580e4
AS
1153 }
1154
cae0437f
NH
1155 return 0;
1156}
1157
771b9a53 1158int init_workarounds_ring(struct intel_engine_cs *ring)
7225342a
MK
1159{
1160 struct drm_device *dev = ring->dev;
1161 struct drm_i915_private *dev_priv = dev->dev_private;
1162
1163 WARN_ON(ring->id != RCS);
1164
1165 dev_priv->workarounds.count = 0;
33136b06 1166 dev_priv->workarounds.hw_whitelist_count[RCS] = 0;
7225342a
MK
1167
1168 if (IS_BROADWELL(dev))
1169 return bdw_init_workarounds(ring);
1170
1171 if (IS_CHERRYVIEW(dev))
1172 return chv_init_workarounds(ring);
00e1e623 1173
8d205494
DL
1174 if (IS_SKYLAKE(dev))
1175 return skl_init_workarounds(ring);
cae0437f
NH
1176
1177 if (IS_BROXTON(dev))
1178 return bxt_init_workarounds(ring);
3b106531 1179
00e1e623
VS
1180 return 0;
1181}
1182
a4872ba6 1183static int init_render_ring(struct intel_engine_cs *ring)
8187a2b7 1184{
78501eac 1185 struct drm_device *dev = ring->dev;
1ec14ad3 1186 struct drm_i915_private *dev_priv = dev->dev_private;
78501eac 1187 int ret = init_ring_common(ring);
9c33baa6
KZ
1188 if (ret)
1189 return ret;
a69ffdbf 1190
61a563a2
AG
1191 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1192 if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
6b26c86d 1193 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
1c8c38c5
CW
1194
1195 /* We need to disable the AsyncFlip performance optimisations in order
1196 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1197 * programmed to '1' on all products.
8693a824 1198 *
2441f877 1199 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1c8c38c5 1200 */
2441f877 1201 if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
1c8c38c5
CW
1202 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1203
f05bb0c7 1204 /* Required for the hardware to program scanline values for waiting */
01fa0302 1205 /* WaEnableFlushTlbInvalidationMode:snb */
f05bb0c7
CW
1206 if (INTEL_INFO(dev)->gen == 6)
1207 I915_WRITE(GFX_MODE,
aa83e30d 1208 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
f05bb0c7 1209
01fa0302 1210 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1c8c38c5
CW
1211 if (IS_GEN7(dev))
1212 I915_WRITE(GFX_MODE_GEN7,
01fa0302 1213 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
1c8c38c5 1214 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
78501eac 1215
5e13a0c5 1216 if (IS_GEN6(dev)) {
3a69ddd6
KG
1217 /* From the Sandybridge PRM, volume 1 part 3, page 24:
1218 * "If this bit is set, STCunit will have LRA as replacement
1219 * policy. [...] This bit must be reset. LRA replacement
1220 * policy is not supported."
1221 */
1222 I915_WRITE(CACHE_MODE_0,
5e13a0c5 1223 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
84f9f938
BW
1224 }
1225
9cc83020 1226 if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
6b26c86d 1227 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
84f9f938 1228
040d2baa 1229 if (HAS_L3_DPF(dev))
35a85ac6 1230 I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
15b9f80e 1231
7225342a 1232 return init_workarounds_ring(ring);
8187a2b7
ZN
1233}
1234
a4872ba6 1235static void render_ring_cleanup(struct intel_engine_cs *ring)
c6df541c 1236{
b45305fc 1237 struct drm_device *dev = ring->dev;
3e78998a
BW
1238 struct drm_i915_private *dev_priv = dev->dev_private;
1239
1240 if (dev_priv->semaphore_obj) {
1241 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
1242 drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
1243 dev_priv->semaphore_obj = NULL;
1244 }
b45305fc 1245
9b1136d5 1246 intel_fini_pipe_control(ring);
c6df541c
CW
1247}
1248
f7169687 1249static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
3e78998a
BW
1250 unsigned int num_dwords)
1251{
1252#define MBOX_UPDATE_DWORDS 8
f7169687 1253 struct intel_engine_cs *signaller = signaller_req->ring;
3e78998a
BW
1254 struct drm_device *dev = signaller->dev;
1255 struct drm_i915_private *dev_priv = dev->dev_private;
1256 struct intel_engine_cs *waiter;
1257 int i, ret, num_rings;
1258
1259 num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1260 num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1261#undef MBOX_UPDATE_DWORDS
1262
5fb9de1a 1263 ret = intel_ring_begin(signaller_req, num_dwords);
3e78998a
BW
1264 if (ret)
1265 return ret;
1266
1267 for_each_ring(waiter, dev_priv, i) {
6259cead 1268 u32 seqno;
3e78998a
BW
1269 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1270 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1271 continue;
1272
f7169687 1273 seqno = i915_gem_request_get_seqno(signaller_req);
3e78998a
BW
1274 intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
1275 intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
1276 PIPE_CONTROL_QW_WRITE |
1277 PIPE_CONTROL_FLUSH_ENABLE);
1278 intel_ring_emit(signaller, lower_32_bits(gtt_offset));
1279 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
6259cead 1280 intel_ring_emit(signaller, seqno);
3e78998a
BW
1281 intel_ring_emit(signaller, 0);
1282 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1283 MI_SEMAPHORE_TARGET(waiter->id));
1284 intel_ring_emit(signaller, 0);
1285 }
1286
1287 return 0;
1288}
1289
f7169687 1290static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
3e78998a
BW
1291 unsigned int num_dwords)
1292{
1293#define MBOX_UPDATE_DWORDS 6
f7169687 1294 struct intel_engine_cs *signaller = signaller_req->ring;
3e78998a
BW
1295 struct drm_device *dev = signaller->dev;
1296 struct drm_i915_private *dev_priv = dev->dev_private;
1297 struct intel_engine_cs *waiter;
1298 int i, ret, num_rings;
1299
1300 num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1301 num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1302#undef MBOX_UPDATE_DWORDS
1303
5fb9de1a 1304 ret = intel_ring_begin(signaller_req, num_dwords);
3e78998a
BW
1305 if (ret)
1306 return ret;
1307
1308 for_each_ring(waiter, dev_priv, i) {
6259cead 1309 u32 seqno;
3e78998a
BW
1310 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1311 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1312 continue;
1313
f7169687 1314 seqno = i915_gem_request_get_seqno(signaller_req);
3e78998a
BW
1315 intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
1316 MI_FLUSH_DW_OP_STOREDW);
1317 intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
1318 MI_FLUSH_DW_USE_GTT);
1319 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
6259cead 1320 intel_ring_emit(signaller, seqno);
3e78998a
BW
1321 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1322 MI_SEMAPHORE_TARGET(waiter->id));
1323 intel_ring_emit(signaller, 0);
1324 }
1325
1326 return 0;
1327}
1328
f7169687 1329static int gen6_signal(struct drm_i915_gem_request *signaller_req,
024a43e1 1330 unsigned int num_dwords)
1ec14ad3 1331{
f7169687 1332 struct intel_engine_cs *signaller = signaller_req->ring;
024a43e1
BW
1333 struct drm_device *dev = signaller->dev;
1334 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 1335 struct intel_engine_cs *useless;
a1444b79 1336 int i, ret, num_rings;
78325f2d 1337
a1444b79
BW
1338#define MBOX_UPDATE_DWORDS 3
1339 num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1340 num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
1341#undef MBOX_UPDATE_DWORDS
024a43e1 1342
5fb9de1a 1343 ret = intel_ring_begin(signaller_req, num_dwords);
024a43e1
BW
1344 if (ret)
1345 return ret;
024a43e1 1346
78325f2d 1347 for_each_ring(useless, dev_priv, i) {
f0f59a00
VS
1348 i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[i];
1349
1350 if (i915_mmio_reg_valid(mbox_reg)) {
f7169687 1351 u32 seqno = i915_gem_request_get_seqno(signaller_req);
f0f59a00 1352
78325f2d 1353 intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
f92a9162 1354 intel_ring_emit_reg(signaller, mbox_reg);
6259cead 1355 intel_ring_emit(signaller, seqno);
78325f2d
BW
1356 }
1357 }
024a43e1 1358
a1444b79
BW
1359 /* If num_dwords was rounded, make sure the tail pointer is correct */
1360 if (num_rings % 2 == 0)
1361 intel_ring_emit(signaller, MI_NOOP);
1362
024a43e1 1363 return 0;
1ec14ad3
CW
1364}
1365
c8c99b0f
BW
1366/**
1367 * gen6_add_request - Update the semaphore mailbox registers
ee044a88
JH
1368 *
1369 * @request - request to write to the ring
c8c99b0f
BW
1370 *
1371 * Update the mailbox registers in the *other* rings with the current seqno.
1372 * This acts like a signal in the canonical semaphore.
1373 */
1ec14ad3 1374static int
ee044a88 1375gen6_add_request(struct drm_i915_gem_request *req)
1ec14ad3 1376{
ee044a88 1377 struct intel_engine_cs *ring = req->ring;
024a43e1 1378 int ret;
52ed2325 1379
707d9cf9 1380 if (ring->semaphore.signal)
f7169687 1381 ret = ring->semaphore.signal(req, 4);
707d9cf9 1382 else
5fb9de1a 1383 ret = intel_ring_begin(req, 4);
707d9cf9 1384
1ec14ad3
CW
1385 if (ret)
1386 return ret;
1387
1ec14ad3
CW
1388 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1389 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
ee044a88 1390 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1ec14ad3 1391 intel_ring_emit(ring, MI_USER_INTERRUPT);
09246732 1392 __intel_ring_advance(ring);
1ec14ad3 1393
1ec14ad3
CW
1394 return 0;
1395}
1396
f72b3435
MK
1397static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
1398 u32 seqno)
1399{
1400 struct drm_i915_private *dev_priv = dev->dev_private;
1401 return dev_priv->last_seqno < seqno;
1402}
1403
c8c99b0f
BW
1404/**
1405 * intel_ring_sync - sync the waiter to the signaller on seqno
1406 *
1407 * @waiter - ring that is waiting
1408 * @signaller - ring which has, or will signal
1409 * @seqno - seqno which the waiter will block on
1410 */
5ee426ca
BW
1411
1412static int
599d924c 1413gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
5ee426ca
BW
1414 struct intel_engine_cs *signaller,
1415 u32 seqno)
1416{
599d924c 1417 struct intel_engine_cs *waiter = waiter_req->ring;
5ee426ca
BW
1418 struct drm_i915_private *dev_priv = waiter->dev->dev_private;
1419 int ret;
1420
5fb9de1a 1421 ret = intel_ring_begin(waiter_req, 4);
5ee426ca
BW
1422 if (ret)
1423 return ret;
1424
1425 intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
1426 MI_SEMAPHORE_GLOBAL_GTT |
bae4fcd2 1427 MI_SEMAPHORE_POLL |
5ee426ca
BW
1428 MI_SEMAPHORE_SAD_GTE_SDD);
1429 intel_ring_emit(waiter, seqno);
1430 intel_ring_emit(waiter,
1431 lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1432 intel_ring_emit(waiter,
1433 upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1434 intel_ring_advance(waiter);
1435 return 0;
1436}
1437
c8c99b0f 1438static int
599d924c 1439gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
a4872ba6 1440 struct intel_engine_cs *signaller,
686cb5f9 1441 u32 seqno)
1ec14ad3 1442{
599d924c 1443 struct intel_engine_cs *waiter = waiter_req->ring;
c8c99b0f
BW
1444 u32 dw1 = MI_SEMAPHORE_MBOX |
1445 MI_SEMAPHORE_COMPARE |
1446 MI_SEMAPHORE_REGISTER;
ebc348b2
BW
1447 u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
1448 int ret;
1ec14ad3 1449
1500f7ea
BW
1450 /* Throughout all of the GEM code, seqno passed implies our current
1451 * seqno is >= the last seqno executed. However for hardware the
1452 * comparison is strictly greater than.
1453 */
1454 seqno -= 1;
1455
ebc348b2 1456 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
686cb5f9 1457
5fb9de1a 1458 ret = intel_ring_begin(waiter_req, 4);
1ec14ad3
CW
1459 if (ret)
1460 return ret;
1461
f72b3435
MK
1462 /* If seqno wrap happened, omit the wait with no-ops */
1463 if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
ebc348b2 1464 intel_ring_emit(waiter, dw1 | wait_mbox);
f72b3435
MK
1465 intel_ring_emit(waiter, seqno);
1466 intel_ring_emit(waiter, 0);
1467 intel_ring_emit(waiter, MI_NOOP);
1468 } else {
1469 intel_ring_emit(waiter, MI_NOOP);
1470 intel_ring_emit(waiter, MI_NOOP);
1471 intel_ring_emit(waiter, MI_NOOP);
1472 intel_ring_emit(waiter, MI_NOOP);
1473 }
c8c99b0f 1474 intel_ring_advance(waiter);
1ec14ad3
CW
1475
1476 return 0;
1477}
1478
c6df541c
CW
1479#define PIPE_CONTROL_FLUSH(ring__, addr__) \
1480do { \
fcbc34e4
KG
1481 intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
1482 PIPE_CONTROL_DEPTH_STALL); \
c6df541c
CW
1483 intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \
1484 intel_ring_emit(ring__, 0); \
1485 intel_ring_emit(ring__, 0); \
1486} while (0)
1487
1488static int
ee044a88 1489pc_render_add_request(struct drm_i915_gem_request *req)
c6df541c 1490{
ee044a88 1491 struct intel_engine_cs *ring = req->ring;
18393f63 1492 u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
c6df541c
CW
1493 int ret;
1494
1495 /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
1496 * incoherent with writes to memory, i.e. completely fubar,
1497 * so we need to use PIPE_NOTIFY instead.
1498 *
1499 * However, we also need to workaround the qword write
1500 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
1501 * memory before requesting an interrupt.
1502 */
5fb9de1a 1503 ret = intel_ring_begin(req, 32);
c6df541c
CW
1504 if (ret)
1505 return ret;
1506
fcbc34e4 1507 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
9d971b37
KG
1508 PIPE_CONTROL_WRITE_FLUSH |
1509 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
0d1aacac 1510 intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
ee044a88 1511 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
c6df541c
CW
1512 intel_ring_emit(ring, 0);
1513 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1514 scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
c6df541c 1515 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1516 scratch_addr += 2 * CACHELINE_BYTES;
c6df541c 1517 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1518 scratch_addr += 2 * CACHELINE_BYTES;
c6df541c 1519 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1520 scratch_addr += 2 * CACHELINE_BYTES;
c6df541c 1521 PIPE_CONTROL_FLUSH(ring, scratch_addr);
18393f63 1522 scratch_addr += 2 * CACHELINE_BYTES;
c6df541c 1523 PIPE_CONTROL_FLUSH(ring, scratch_addr);
a71d8d94 1524
fcbc34e4 1525 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
9d971b37
KG
1526 PIPE_CONTROL_WRITE_FLUSH |
1527 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
c6df541c 1528 PIPE_CONTROL_NOTIFY);
0d1aacac 1529 intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
ee044a88 1530 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
c6df541c 1531 intel_ring_emit(ring, 0);
09246732 1532 __intel_ring_advance(ring);
c6df541c 1533
c6df541c
CW
1534 return 0;
1535}
1536
4cd53c0c 1537static u32
a4872ba6 1538gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
4cd53c0c 1539{
4cd53c0c
DV
1540 /* Workaround to force correct ordering between irq and seqno writes on
1541 * ivb (and maybe also on snb) by reading from a CS register (like
1542 * ACTHD) before reading the status page. */
50877445
CW
1543 if (!lazy_coherency) {
1544 struct drm_i915_private *dev_priv = ring->dev->dev_private;
1545 POSTING_READ(RING_ACTHD(ring->mmio_base));
1546 }
1547
4cd53c0c
DV
1548 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1549}
1550
8187a2b7 1551static u32
a4872ba6 1552ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
8187a2b7 1553{
1ec14ad3
CW
1554 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1555}
1556
b70ec5bf 1557static void
a4872ba6 1558ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
b70ec5bf
MK
1559{
1560 intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1561}
1562
c6df541c 1563static u32
a4872ba6 1564pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
c6df541c 1565{
0d1aacac 1566 return ring->scratch.cpu_page[0];
c6df541c
CW
1567}
1568
b70ec5bf 1569static void
a4872ba6 1570pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
b70ec5bf 1571{
0d1aacac 1572 ring->scratch.cpu_page[0] = seqno;
b70ec5bf
MK
1573}
1574
e48d8634 1575static bool
a4872ba6 1576gen5_ring_get_irq(struct intel_engine_cs *ring)
e48d8634
DV
1577{
1578 struct drm_device *dev = ring->dev;
4640c4ff 1579 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1580 unsigned long flags;
e48d8634 1581
7cd512f1 1582 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
e48d8634
DV
1583 return false;
1584
7338aefa 1585 spin_lock_irqsave(&dev_priv->irq_lock, flags);
43eaea13 1586 if (ring->irq_refcount++ == 0)
480c8033 1587 gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
7338aefa 1588 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
e48d8634
DV
1589
1590 return true;
1591}
1592
1593static void
a4872ba6 1594gen5_ring_put_irq(struct intel_engine_cs *ring)
e48d8634
DV
1595{
1596 struct drm_device *dev = ring->dev;
4640c4ff 1597 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1598 unsigned long flags;
e48d8634 1599
7338aefa 1600 spin_lock_irqsave(&dev_priv->irq_lock, flags);
43eaea13 1601 if (--ring->irq_refcount == 0)
480c8033 1602 gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
7338aefa 1603 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
e48d8634
DV
1604}
1605
b13c2b96 1606static bool
a4872ba6 1607i9xx_ring_get_irq(struct intel_engine_cs *ring)
62fdfeaf 1608{
78501eac 1609 struct drm_device *dev = ring->dev;
4640c4ff 1610 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1611 unsigned long flags;
62fdfeaf 1612
7cd512f1 1613 if (!intel_irqs_enabled(dev_priv))
b13c2b96
CW
1614 return false;
1615
7338aefa 1616 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1617 if (ring->irq_refcount++ == 0) {
f637fde4
DV
1618 dev_priv->irq_mask &= ~ring->irq_enable_mask;
1619 I915_WRITE(IMR, dev_priv->irq_mask);
1620 POSTING_READ(IMR);
1621 }
7338aefa 1622 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
b13c2b96
CW
1623
1624 return true;
62fdfeaf
EA
1625}
1626
8187a2b7 1627static void
a4872ba6 1628i9xx_ring_put_irq(struct intel_engine_cs *ring)
62fdfeaf 1629{
78501eac 1630 struct drm_device *dev = ring->dev;
4640c4ff 1631 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1632 unsigned long flags;
62fdfeaf 1633
7338aefa 1634 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1635 if (--ring->irq_refcount == 0) {
f637fde4
DV
1636 dev_priv->irq_mask |= ring->irq_enable_mask;
1637 I915_WRITE(IMR, dev_priv->irq_mask);
1638 POSTING_READ(IMR);
1639 }
7338aefa 1640 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
62fdfeaf
EA
1641}
1642
c2798b19 1643static bool
a4872ba6 1644i8xx_ring_get_irq(struct intel_engine_cs *ring)
c2798b19
CW
1645{
1646 struct drm_device *dev = ring->dev;
4640c4ff 1647 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1648 unsigned long flags;
c2798b19 1649
7cd512f1 1650 if (!intel_irqs_enabled(dev_priv))
c2798b19
CW
1651 return false;
1652
7338aefa 1653 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1654 if (ring->irq_refcount++ == 0) {
c2798b19
CW
1655 dev_priv->irq_mask &= ~ring->irq_enable_mask;
1656 I915_WRITE16(IMR, dev_priv->irq_mask);
1657 POSTING_READ16(IMR);
1658 }
7338aefa 1659 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
c2798b19
CW
1660
1661 return true;
1662}
1663
1664static void
a4872ba6 1665i8xx_ring_put_irq(struct intel_engine_cs *ring)
c2798b19
CW
1666{
1667 struct drm_device *dev = ring->dev;
4640c4ff 1668 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1669 unsigned long flags;
c2798b19 1670
7338aefa 1671 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1672 if (--ring->irq_refcount == 0) {
c2798b19
CW
1673 dev_priv->irq_mask |= ring->irq_enable_mask;
1674 I915_WRITE16(IMR, dev_priv->irq_mask);
1675 POSTING_READ16(IMR);
1676 }
7338aefa 1677 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
c2798b19
CW
1678}
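/*
 * All of the *_get_irq()/*_put_irq() pairs above share one pattern:
 * ring->irq_refcount is protected by dev_priv->irq_lock, the ring's
 * interrupt is unmasked only on the 0 -> 1 transition and masked again on
 * the 1 -> 0 transition, and a posting read flushes the IMR write to the
 * hardware. Only the register accessors differ (gen2 uses the 16-bit
 * I915_WRITE16/POSTING_READ16 variants).
 */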
1679
b72f3acb 1680static int
a84c3ae1 1681bsd_ring_flush(struct drm_i915_gem_request *req,
78501eac
CW
1682 u32 invalidate_domains,
1683 u32 flush_domains)
d1b851fc 1684{
a84c3ae1 1685 struct intel_engine_cs *ring = req->ring;
b72f3acb
CW
1686 int ret;
1687
5fb9de1a 1688 ret = intel_ring_begin(req, 2);
b72f3acb
CW
1689 if (ret)
1690 return ret;
1691
1692 intel_ring_emit(ring, MI_FLUSH);
1693 intel_ring_emit(ring, MI_NOOP);
1694 intel_ring_advance(ring);
1695 return 0;
d1b851fc
ZN
1696}
1697
3cce469c 1698static int
ee044a88 1699i9xx_add_request(struct drm_i915_gem_request *req)
d1b851fc 1700{
ee044a88 1701 struct intel_engine_cs *ring = req->ring;
3cce469c
CW
1702 int ret;
1703
5fb9de1a 1704 ret = intel_ring_begin(req, 4);
3cce469c
CW
1705 if (ret)
1706 return ret;
6f392d54 1707
3cce469c
CW
1708 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1709 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
ee044a88 1710 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
3cce469c 1711 intel_ring_emit(ring, MI_USER_INTERRUPT);
09246732 1712 __intel_ring_advance(ring);
d1b851fc 1713
3cce469c 1714 return 0;
d1b851fc
ZN
1715}
1716
0f46832f 1717static bool
a4872ba6 1718gen6_ring_get_irq(struct intel_engine_cs *ring)
0f46832f
CW
1719{
1720 struct drm_device *dev = ring->dev;
4640c4ff 1721 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1722 unsigned long flags;
0f46832f 1723
7cd512f1
DV
1724 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1725 return false;
0f46832f 1726
7338aefa 1727 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1728 if (ring->irq_refcount++ == 0) {
040d2baa 1729 if (HAS_L3_DPF(dev) && ring->id == RCS)
cc609d5d
BW
1730 I915_WRITE_IMR(ring,
1731 ~(ring->irq_enable_mask |
35a85ac6 1732 GT_PARITY_ERROR(dev)));
15b9f80e
BW
1733 else
1734 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
480c8033 1735 gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
0f46832f 1736 }
7338aefa 1737 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
0f46832f
CW
1738
1739 return true;
1740}
1741
1742static void
a4872ba6 1743gen6_ring_put_irq(struct intel_engine_cs *ring)
0f46832f
CW
1744{
1745 struct drm_device *dev = ring->dev;
4640c4ff 1746 struct drm_i915_private *dev_priv = dev->dev_private;
7338aefa 1747 unsigned long flags;
0f46832f 1748
7338aefa 1749 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1750 if (--ring->irq_refcount == 0) {
040d2baa 1751 if (HAS_L3_DPF(dev) && ring->id == RCS)
35a85ac6 1752 I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
15b9f80e
BW
1753 else
1754 I915_WRITE_IMR(ring, ~0);
480c8033 1755 gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
1ec14ad3 1756 }
7338aefa 1757 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
d1b851fc
ZN
1758}
1759
a19d2933 1760static bool
a4872ba6 1761hsw_vebox_get_irq(struct intel_engine_cs *ring)
a19d2933
BW
1762{
1763 struct drm_device *dev = ring->dev;
1764 struct drm_i915_private *dev_priv = dev->dev_private;
1765 unsigned long flags;
1766
7cd512f1 1767 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
a19d2933
BW
1768 return false;
1769
59cdb63d 1770 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1771 if (ring->irq_refcount++ == 0) {
a19d2933 1772 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
480c8033 1773 gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
a19d2933 1774 }
59cdb63d 1775 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
a19d2933
BW
1776
1777 return true;
1778}
1779
1780static void
a4872ba6 1781hsw_vebox_put_irq(struct intel_engine_cs *ring)
a19d2933
BW
1782{
1783 struct drm_device *dev = ring->dev;
1784 struct drm_i915_private *dev_priv = dev->dev_private;
1785 unsigned long flags;
1786
59cdb63d 1787 spin_lock_irqsave(&dev_priv->irq_lock, flags);
c7113cc3 1788 if (--ring->irq_refcount == 0) {
a19d2933 1789 I915_WRITE_IMR(ring, ~0);
480c8033 1790 gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
a19d2933 1791 }
59cdb63d 1792 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
a19d2933
BW
1793}
1794
abd58f01 1795static bool
a4872ba6 1796gen8_ring_get_irq(struct intel_engine_cs *ring)
abd58f01
BW
1797{
1798 struct drm_device *dev = ring->dev;
1799 struct drm_i915_private *dev_priv = dev->dev_private;
1800 unsigned long flags;
1801
7cd512f1 1802 if (WARN_ON(!intel_irqs_enabled(dev_priv)))
abd58f01
BW
1803 return false;
1804
1805 spin_lock_irqsave(&dev_priv->irq_lock, flags);
1806 if (ring->irq_refcount++ == 0) {
1807 if (HAS_L3_DPF(dev) && ring->id == RCS) {
1808 I915_WRITE_IMR(ring,
1809 ~(ring->irq_enable_mask |
1810 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
1811 } else {
1812 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1813 }
1814 POSTING_READ(RING_IMR(ring->mmio_base));
1815 }
1816 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1817
1818 return true;
1819}
1820
1821static void
a4872ba6 1822gen8_ring_put_irq(struct intel_engine_cs *ring)
abd58f01
BW
1823{
1824 struct drm_device *dev = ring->dev;
1825 struct drm_i915_private *dev_priv = dev->dev_private;
1826 unsigned long flags;
1827
1828 spin_lock_irqsave(&dev_priv->irq_lock, flags);
1829 if (--ring->irq_refcount == 0) {
1830 if (HAS_L3_DPF(dev) && ring->id == RCS) {
1831 I915_WRITE_IMR(ring,
1832 ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
1833 } else {
1834 I915_WRITE_IMR(ring, ~0);
1835 }
1836 POSTING_READ(RING_IMR(ring->mmio_base));
1837 }
1838 spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1839}
1840
d1b851fc 1841static int
53fddaf7 1842i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
9bcb144c 1843 u64 offset, u32 length,
8e004efc 1844 unsigned dispatch_flags)
d1b851fc 1845{
53fddaf7 1846 struct intel_engine_cs *ring = req->ring;
e1f99ce6 1847 int ret;
78501eac 1848
5fb9de1a 1849 ret = intel_ring_begin(req, 2);
e1f99ce6
CW
1850 if (ret)
1851 return ret;
1852
78501eac 1853 intel_ring_emit(ring,
65f56876
CW
1854 MI_BATCH_BUFFER_START |
1855 MI_BATCH_GTT |
8e004efc
JH
1856 (dispatch_flags & I915_DISPATCH_SECURE ?
1857 0 : MI_BATCH_NON_SECURE_I965));
c4e7a414 1858 intel_ring_emit(ring, offset);
78501eac
CW
1859 intel_ring_advance(ring);
1860
d1b851fc
ZN
1861 return 0;
1862}
1863
b45305fc
DV
1864/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
1865#define I830_BATCH_LIMIT (256*1024)
c4d69da1
CW
1866#define I830_TLB_ENTRIES (2)
1867#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
8187a2b7 1868static int
53fddaf7 1869i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
8e004efc
JH
1870 u64 offset, u32 len,
1871 unsigned dispatch_flags)
62fdfeaf 1872{
53fddaf7 1873 struct intel_engine_cs *ring = req->ring;
c4d69da1 1874 u32 cs_offset = ring->scratch.gtt_offset;
c4e7a414 1875 int ret;
62fdfeaf 1876
5fb9de1a 1877 ret = intel_ring_begin(req, 6);
c4d69da1
CW
1878 if (ret)
1879 return ret;
62fdfeaf 1880
c4d69da1
CW
1881 /* Evict the invalid PTE TLBs */
1882 intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1883 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1884 intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1885 intel_ring_emit(ring, cs_offset);
1886 intel_ring_emit(ring, 0xdeadbeef);
1887 intel_ring_emit(ring, MI_NOOP);
1888 intel_ring_advance(ring);
b45305fc 1889
8e004efc 1890 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
b45305fc
DV
1891 if (len > I830_BATCH_LIMIT)
1892 return -ENOSPC;
1893
5fb9de1a 1894 ret = intel_ring_begin(req, 6 + 2);
b45305fc
DV
1895 if (ret)
1896 return ret;
c4d69da1
CW
1897
1898 /* Blit the batch (which now has all relocs applied) to the
1899 * stable batch scratch bo area (so that the CS never
1900 * stumbles over its tlb invalidation bug) ...
1901 */
1902 intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1903 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
611a7a4f 1904 intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
b45305fc 1905 intel_ring_emit(ring, cs_offset);
b45305fc
DV
1906 intel_ring_emit(ring, 4096);
1907 intel_ring_emit(ring, offset);
c4d69da1 1908
b45305fc 1909 intel_ring_emit(ring, MI_FLUSH);
c4d69da1
CW
1910 intel_ring_emit(ring, MI_NOOP);
1911 intel_ring_advance(ring);
b45305fc
DV
1912
1913 /* ... and execute it. */
c4d69da1 1914 offset = cs_offset;
b45305fc 1915 }
e1f99ce6 1916
9d611c03 1917 ret = intel_ring_begin(req, 2);
c4d69da1
CW
1918 if (ret)
1919 return ret;
1920
9d611c03 1921 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
8e004efc
JH
1922 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1923 0 : MI_BATCH_NON_SECURE));
c4d69da1
CW
1924 intel_ring_advance(ring);
1925
fb3256da
DV
1926 return 0;
1927}
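/*
 * Summary of the i830 CS TLB workaround above: a dummy colour blit into the
 * scratch area at cs_offset first touches I830_TLB_ENTRIES pages to evict
 * stale PTE TLB entries; then, unless I915_DISPATCH_PINNED is set, the batch
 * is copied with a source-copy blit into that stable scratch area so the CS
 * never fetches from an address whose TLB entry may be invalid; finally
 * MI_BATCH_BUFFER_START executes from whichever address survived
 * (cs_offset when copied, the caller's offset otherwise).
 */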
1928
1929static int
53fddaf7 1930i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
9bcb144c 1931 u64 offset, u32 len,
8e004efc 1932 unsigned dispatch_flags)
fb3256da 1933{
53fddaf7 1934 struct intel_engine_cs *ring = req->ring;
fb3256da
DV
1935 int ret;
1936
5fb9de1a 1937 ret = intel_ring_begin(req, 2);
fb3256da
DV
1938 if (ret)
1939 return ret;
1940
65f56876 1941 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
8e004efc
JH
1942 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1943 0 : MI_BATCH_NON_SECURE));
c4e7a414 1944 intel_ring_advance(ring);
62fdfeaf 1945
62fdfeaf
EA
1946 return 0;
1947}
1948
7d3fdfff
VS
1949static void cleanup_phys_status_page(struct intel_engine_cs *ring)
1950{
1951 struct drm_i915_private *dev_priv = to_i915(ring->dev);
1952
1953 if (!dev_priv->status_page_dmah)
1954 return;
1955
1956 drm_pci_free(ring->dev, dev_priv->status_page_dmah);
1957 ring->status_page.page_addr = NULL;
1958}
1959
a4872ba6 1960static void cleanup_status_page(struct intel_engine_cs *ring)
62fdfeaf 1961{
05394f39 1962 struct drm_i915_gem_object *obj;
62fdfeaf 1963
8187a2b7
ZN
1964 obj = ring->status_page.obj;
1965 if (obj == NULL)
62fdfeaf 1966 return;
62fdfeaf 1967
9da3da66 1968 kunmap(sg_page(obj->pages->sgl));
d7f46fc4 1969 i915_gem_object_ggtt_unpin(obj);
05394f39 1970 drm_gem_object_unreference(&obj->base);
8187a2b7 1971 ring->status_page.obj = NULL;
62fdfeaf
EA
1972}
1973
a4872ba6 1974static int init_status_page(struct intel_engine_cs *ring)
62fdfeaf 1975{
7d3fdfff 1976 struct drm_i915_gem_object *obj = ring->status_page.obj;
62fdfeaf 1977
7d3fdfff 1978 if (obj == NULL) {
1f767e02 1979 unsigned flags;
e3efda49 1980 int ret;
e4ffd173 1981
e3efda49
CW
1982 obj = i915_gem_alloc_object(ring->dev, 4096);
1983 if (obj == NULL) {
1984 DRM_ERROR("Failed to allocate status page\n");
1985 return -ENOMEM;
1986 }
62fdfeaf 1987
e3efda49
CW
1988 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1989 if (ret)
1990 goto err_unref;
1991
1f767e02
CW
1992 flags = 0;
1993 if (!HAS_LLC(ring->dev))
1994 /* On g33, we cannot place HWS above 256MiB, so
1995 * restrict its pinning to the low mappable arena.
1996 * Though this restriction is not documented for
1997 * gen4, gen5, or byt, they also behave similarly
1998 * and hang if the HWS is placed at the top of the
1999 * GTT. To generalise, it appears that all !llc
2000 * platforms have issues with us placing the HWS
2001 * above the mappable region (even though we never
2002 * actually map it).
2003 */
2004 flags |= PIN_MAPPABLE;
2005 ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
e3efda49
CW
2006 if (ret) {
2007err_unref:
2008 drm_gem_object_unreference(&obj->base);
2009 return ret;
2010 }
2011
2012 ring->status_page.obj = obj;
2013 }
62fdfeaf 2014
f343c5f6 2015 ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
9da3da66 2016 ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
8187a2b7 2017 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
62fdfeaf 2018
8187a2b7
ZN
2019 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
2020 ring->name, ring->status_page.gfx_addr);
62fdfeaf
EA
2021
2022 return 0;
62fdfeaf
EA
2023}
2024
a4872ba6 2025static int init_phys_status_page(struct intel_engine_cs *ring)
6b8294a4
CW
2026{
2027 struct drm_i915_private *dev_priv = ring->dev->dev_private;
6b8294a4
CW
2028
2029 if (!dev_priv->status_page_dmah) {
2030 dev_priv->status_page_dmah =
2031 drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
2032 if (!dev_priv->status_page_dmah)
2033 return -ENOMEM;
2034 }
2035
6b8294a4
CW
2036 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
2037 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
2038
2039 return 0;
2040}
2041
7ba717cf 2042void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2919d291 2043{
def0c5f6
CW
2044 if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
2045 vunmap(ringbuf->virtual_start);
2046 else
2047 iounmap(ringbuf->virtual_start);
7ba717cf 2048 ringbuf->virtual_start = NULL;
0eb973d3 2049 ringbuf->vma = NULL;
2919d291 2050 i915_gem_object_ggtt_unpin(ringbuf->obj);
7ba717cf
TD
2051}
2052
def0c5f6
CW
2053static u32 *vmap_obj(struct drm_i915_gem_object *obj)
2054{
2055 struct sg_page_iter sg_iter;
2056 struct page **pages;
2057 void *addr;
2058 int i;
2059
2060 pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
2061 if (pages == NULL)
2062 return NULL;
2063
2064 i = 0;
2065 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0)
2066 pages[i++] = sg_page_iter_page(&sg_iter);
2067
2068 addr = vmap(pages, i, 0, PAGE_KERNEL);
2069 drm_free_large(pages);
2070
2071 return addr;
2072}
2073
7ba717cf
TD
2074int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
2075 struct intel_ringbuffer *ringbuf)
2076{
2077 struct drm_i915_private *dev_priv = to_i915(dev);
2078 struct drm_i915_gem_object *obj = ringbuf->obj;
2079 int ret;
2080
def0c5f6
CW
2081 if (HAS_LLC(dev_priv) && !obj->stolen) {
2082 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 0);
2083 if (ret)
2084 return ret;
7ba717cf 2085
def0c5f6
CW
2086 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2087 if (ret) {
2088 i915_gem_object_ggtt_unpin(obj);
2089 return ret;
2090 }
2091
2092 ringbuf->virtual_start = vmap_obj(obj);
2093 if (ringbuf->virtual_start == NULL) {
2094 i915_gem_object_ggtt_unpin(obj);
2095 return -ENOMEM;
2096 }
2097 } else {
2098 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
2099 if (ret)
2100 return ret;
7ba717cf 2101
def0c5f6
CW
2102 ret = i915_gem_object_set_to_gtt_domain(obj, true);
2103 if (ret) {
2104 i915_gem_object_ggtt_unpin(obj);
2105 return ret;
2106 }
2107
2108 ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
2109 i915_gem_obj_ggtt_offset(obj), ringbuf->size);
2110 if (ringbuf->virtual_start == NULL) {
2111 i915_gem_object_ggtt_unpin(obj);
2112 return -EINVAL;
2113 }
7ba717cf
TD
2114 }
2115
0eb973d3
TU
2116 ringbuf->vma = i915_gem_obj_to_ggtt(obj);
2117
7ba717cf
TD
2118 return 0;
2119}
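/*
 * Two mapping strategies above: when the platform has an LLC and the object
 * is not in stolen memory, the ring is moved to the CPU domain and mapped
 * cacheably with vmap_obj(); otherwise it is pinned as mappable and accessed
 * write-combined through the GTT aperture via ioremap_wc(). This mirrors the
 * vunmap()/iounmap() split in intel_unpin_ringbuffer_obj() earlier in the
 * file.
 */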
2120
01101fa7 2121static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
7ba717cf 2122{
2919d291
OM
2123 drm_gem_object_unreference(&ringbuf->obj->base);
2124 ringbuf->obj = NULL;
2125}
2126
01101fa7
CW
2127static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2128 struct intel_ringbuffer *ringbuf)
62fdfeaf 2129{
05394f39 2130 struct drm_i915_gem_object *obj;
62fdfeaf 2131
ebc052e0
CW
2132 obj = NULL;
2133 if (!HAS_LLC(dev))
93b0a4e0 2134 obj = i915_gem_object_create_stolen(dev, ringbuf->size);
ebc052e0 2135 if (obj == NULL)
93b0a4e0 2136 obj = i915_gem_alloc_object(dev, ringbuf->size);
e3efda49
CW
2137 if (obj == NULL)
2138 return -ENOMEM;
8187a2b7 2139
24f3a8cf
AG
2140 /* mark ring buffers as read-only from GPU side by default */
2141 obj->gt_ro = 1;
2142
93b0a4e0 2143 ringbuf->obj = obj;
e3efda49 2144
7ba717cf 2145 return 0;
e3efda49
CW
2146}
2147
01101fa7
CW
2148struct intel_ringbuffer *
2149intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
2150{
2151 struct intel_ringbuffer *ring;
2152 int ret;
2153
2154 ring = kzalloc(sizeof(*ring), GFP_KERNEL);
608c1a52
CW
2155 if (ring == NULL) {
2156 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
2157 engine->name);
01101fa7 2158 return ERR_PTR(-ENOMEM);
608c1a52 2159 }
01101fa7
CW
2160
2161 ring->ring = engine;
608c1a52 2162 list_add(&ring->link, &engine->buffers);
01101fa7
CW
2163
2164 ring->size = size;
2165 /* Workaround an erratum on the i830 which causes a hang if
2166 * the TAIL pointer points to within the last 2 cachelines
2167 * of the buffer.
2168 */
2169 ring->effective_size = size;
2170 if (IS_I830(engine->dev) || IS_845G(engine->dev))
2171 ring->effective_size -= 2 * CACHELINE_BYTES;
2172
2173 ring->last_retired_head = -1;
2174 intel_ring_update_space(ring);
2175
2176 ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
2177 if (ret) {
608c1a52
CW
2178 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
2179 engine->name, ret);
2180 list_del(&ring->link);
01101fa7
CW
2181 kfree(ring);
2182 return ERR_PTR(ret);
2183 }
2184
2185 return ring;
2186}
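/*
 * Worked example (assuming 4 KiB pages and CACHELINE_BYTES of 64, as used
 * elsewhere in i915): intel_init_ring_buffer() below requests a
 * 32 * PAGE_SIZE = 131072-byte ring, so effective_size is 131072 bytes on
 * most platforms and 131072 - 128 = 130944 bytes on i830/845G, keeping the
 * TAIL pointer out of the last two cachelines per the erratum noted above.
 */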
2187
2188void
2189intel_ringbuffer_free(struct intel_ringbuffer *ring)
2190{
2191 intel_destroy_ringbuffer_obj(ring);
608c1a52 2192 list_del(&ring->link);
01101fa7
CW
2193 kfree(ring);
2194}
2195
e3efda49 2196static int intel_init_ring_buffer(struct drm_device *dev,
a4872ba6 2197 struct intel_engine_cs *ring)
e3efda49 2198{
bfc882b4 2199 struct intel_ringbuffer *ringbuf;
e3efda49
CW
2200 int ret;
2201
bfc882b4
DV
2202 WARN_ON(ring->buffer);
2203
e3efda49
CW
2204 ring->dev = dev;
2205 INIT_LIST_HEAD(&ring->active_list);
2206 INIT_LIST_HEAD(&ring->request_list);
cc9130be 2207 INIT_LIST_HEAD(&ring->execlist_queue);
608c1a52 2208 INIT_LIST_HEAD(&ring->buffers);
06fbca71 2209 i915_gem_batch_pool_init(dev, &ring->batch_pool);
ebc348b2 2210 memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
e3efda49
CW
2211
2212 init_waitqueue_head(&ring->irq_queue);
2213
01101fa7 2214 ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
b0366a54
DG
2215 if (IS_ERR(ringbuf)) {
2216 ret = PTR_ERR(ringbuf);
2217 goto error;
2218 }
01101fa7
CW
2219 ring->buffer = ringbuf;
2220
e3efda49
CW
2221 if (I915_NEED_GFX_HWS(dev)) {
2222 ret = init_status_page(ring);
2223 if (ret)
8ee14975 2224 goto error;
e3efda49 2225 } else {
7d3fdfff 2226 WARN_ON(ring->id != RCS);
e3efda49
CW
2227 ret = init_phys_status_page(ring);
2228 if (ret)
8ee14975 2229 goto error;
e3efda49
CW
2230 }
2231
bfc882b4
DV
2232 ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
2233 if (ret) {
2234 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2235 ring->name, ret);
2236 intel_destroy_ringbuffer_obj(ringbuf);
2237 goto error;
e3efda49 2238 }
62fdfeaf 2239
44e895a8
BV
2240 ret = i915_cmd_parser_init_ring(ring);
2241 if (ret)
8ee14975
OM
2242 goto error;
2243
8ee14975 2244 return 0;
351e3db2 2245
8ee14975 2246error:
b0366a54 2247 intel_cleanup_ring_buffer(ring);
8ee14975 2248 return ret;
62fdfeaf
EA
2249}
2250
a4872ba6 2251void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
62fdfeaf 2252{
6402c330 2253 struct drm_i915_private *dev_priv;
33626e6a 2254
93b0a4e0 2255 if (!intel_ring_initialized(ring))
62fdfeaf
EA
2256 return;
2257
6402c330 2258 dev_priv = to_i915(ring->dev);
6402c330 2259
b0366a54
DG
2260 if (ring->buffer) {
2261 intel_stop_ring_buffer(ring);
2262 WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
33626e6a 2263
b0366a54
DG
2264 intel_unpin_ringbuffer_obj(ring->buffer);
2265 intel_ringbuffer_free(ring->buffer);
2266 ring->buffer = NULL;
2267 }
78501eac 2268
8d19215b
ZN
2269 if (ring->cleanup)
2270 ring->cleanup(ring);
2271
7d3fdfff
VS
2272 if (I915_NEED_GFX_HWS(ring->dev)) {
2273 cleanup_status_page(ring);
2274 } else {
2275 WARN_ON(ring->id != RCS);
2276 cleanup_phys_status_page(ring);
2277 }
44e895a8
BV
2278
2279 i915_cmd_parser_fini_ring(ring);
06fbca71 2280 i915_gem_batch_pool_fini(&ring->batch_pool);
b0366a54 2281 ring->dev = NULL;
62fdfeaf
EA
2282}
2283
595e1eeb 2284static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
a71d8d94 2285{
93b0a4e0 2286 struct intel_ringbuffer *ringbuf = ring->buffer;
a71d8d94 2287 struct drm_i915_gem_request *request;
b4716185
CW
2288 unsigned space;
2289 int ret;
a71d8d94 2290
ebd0fd4b
DG
2291 if (intel_ring_space(ringbuf) >= n)
2292 return 0;
a71d8d94 2293
79bbcc29
JH
2294 /* The whole point of reserving space is to not wait! */
2295 WARN_ON(ringbuf->reserved_in_use);
2296
a71d8d94 2297 list_for_each_entry(request, &ring->request_list, list) {
b4716185
CW
2298 space = __intel_ring_space(request->postfix, ringbuf->tail,
2299 ringbuf->size);
2300 if (space >= n)
a71d8d94 2301 break;
a71d8d94
CW
2302 }
2303
595e1eeb 2304 if (WARN_ON(&request->list == &ring->request_list))
a71d8d94
CW
2305 return -ENOSPC;
2306
a4b3a571 2307 ret = i915_wait_request(request);
a71d8d94
CW
2308 if (ret)
2309 return ret;
2310
b4716185 2311 ringbuf->space = space;
a71d8d94
CW
2312 return 0;
2313}
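/*
 * ring_wait_for_space() never polls hardware state directly: it walks the
 * ring's request list from oldest to newest, picks the first request whose
 * postfix, once retired, frees at least n bytes, waits on that single
 * request, and then credits the freed space back to ringbuf->space.
 */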
2314
79bbcc29 2315static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
3e960501
CW
2316{
2317 uint32_t __iomem *virt;
93b0a4e0 2318 int rem = ringbuf->size - ringbuf->tail;
3e960501 2319
93b0a4e0 2320 virt = ringbuf->virtual_start + ringbuf->tail;
3e960501
CW
2321 rem /= 4;
2322 while (rem--)
2323 iowrite32(MI_NOOP, virt++);
2324
93b0a4e0 2325 ringbuf->tail = 0;
ebd0fd4b 2326 intel_ring_update_space(ringbuf);
3e960501
CW
2327}
2328
a4872ba6 2329int intel_ring_idle(struct intel_engine_cs *ring)
3e960501 2330{
a4b3a571 2331 struct drm_i915_gem_request *req;
3e960501 2332
3e960501
CW
2333 /* Wait upon the last request to be completed */
2334 if (list_empty(&ring->request_list))
2335 return 0;
2336
a4b3a571 2337 req = list_entry(ring->request_list.prev,
b4716185
CW
2338 struct drm_i915_gem_request,
2339 list);
2340
2341 /* Make sure we do not trigger any retires */
2342 return __i915_wait_request(req,
2343 atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
2344 to_i915(ring->dev)->mm.interruptible,
2345 NULL, NULL);
3e960501
CW
2346}
2347
6689cb2b 2348int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
9d773091 2349{
6689cb2b 2350 request->ringbuf = request->ring->buffer;
9eba5d4a 2351 return 0;
9d773091
CW
2352}
2353
ccd98fe4
JH
2354int intel_ring_reserve_space(struct drm_i915_gem_request *request)
2355{
2356 /*
2357 * The first call merely notes the reserve request and is common for
2358 * all back ends. The subsequent localised _begin() call actually
2359 * ensures that the reservation is available. Without the begin, if
2360 * the request creator immediately submitted the request without
2361 * adding any commands to it then there might not actually be
2362 * sufficient room for the submission commands.
2363 */
2364 intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
2365
2366 return intel_ring_begin(request, 0);
2367}
2368
29b1b415
JH
2369void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
2370{
ccd98fe4 2371 WARN_ON(ringbuf->reserved_size);
29b1b415
JH
2372 WARN_ON(ringbuf->reserved_in_use);
2373
2374 ringbuf->reserved_size = size;
29b1b415
JH
2375}
2376
2377void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
2378{
2379 WARN_ON(ringbuf->reserved_in_use);
2380
2381 ringbuf->reserved_size = 0;
2382 ringbuf->reserved_in_use = false;
2383}
2384
2385void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
2386{
2387 WARN_ON(ringbuf->reserved_in_use);
2388
2389 ringbuf->reserved_in_use = true;
2390 ringbuf->reserved_tail = ringbuf->tail;
2391}
2392
2393void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
2394{
2395 WARN_ON(!ringbuf->reserved_in_use);
79bbcc29
JH
2396 if (ringbuf->tail > ringbuf->reserved_tail) {
2397 WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
2398 "request reserved size too small: %d vs %d!\n",
2399 ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
2400 } else {
2401 /*
2402 * The ring was wrapped while the reserved space was in use.
2403 * That means that some unknown amount of the ring tail was
2404 * no-op filled and skipped. Thus simply adding the ring size
2405 * to the tail and doing the above space check will not work.
2406 * Rather than attempt to track how much tail was skipped,
2407 * it is much simpler to say that also skipping the sanity
2408 * check every once in a while is not a big issue.
2409 */
2410 }
29b1b415
JH
2411
2412 ringbuf->reserved_size = 0;
2413 ringbuf->reserved_in_use = false;
2414}
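/*
 * Sketch of the intended call order for the reservation helpers above,
 * inferred from the WARNs and from intel_ring_reserve_space(); the exact
 * call sites live outside this file:
 *
 *   intel_ring_reserved_space_reserve(ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
 *   ... intel_ring_begin()/intel_ring_emit() for the request body ...
 *   intel_ring_reserved_space_use(ringbuf);   /- about to emit the tail -/
 *   ... emit the add_request commands ...
 *   intel_ring_reserved_space_end(ringbuf);
 *
 * _cancel() takes the place of _use()/_end() if the request is abandoned
 * before submission.
 */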
2415
2416static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
cbcc80df 2417{
93b0a4e0 2418 struct intel_ringbuffer *ringbuf = ring->buffer;
79bbcc29
JH
2419 int remain_usable = ringbuf->effective_size - ringbuf->tail;
2420 int remain_actual = ringbuf->size - ringbuf->tail;
2421 int ret, total_bytes, wait_bytes = 0;
2422 bool need_wrap = false;
29b1b415 2423
79bbcc29
JH
2424 if (ringbuf->reserved_in_use)
2425 total_bytes = bytes;
2426 else
2427 total_bytes = bytes + ringbuf->reserved_size;
29b1b415 2428
79bbcc29
JH
2429 if (unlikely(bytes > remain_usable)) {
2430 /*
2431 * Not enough space for the basic request. So need to flush
2432 * out the remainder and then wait for base + reserved.
2433 */
2434 wait_bytes = remain_actual + total_bytes;
2435 need_wrap = true;
2436 } else {
2437 if (unlikely(total_bytes > remain_usable)) {
2438 /*
2439 * The base request will fit but the reserved space
2440 * falls off the end. So only need to wait for the
2441 * reserved size after flushing out the remainder.
2442 */
2443 wait_bytes = remain_actual + ringbuf->reserved_size;
2444 need_wrap = true;
2445 } else if (total_bytes > ringbuf->space) {
2446 /* No wrapping required, just waiting. */
2447 wait_bytes = total_bytes;
29b1b415 2448 }
cbcc80df
MK
2449 }
2450
79bbcc29
JH
2451 if (wait_bytes) {
2452 ret = ring_wait_for_space(ring, wait_bytes);
cbcc80df
MK
2453 if (unlikely(ret))
2454 return ret;
79bbcc29
JH
2455
2456 if (need_wrap)
2457 __wrap_ring_buffer(ringbuf);
cbcc80df
MK
2458 }
2459
cbcc80df
MK
2460 return 0;
2461}
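/*
 * Worked example for the logic above (128 KiB ring where effective_size ==
 * size, i.e. not i830/845G): with tail at 130000, bytes = 2000 and a
 * reserved_size of 160 not yet in use, remain_usable = remain_actual =
 * 131072 - 130000 = 1072. Since bytes > remain_usable, the code waits until
 * remain_actual + total_bytes = 1072 + 2160 = 3232 bytes are free, then
 * no-op fills the remainder via __wrap_ring_buffer() and wraps tail to 0.
 */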
2462
5fb9de1a 2463int intel_ring_begin(struct drm_i915_gem_request *req,
e1f99ce6 2464 int num_dwords)
8187a2b7 2465{
5fb9de1a
JH
2466 struct intel_engine_cs *ring;
2467 struct drm_i915_private *dev_priv;
e1f99ce6 2468 int ret;
78501eac 2469
5fb9de1a
JH
2470 WARN_ON(req == NULL);
2471 ring = req->ring;
2472 dev_priv = ring->dev->dev_private;
2473
33196ded
DV
2474 ret = i915_gem_check_wedge(&dev_priv->gpu_error,
2475 dev_priv->mm.interruptible);
de2b9985
DV
2476 if (ret)
2477 return ret;
21dd3734 2478
304d695c
CW
2479 ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
2480 if (ret)
2481 return ret;
2482
ee1b1e5e 2483 ring->buffer->space -= num_dwords * sizeof(uint32_t);
304d695c 2484 return 0;
8187a2b7 2485}
78501eac 2486
753b1ad4 2487/* Align the ring tail to a cacheline boundary */
bba09b12 2488int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
753b1ad4 2489{
bba09b12 2490 struct intel_engine_cs *ring = req->ring;
ee1b1e5e 2491 int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
753b1ad4
VS
2492 int ret;
2493
2494 if (num_dwords == 0)
2495 return 0;
2496
18393f63 2497 num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
5fb9de1a 2498 ret = intel_ring_begin(req, num_dwords);
753b1ad4
VS
2499 if (ret)
2500 return ret;
2501
2502 while (num_dwords--)
2503 intel_ring_emit(ring, MI_NOOP);
2504
2505 intel_ring_advance(ring);
2506
2507 return 0;
2508}
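/*
 * Example of the alignment arithmetic above (assuming CACHELINE_BYTES of 64,
 * i.e. 16 dwords per cacheline): with the tail at byte offset 0x28 within a
 * cacheline, 10 dwords of that cacheline are already used, so
 * 16 - 10 = 6 MI_NOOPs are emitted to advance the tail to the next
 * 64-byte boundary.
 */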
2509
a4872ba6 2510void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
498d2ac1 2511{
3b2cc8ab
OM
2512 struct drm_device *dev = ring->dev;
2513 struct drm_i915_private *dev_priv = dev->dev_private;
498d2ac1 2514
3b2cc8ab 2515 if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
f7e98ad4
MK
2516 I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
2517 I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
3b2cc8ab 2518 if (HAS_VEBOX(dev))
5020150b 2519 I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
e1f99ce6 2520 }
d97ed339 2521
f7e98ad4 2522 ring->set_seqno(ring, seqno);
92cab734 2523 ring->hangcheck.seqno = seqno;
8187a2b7 2524}
62fdfeaf 2525
a4872ba6 2526static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
297b0c5b 2527 u32 value)
881f47b6 2528{
4640c4ff 2529 struct drm_i915_private *dev_priv = ring->dev->dev_private;
881f47b6
XH
2530
2531 /* Every tail move must follow the sequence below */
12f55818
CW
2532
2533 /* Disable notification that the ring is IDLE. The GT
2534 * will then assume that it is busy and bring it out of rc6.
2535 */
0206e353 2536 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
12f55818
CW
2537 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2538
2539 /* Clear the context id. Here be magic! */
2540 I915_WRITE64(GEN6_BSD_RNCID, 0x0);
0206e353 2541
12f55818 2542 /* Wait for the ring not to be idle, i.e. for it to wake up. */
0206e353 2543 if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
12f55818
CW
2544 GEN6_BSD_SLEEP_INDICATOR) == 0,
2545 50))
2546 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
0206e353 2547
12f55818 2548 /* Now that the ring is fully powered up, update the tail */
0206e353 2549 I915_WRITE_TAIL(ring, value);
12f55818
CW
2550 POSTING_READ(RING_TAIL(ring->mmio_base));
2551
2552 /* Let the ring send IDLE messages to the GT again,
2553 * and so let it sleep to conserve power when idle.
2554 */
0206e353 2555 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
12f55818 2556 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
881f47b6
XH
2557}
2558
a84c3ae1 2559static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
ea251324 2560 u32 invalidate, u32 flush)
881f47b6 2561{
a84c3ae1 2562 struct intel_engine_cs *ring = req->ring;
71a77e07 2563 uint32_t cmd;
b72f3acb
CW
2564 int ret;
2565
5fb9de1a 2566 ret = intel_ring_begin(req, 4);
b72f3acb
CW
2567 if (ret)
2568 return ret;
2569
71a77e07 2570 cmd = MI_FLUSH_DW;
075b3bba
BW
2571 if (INTEL_INFO(ring->dev)->gen >= 8)
2572 cmd += 1;
f0a1fb10
CW
2573
2574 /* We always require a command barrier so that subsequent
2575 * commands, such as breadcrumb interrupts, are strictly ordered
2576 * wrt the contents of the write cache being flushed to memory
2577 * (and thus being coherent from the CPU).
2578 */
2579 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2580
9a289771
JB
2581 /*
2582 * Bspec vol 1c.5 - video engine command streamer:
2583 * "If ENABLED, all TLBs will be invalidated once the flush
2584 * operation is complete. This bit is only valid when the
2585 * Post-Sync Operation field is a value of 1h or 3h."
2586 */
71a77e07 2587 if (invalidate & I915_GEM_GPU_DOMAINS)
f0a1fb10
CW
2588 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
2589
71a77e07 2590 intel_ring_emit(ring, cmd);
9a289771 2591 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
075b3bba
BW
2592 if (INTEL_INFO(ring->dev)->gen >= 8) {
2593 intel_ring_emit(ring, 0); /* upper addr */
2594 intel_ring_emit(ring, 0); /* value */
2595 } else {
2596 intel_ring_emit(ring, 0);
2597 intel_ring_emit(ring, MI_NOOP);
2598 }
b72f3acb
CW
2599 intel_ring_advance(ring);
2600 return 0;
881f47b6
XH
2601}
2602
1c7a0623 2603static int
53fddaf7 2604gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
9bcb144c 2605 u64 offset, u32 len,
8e004efc 2606 unsigned dispatch_flags)
1c7a0623 2607{
53fddaf7 2608 struct intel_engine_cs *ring = req->ring;
8e004efc
JH
2609 bool ppgtt = USES_PPGTT(ring->dev) &&
2610 !(dispatch_flags & I915_DISPATCH_SECURE);
1c7a0623
BW
2611 int ret;
2612
5fb9de1a 2613 ret = intel_ring_begin(req, 4);
1c7a0623
BW
2614 if (ret)
2615 return ret;
2616
2617 /* FIXME(BDW): Address space and security selectors. */
919032ec
AJ
2618 intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
2619 (dispatch_flags & I915_DISPATCH_RS ?
2620 MI_BATCH_RESOURCE_STREAMER : 0));
9bcb144c
BW
2621 intel_ring_emit(ring, lower_32_bits(offset));
2622 intel_ring_emit(ring, upper_32_bits(offset));
1c7a0623
BW
2623 intel_ring_emit(ring, MI_NOOP);
2624 intel_ring_advance(ring);
2625
2626 return 0;
2627}
2628
d7d4eedd 2629static int
53fddaf7 2630hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
8e004efc
JH
2631 u64 offset, u32 len,
2632 unsigned dispatch_flags)
d7d4eedd 2633{
53fddaf7 2634 struct intel_engine_cs *ring = req->ring;
d7d4eedd
CW
2635 int ret;
2636
5fb9de1a 2637 ret = intel_ring_begin(req, 2);
d7d4eedd
CW
2638 if (ret)
2639 return ret;
2640
2641 intel_ring_emit(ring,
77072258 2642 MI_BATCH_BUFFER_START |
8e004efc 2643 (dispatch_flags & I915_DISPATCH_SECURE ?
919032ec
AJ
2644 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
2645 (dispatch_flags & I915_DISPATCH_RS ?
2646 MI_BATCH_RESOURCE_STREAMER : 0));
d7d4eedd
CW
2647 /* bit0-7 is the length on GEN6+ */
2648 intel_ring_emit(ring, offset);
2649 intel_ring_advance(ring);
2650
2651 return 0;
2652}
2653
881f47b6 2654static int
53fddaf7 2655gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
9bcb144c 2656 u64 offset, u32 len,
8e004efc 2657 unsigned dispatch_flags)
881f47b6 2658{
53fddaf7 2659 struct intel_engine_cs *ring = req->ring;
0206e353 2660 int ret;
ab6f8e32 2661
5fb9de1a 2662 ret = intel_ring_begin(req, 2);
0206e353
AJ
2663 if (ret)
2664 return ret;
e1f99ce6 2665
d7d4eedd
CW
2666 intel_ring_emit(ring,
2667 MI_BATCH_BUFFER_START |
8e004efc
JH
2668 (dispatch_flags & I915_DISPATCH_SECURE ?
2669 0 : MI_BATCH_NON_SECURE_I965));
0206e353
AJ
2670 /* bit0-7 is the length on GEN6+ */
2671 intel_ring_emit(ring, offset);
2672 intel_ring_advance(ring);
ab6f8e32 2673
0206e353 2674 return 0;
881f47b6
XH
2675}
2676
549f7365
CW
2677/* Blitter support (SandyBridge+) */
2678
a84c3ae1 2679static int gen6_ring_flush(struct drm_i915_gem_request *req,
ea251324 2680 u32 invalidate, u32 flush)
8d19215b 2681{
a84c3ae1 2682 struct intel_engine_cs *ring = req->ring;
fd3da6c9 2683 struct drm_device *dev = ring->dev;
71a77e07 2684 uint32_t cmd;
b72f3acb
CW
2685 int ret;
2686
5fb9de1a 2687 ret = intel_ring_begin(req, 4);
b72f3acb
CW
2688 if (ret)
2689 return ret;
2690
71a77e07 2691 cmd = MI_FLUSH_DW;
dbef0f15 2692 if (INTEL_INFO(dev)->gen >= 8)
075b3bba 2693 cmd += 1;
f0a1fb10
CW
2694
2695 /* We always require a command barrier so that subsequent
2696 * commands, such as breadcrumb interrupts, are strictly ordered
2697 * wrt the contents of the write cache being flushed to memory
2698 * (and thus being coherent from the CPU).
2699 */
2700 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2701
9a289771
JB
2702 /*
2703 * Bspec vol 1c.3 - blitter engine command streamer:
2704 * "If ENABLED, all TLBs will be invalidated once the flush
2705 * operation is complete. This bit is only valid when the
2706 * Post-Sync Operation field is a value of 1h or 3h."
2707 */
71a77e07 2708 if (invalidate & I915_GEM_DOMAIN_RENDER)
f0a1fb10 2709 cmd |= MI_INVALIDATE_TLB;
71a77e07 2710 intel_ring_emit(ring, cmd);
9a289771 2711 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
dbef0f15 2712 if (INTEL_INFO(dev)->gen >= 8) {
075b3bba
BW
2713 intel_ring_emit(ring, 0); /* upper addr */
2714 intel_ring_emit(ring, 0); /* value */
2715 } else {
2716 intel_ring_emit(ring, 0);
2717 intel_ring_emit(ring, MI_NOOP);
2718 }
b72f3acb 2719 intel_ring_advance(ring);
fd3da6c9 2720
b72f3acb 2721 return 0;
8d19215b
ZN
2722}
2723
5c1143bb
XH
2724int intel_init_render_ring_buffer(struct drm_device *dev)
2725{
4640c4ff 2726 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2727 struct intel_engine_cs *ring = &dev_priv->ring[RCS];
3e78998a
BW
2728 struct drm_i915_gem_object *obj;
2729 int ret;
5c1143bb 2730
59465b5f
DV
2731 ring->name = "render ring";
2732 ring->id = RCS;
426960be 2733 ring->exec_id = I915_EXEC_RENDER;
59465b5f
DV
2734 ring->mmio_base = RENDER_RING_BASE;
2735
707d9cf9 2736 if (INTEL_INFO(dev)->gen >= 8) {
3e78998a
BW
2737 if (i915_semaphore_is_enabled(dev)) {
2738 obj = i915_gem_alloc_object(dev, 4096);
2739 if (obj == NULL) {
2740 DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
2741 i915.semaphores = 0;
2742 } else {
2743 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2744 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
2745 if (ret != 0) {
2746 drm_gem_object_unreference(&obj->base);
2747 DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2748 i915.semaphores = 0;
2749 } else
2750 dev_priv->semaphore_obj = obj;
2751 }
2752 }
7225342a 2753
8f0e2b9d 2754 ring->init_context = intel_rcs_ctx_init;
707d9cf9
BW
2755 ring->add_request = gen6_add_request;
2756 ring->flush = gen8_render_ring_flush;
2757 ring->irq_get = gen8_ring_get_irq;
2758 ring->irq_put = gen8_ring_put_irq;
2759 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2760 ring->get_seqno = gen6_ring_get_seqno;
2761 ring->set_seqno = ring_set_seqno;
2762 if (i915_semaphore_is_enabled(dev)) {
3e78998a 2763 WARN_ON(!dev_priv->semaphore_obj);
5ee426ca 2764 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2765 ring->semaphore.signal = gen8_rcs_signal;
2766 GEN8_RING_SEMAPHORE_INIT;
707d9cf9
BW
2767 }
2768 } else if (INTEL_INFO(dev)->gen >= 6) {
4f91fc6d 2769 ring->init_context = intel_rcs_ctx_init;
1ec14ad3 2770 ring->add_request = gen6_add_request;
4772eaeb 2771 ring->flush = gen7_render_ring_flush;
6c6cf5aa 2772 if (INTEL_INFO(dev)->gen == 6)
b3111509 2773 ring->flush = gen6_render_ring_flush;
707d9cf9
BW
2774 ring->irq_get = gen6_ring_get_irq;
2775 ring->irq_put = gen6_ring_put_irq;
cc609d5d 2776 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
4cd53c0c 2777 ring->get_seqno = gen6_ring_get_seqno;
b70ec5bf 2778 ring->set_seqno = ring_set_seqno;
707d9cf9
BW
2779 if (i915_semaphore_is_enabled(dev)) {
2780 ring->semaphore.sync_to = gen6_ring_sync;
2781 ring->semaphore.signal = gen6_signal;
2782 /*
2783 * The current semaphore is only applied on pre-gen8
2784 * platform. And there is no VCS2 ring on the pre-gen8
2785 * platform. So the semaphore between RCS and VCS2 is
2786 * initialized as INVALID. Gen8 will initialize the
2787 * sema between VCS2 and RCS later.
2788 */
2789 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
2790 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
2791 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
2792 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
2793 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2794 ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
2795 ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
2796 ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
2797 ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
2798 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2799 }
c6df541c
CW
2800 } else if (IS_GEN5(dev)) {
2801 ring->add_request = pc_render_add_request;
46f0f8d1 2802 ring->flush = gen4_render_ring_flush;
c6df541c 2803 ring->get_seqno = pc_render_get_seqno;
b70ec5bf 2804 ring->set_seqno = pc_render_set_seqno;
e48d8634
DV
2805 ring->irq_get = gen5_ring_get_irq;
2806 ring->irq_put = gen5_ring_put_irq;
cc609d5d
BW
2807 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
2808 GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
59465b5f 2809 } else {
8620a3a9 2810 ring->add_request = i9xx_add_request;
46f0f8d1
CW
2811 if (INTEL_INFO(dev)->gen < 4)
2812 ring->flush = gen2_render_ring_flush;
2813 else
2814 ring->flush = gen4_render_ring_flush;
59465b5f 2815 ring->get_seqno = ring_get_seqno;
b70ec5bf 2816 ring->set_seqno = ring_set_seqno;
c2798b19
CW
2817 if (IS_GEN2(dev)) {
2818 ring->irq_get = i8xx_ring_get_irq;
2819 ring->irq_put = i8xx_ring_put_irq;
2820 } else {
2821 ring->irq_get = i9xx_ring_get_irq;
2822 ring->irq_put = i9xx_ring_put_irq;
2823 }
e3670319 2824 ring->irq_enable_mask = I915_USER_INTERRUPT;
1ec14ad3 2825 }
59465b5f 2826 ring->write_tail = ring_write_tail;
707d9cf9 2827
d7d4eedd
CW
2828 if (IS_HASWELL(dev))
2829 ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1c7a0623
BW
2830 else if (IS_GEN8(dev))
2831 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
d7d4eedd 2832 else if (INTEL_INFO(dev)->gen >= 6)
fb3256da
DV
2833 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2834 else if (INTEL_INFO(dev)->gen >= 4)
2835 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2836 else if (IS_I830(dev) || IS_845G(dev))
2837 ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2838 else
2839 ring->dispatch_execbuffer = i915_dispatch_execbuffer;
ecfe00d8 2840 ring->init_hw = init_render_ring;
59465b5f
DV
2841 ring->cleanup = render_ring_cleanup;
2842
b45305fc
DV
2843 /* Workaround batchbuffer to combat CS tlb bug. */
2844 if (HAS_BROKEN_CS_TLB(dev)) {
c4d69da1 2845 obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
b45305fc
DV
2846 if (obj == NULL) {
2847 DRM_ERROR("Failed to allocate batch bo\n");
2848 return -ENOMEM;
2849 }
2850
be1fa129 2851 ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
b45305fc
DV
2852 if (ret != 0) {
2853 drm_gem_object_unreference(&obj->base);
2854 DRM_ERROR("Failed to pin batch bo\n");
2855 return ret;
2856 }
2857
0d1aacac
CW
2858 ring->scratch.obj = obj;
2859 ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
b45305fc
DV
2860 }
2861
99be1dfe
DV
2862 ret = intel_init_ring_buffer(dev, ring);
2863 if (ret)
2864 return ret;
2865
2866 if (INTEL_INFO(dev)->gen >= 5) {
2867 ret = intel_init_pipe_control(ring);
2868 if (ret)
2869 return ret;
2870 }
2871
2872 return 0;
5c1143bb
XH
2873}
2874
2875int intel_init_bsd_ring_buffer(struct drm_device *dev)
2876{
4640c4ff 2877 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2878 struct intel_engine_cs *ring = &dev_priv->ring[VCS];
5c1143bb 2879
58fa3835
DV
2880 ring->name = "bsd ring";
2881 ring->id = VCS;
426960be 2882 ring->exec_id = I915_EXEC_BSD;
58fa3835 2883
0fd2c201 2884 ring->write_tail = ring_write_tail;
780f18c8 2885 if (INTEL_INFO(dev)->gen >= 6) {
58fa3835 2886 ring->mmio_base = GEN6_BSD_RING_BASE;
0fd2c201
DV
2887 /* gen6 bsd needs a special wa for tail updates */
2888 if (IS_GEN6(dev))
2889 ring->write_tail = gen6_bsd_ring_write_tail;
ea251324 2890 ring->flush = gen6_bsd_ring_flush;
58fa3835
DV
2891 ring->add_request = gen6_add_request;
2892 ring->get_seqno = gen6_ring_get_seqno;
b70ec5bf 2893 ring->set_seqno = ring_set_seqno;
abd58f01
BW
2894 if (INTEL_INFO(dev)->gen >= 8) {
2895 ring->irq_enable_mask =
2896 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2897 ring->irq_get = gen8_ring_get_irq;
2898 ring->irq_put = gen8_ring_put_irq;
1c7a0623
BW
2899 ring->dispatch_execbuffer =
2900 gen8_ring_dispatch_execbuffer;
707d9cf9 2901 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2902 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2903 ring->semaphore.signal = gen8_xcs_signal;
2904 GEN8_RING_SEMAPHORE_INIT;
707d9cf9 2905 }
abd58f01
BW
2906 } else {
2907 ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2908 ring->irq_get = gen6_ring_get_irq;
2909 ring->irq_put = gen6_ring_put_irq;
1c7a0623
BW
2910 ring->dispatch_execbuffer =
2911 gen6_ring_dispatch_execbuffer;
707d9cf9
BW
2912 if (i915_semaphore_is_enabled(dev)) {
2913 ring->semaphore.sync_to = gen6_ring_sync;
2914 ring->semaphore.signal = gen6_signal;
2915 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
2916 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
2917 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
2918 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
2919 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2920 ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
2921 ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
2922 ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
2923 ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
2924 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2925 }
abd58f01 2926 }
58fa3835
DV
2927 } else {
2928 ring->mmio_base = BSD_RING_BASE;
58fa3835 2929 ring->flush = bsd_ring_flush;
8620a3a9 2930 ring->add_request = i9xx_add_request;
58fa3835 2931 ring->get_seqno = ring_get_seqno;
b70ec5bf 2932 ring->set_seqno = ring_set_seqno;
e48d8634 2933 if (IS_GEN5(dev)) {
cc609d5d 2934 ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
e48d8634
DV
2935 ring->irq_get = gen5_ring_get_irq;
2936 ring->irq_put = gen5_ring_put_irq;
2937 } else {
e3670319 2938 ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
e48d8634
DV
2939 ring->irq_get = i9xx_ring_get_irq;
2940 ring->irq_put = i9xx_ring_put_irq;
2941 }
fb3256da 2942 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
58fa3835 2943 }
ecfe00d8 2944 ring->init_hw = init_ring_common;
58fa3835 2945
1ec14ad3 2946 return intel_init_ring_buffer(dev, ring);
5c1143bb 2947}
549f7365 2948
845f74a7 2949/**
62659920 2950 * Initialize the second BSD ring (e.g. Broadwell GT3, Skylake GT3)
845f74a7
ZY
2951 */
2952int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2953{
2954 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2955 struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
845f74a7 2956
f7b64236 2957 ring->name = "bsd2 ring";
845f74a7 2958 ring->id = VCS2;
426960be 2959 ring->exec_id = I915_EXEC_BSD;
845f74a7
ZY
2960
2961 ring->write_tail = ring_write_tail;
2962 ring->mmio_base = GEN8_BSD2_RING_BASE;
2963 ring->flush = gen6_bsd_ring_flush;
2964 ring->add_request = gen6_add_request;
2965 ring->get_seqno = gen6_ring_get_seqno;
2966 ring->set_seqno = ring_set_seqno;
2967 ring->irq_enable_mask =
2968 GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2969 ring->irq_get = gen8_ring_get_irq;
2970 ring->irq_put = gen8_ring_put_irq;
2971 ring->dispatch_execbuffer =
2972 gen8_ring_dispatch_execbuffer;
3e78998a 2973 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 2974 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
2975 ring->semaphore.signal = gen8_xcs_signal;
2976 GEN8_RING_SEMAPHORE_INIT;
2977 }
ecfe00d8 2978 ring->init_hw = init_ring_common;
845f74a7
ZY
2979
2980 return intel_init_ring_buffer(dev, ring);
2981}
2982
549f7365
CW
2983int intel_init_blt_ring_buffer(struct drm_device *dev)
2984{
4640c4ff 2985 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 2986 struct intel_engine_cs *ring = &dev_priv->ring[BCS];
549f7365 2987
3535d9dd
DV
2988 ring->name = "blitter ring";
2989 ring->id = BCS;
426960be 2990 ring->exec_id = I915_EXEC_BLT;
3535d9dd
DV
2991
2992 ring->mmio_base = BLT_RING_BASE;
2993 ring->write_tail = ring_write_tail;
ea251324 2994 ring->flush = gen6_ring_flush;
3535d9dd
DV
2995 ring->add_request = gen6_add_request;
2996 ring->get_seqno = gen6_ring_get_seqno;
b70ec5bf 2997 ring->set_seqno = ring_set_seqno;
abd58f01
BW
2998 if (INTEL_INFO(dev)->gen >= 8) {
2999 ring->irq_enable_mask =
3000 GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
3001 ring->irq_get = gen8_ring_get_irq;
3002 ring->irq_put = gen8_ring_put_irq;
1c7a0623 3003 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
707d9cf9 3004 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 3005 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
3006 ring->semaphore.signal = gen8_xcs_signal;
3007 GEN8_RING_SEMAPHORE_INIT;
707d9cf9 3008 }
abd58f01
BW
3009 } else {
3010 ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
3011 ring->irq_get = gen6_ring_get_irq;
3012 ring->irq_put = gen6_ring_put_irq;
1c7a0623 3013 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
707d9cf9
BW
3014 if (i915_semaphore_is_enabled(dev)) {
3015 ring->semaphore.signal = gen6_signal;
3016 ring->semaphore.sync_to = gen6_ring_sync;
3017 /*
3018 * The current semaphore is only applied on pre-gen8
3019 * platform. And there is no VCS2 ring on the pre-gen8
3020 * platform. So the semaphore between BCS and VCS2 is
3021 * initialized as INVALID. Gen8 will initialize the
3022 * sema between BCS and VCS2 later.
3023 */
3024 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
3025 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
3026 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
3027 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
3028 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
3029 ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
3030 ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
3031 ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
3032 ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
3033 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
3034 }
abd58f01 3035 }
ecfe00d8 3036 ring->init_hw = init_ring_common;
549f7365 3037
1ec14ad3 3038 return intel_init_ring_buffer(dev, ring);
549f7365 3039}
a7b9761d 3040
9a8a2213
BW
3041int intel_init_vebox_ring_buffer(struct drm_device *dev)
3042{
4640c4ff 3043 struct drm_i915_private *dev_priv = dev->dev_private;
a4872ba6 3044 struct intel_engine_cs *ring = &dev_priv->ring[VECS];
9a8a2213
BW
3045
3046 ring->name = "video enhancement ring";
3047 ring->id = VECS;
426960be 3048 ring->exec_id = I915_EXEC_VEBOX;
9a8a2213
BW
3049
3050 ring->mmio_base = VEBOX_RING_BASE;
3051 ring->write_tail = ring_write_tail;
3052 ring->flush = gen6_ring_flush;
3053 ring->add_request = gen6_add_request;
3054 ring->get_seqno = gen6_ring_get_seqno;
3055 ring->set_seqno = ring_set_seqno;
abd58f01
BW
3056
3057 if (INTEL_INFO(dev)->gen >= 8) {
3058 ring->irq_enable_mask =
40c499f9 3059 GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
abd58f01
BW
3060 ring->irq_get = gen8_ring_get_irq;
3061 ring->irq_put = gen8_ring_put_irq;
1c7a0623 3062 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
707d9cf9 3063 if (i915_semaphore_is_enabled(dev)) {
5ee426ca 3064 ring->semaphore.sync_to = gen8_ring_sync;
3e78998a
BW
3065 ring->semaphore.signal = gen8_xcs_signal;
3066 GEN8_RING_SEMAPHORE_INIT;
707d9cf9 3067 }
abd58f01
BW
3068 } else {
3069 ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
3070 ring->irq_get = hsw_vebox_get_irq;
3071 ring->irq_put = hsw_vebox_put_irq;
1c7a0623 3072 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
707d9cf9
BW
3073 if (i915_semaphore_is_enabled(dev)) {
3074 ring->semaphore.sync_to = gen6_ring_sync;
3075 ring->semaphore.signal = gen6_signal;
3076 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
3077 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
3078 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
3079 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
3080 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
3081 ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
3082 ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
3083 ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
3084 ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
3085 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
3086 }
abd58f01 3087 }
ecfe00d8 3088 ring->init_hw = init_ring_common;
9a8a2213
BW
3089
3090 return intel_init_ring_buffer(dev, ring);
3091}
3092
a7b9761d 3093int
4866d729 3094intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
a7b9761d 3095{
4866d729 3096 struct intel_engine_cs *ring = req->ring;
a7b9761d
CW
3097 int ret;
3098
3099 if (!ring->gpu_caches_dirty)
3100 return 0;
3101
a84c3ae1 3102 ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
a7b9761d
CW
3103 if (ret)
3104 return ret;
3105
a84c3ae1 3106 trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
a7b9761d
CW
3107
3108 ring->gpu_caches_dirty = false;
3109 return 0;
3110}
3111
3112int
2f20055d 3113intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
a7b9761d 3114{
2f20055d 3115 struct intel_engine_cs *ring = req->ring;
a7b9761d
CW
3116 uint32_t flush_domains;
3117 int ret;
3118
3119 flush_domains = 0;
3120 if (ring->gpu_caches_dirty)
3121 flush_domains = I915_GEM_GPU_DOMAINS;
3122
a84c3ae1 3123 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
a7b9761d
CW
3124 if (ret)
3125 return ret;
3126
a84c3ae1 3127 trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
a7b9761d
CW
3128
3129 ring->gpu_caches_dirty = false;
3130 return 0;
3131}
e3efda49
CW
3132
3133void
a4872ba6 3134intel_stop_ring_buffer(struct intel_engine_cs *ring)
e3efda49
CW
3135{
3136 int ret;
3137
3138 if (!intel_ring_initialized(ring))
3139 return;
3140
3141 ret = intel_ring_idle(ring);
3142 if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
3143 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
3144 ring->name, ret);
3145
3146 stop_ring(ring);
3147}