u32 invalidate_domains, u32 flush_domains)
{
u32 flags = 0;
+ struct pipe_control *pc = ring->private;
+ u32 scratch_addr = pc->gtt_offset + 128;
int ret;
/* Just flush everything. Experiments have shown that reducing the
* number of bits based on the write domains has little performance
* impact.
*/
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_TLB_INVALIDATE;
- flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
- flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- /*
- * Ensure that any following seqno writes only happen when the render
- * cache is indeed flushed (but only if the caller actually wants that).
- */
- if (flush_domains)
+ if (flush_domains) {
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ /*
+ * Ensure that any following seqno writes only happen
+ * when the render cache is indeed flushed.
+ */
flags |= PIPE_CONTROL_CS_STALL;
+ }
+ if (invalidate_domains) {
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ /*
+ * TLB invalidate requires a post-sync write.
+ */
+ flags |= PIPE_CONTROL_QW_WRITE;
+ }
ret = intel_ring_begin(ring, 4);
if (ret)
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
intel_ring_emit(ring, flags);
- intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, 0);
intel_ring_advance(ring);
I915_WRITE_HEAD(ring, 0);
ring->write_tail(ring, 0);
- /* Initialize the ring. */
- I915_WRITE_START(ring, obj->gtt_offset);
head = I915_READ_HEAD(ring) & HEAD_ADDR;
/* G45 ring initialization fails to reset head to zero */
}
}
+ /* Initialize the ring. This must happen _after_ we've cleared the ring
+ * registers with the above sequence (the readback of the HEAD registers
+ * also enforces ordering), otherwise the hw might lose the new ring
+ * register values. */
+ I915_WRITE_START(ring, obj->gtt_offset);
I915_WRITE_CTL(ring,
((ring->size - PAGE_SIZE) & RING_NR_PAGES)
| RING_VALID);
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
- ret = i915_gem_object_pin(obj, 4096, true);
+ ret = i915_gem_object_pin(obj, 4096, true, false);
if (ret)
goto err_unref;
}
static u32
-gen6_ring_get_seqno(struct intel_ring_buffer *ring)
+gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
- struct drm_device *dev = ring->dev;
-
/* Workaround to force correct ordering between irq and seqno writes on
* ivb (and maybe also on snb) by reading from a CS register (like
* ACTHD) before reading the status page. */
- if (IS_GEN6(dev) || IS_GEN7(dev))
+ if (!lazy_coherency)
intel_ring_get_active_head(ring);
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}
static u32
-ring_get_seqno(struct intel_ring_buffer *ring)
+ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}
static u32
-pc_render_get_seqno(struct intel_ring_buffer *ring)
+pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
struct pipe_control *pc = ring->private;
return pc->cpu_page[0];
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
- ret = i915_gem_object_pin(obj, 4096, true);
+ ret = i915_gem_object_pin(obj, 4096, true, false);
if (ret != 0) {
goto err_unref;
}
ring->obj = obj;
- ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
+ ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
if (ret)
goto err_unref;