struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_workarounds *w = &dev_priv->workarounds;
- if (WARN_ON_ONCE(w->count == 0))
+ if (w->count == 0)
return 0;
ring->gpu_caches_dirty = true;
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
for (i = 0; i < w->count; i++) {
- intel_ring_emit(ring, w->reg[i].addr);
+ intel_ring_emit_reg(ring, w->reg[i].addr);
intel_ring_emit(ring, w->reg[i].value);
}
intel_ring_emit(ring, MI_NOOP);
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+ /* Use Force Non-Coherent whenever executing a 3D context. This is a
+ * workaround for for a possible hang in the unlikely event a TLB
+ * invalidation occurs during a PSD flush.
+ */
+ /* WaForceEnableNonCoherent:bdw,chv */
+ /* WaHdcDisableFetchWhenMasked:bdw,chv */
+ WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ HDC_DONOT_FETCH_MEM_WHEN_MASKED |
+ HDC_FORCE_NON_COHERENT);
+
/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
* "The Hierarchical Z RAW Stall Optimization allows non-overlapping
* polygons in the same 8x4 pixel/sample area to be processed without
*/
WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+ /* Wa4x4STCOptimizationDisable:bdw,chv */
+ WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+ GEN6_WIZ_HASHING_MASK,
+ GEN6_WIZ_HASHING_16x4);
+
return 0;
}
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN8_SAMPLER_POWER_BYPASS_DIS);
- /* Use Force Non-Coherent whenever executing a 3D context. This is a
- * workaround for for a possible hang in the unlikely event a TLB
- * invalidation occurs during a PSD flush.
- */
WA_SET_BIT_MASKED(HDC_CHICKEN0,
- /* WaForceEnableNonCoherent:bdw */
- HDC_FORCE_NON_COHERENT |
/* WaForceContextSaveRestoreNonCoherent:bdw */
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
- /* WaHdcDisableFetchWhenMasked:bdw */
- HDC_DONOT_FETCH_MEM_WHEN_MASKED |
/* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
(IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
- /* Wa4x4STCOptimizationDisable:bdw */
- WA_SET_BIT_MASKED(CACHE_MODE_1,
- GEN8_4x4_STC_OPTIMIZATION_DISABLE);
-
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- WA_SET_FIELD_MASKED(GEN7_GT_MODE,
- GEN6_WIZ_HASHING_MASK,
- GEN6_WIZ_HASHING_16x4);
-
return 0;
}
/* WaDisableThreadStallDopClockGating:chv */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
- /* Use Force Non-Coherent whenever executing a 3D context. This is a
- * workaround for a possible hang in the unlikely event a TLB
- * invalidation occurs during a PSD flush.
- */
- /* WaForceEnableNonCoherent:chv */
- /* WaHdcDisableFetchWhenMasked:chv */
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FORCE_NON_COHERENT |
- HDC_DONOT_FETCH_MEM_WHEN_MASKED);
-
- /* Wa4x4STCOptimizationDisable:chv */
- WA_SET_BIT_MASKED(CACHE_MODE_1,
- GEN8_4x4_STC_OPTIMIZATION_DISABLE);
-
/* Improve HiZ throughput on CHV. */
WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- WA_SET_FIELD_MASKED(GEN7_GT_MODE,
- GEN6_WIZ_HASHING_MASK,
- GEN6_WIZ_HASHING_16x4);
-
return 0;
}
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t tmp;
+ /* WaEnableLbsSlaRetryTimerDecrement:skl */
+ I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
+ GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+
+ /* WaDisableKillLogic:bxt,skl */
+ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+ ECOCHK_DIS_TLB);
+
/* WaDisablePartialInstShootdown:skl,bxt */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
- if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
- INTEL_REVID(dev) == SKL_REVID_B0)) ||
- (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
- /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
+ /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
+ if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+ IS_BXT_REVID(dev, 0, BXT_REVID_A1))
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
GEN9_DG_MIRROR_FIX_ENABLE);
- }
- if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
- (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
- /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
+ /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
+ if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+ IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
GEN9_RHWO_OPTIMIZATION_DISABLE);
/*
*/
}
- if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
- IS_BROXTON(dev)) {
- /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
+ /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
+ if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER) || IS_BROXTON(dev))
WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
GEN9_ENABLE_YV12_BUGFIX);
- }
/* Wa4x4STCOptimizationDisable:skl,bxt */
/* WaDisablePartialResolveInVc:skl,bxt */
GEN9_CCS_TLB_PREFETCH_ENABLE);
/* WaDisableMaskBasedCammingInRCC:skl,bxt */
- if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
- (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
+ if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) ||
+ IS_BXT_REVID(dev, 0, BXT_REVID_A1))
WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
PIXEL_MASK_CAMMING_DISABLE);
/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
- if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
- (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
+ if (IS_SKL_REVID(dev, SKL_REVID_F0, SKL_REVID_F0) ||
+ IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
- if (IS_SKYLAKE(dev) ||
- (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_B0)) {
+ if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN8_SAMPLER_POWER_BYPASS_DIS);
- }
/* WaDisableSTUnitPowerOptimization:skl,bxt */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
return 0;
}
-
static int skl_init_workarounds(struct intel_engine_cs *ring)
{
int ret;
if (ret)
return ret;
+ if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
+ /* WaDisableHDCInvalidation:skl */
+ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+ BDW_DISABLE_HDC_INVALIDATION);
+
+ /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
+ I915_WRITE(FF_SLICE_CS_CHICKEN2,
+ _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
+ }
+
+ /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
+ * involving this register should also be added to WA batch as required.
+ */
+ if (IS_SKL_REVID(dev, 0, SKL_REVID_E0))
+ /* WaDisableLSQCROPERFforOCL:skl */
+ I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
+ GEN8_LQSC_RO_PERF_DIS);
+
+ /* WaEnableGapsTsvCreditFix:skl */
+ if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) {
+ I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
+ GEN9_GAPS_TSV_CREDIT_DISABLE));
+ }
+
/* WaDisablePowerCompilerClockGating:skl */
- if (INTEL_REVID(dev) == SKL_REVID_B0)
+ if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0))
WA_SET_BIT_MASKED(HIZ_CHICKEN,
BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
- if (INTEL_REVID(dev) <= SKL_REVID_D0) {
+ if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
/*
*Use Force Non-Coherent whenever executing a 3D context. This
* is a workaround for a possible hang in the unlikely event
HDC_FORCE_NON_COHERENT);
}
- if (INTEL_REVID(dev) == SKL_REVID_C0 ||
- INTEL_REVID(dev) == SKL_REVID_D0)
- /* WaBarrierPerformanceFixDisable:skl */
+ /* WaBarrierPerformanceFixDisable:skl */
+ if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
WA_SET_BIT_MASKED(HDC_CHICKEN0,
HDC_FENCE_DEST_SLM_DISABLE |
HDC_BARRIER_PERFORMANCE_DISABLE);
/* WaDisableSbeCacheDispatchPortSharing:skl */
- if (INTEL_REVID(dev) <= SKL_REVID_F0) {
+ if (IS_SKL_REVID(dev, 0, SKL_REVID_F0))
WA_SET_BIT_MASKED(
GEN7_HALF_SLICE_CHICKEN1,
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
- }
return skl_tune_iz_hashing(ring);
}
if (ret)
return ret;
+ /* WaStoreMultiplePTEenable:bxt */
+ /* This is a requirement according to Hardware specification */
+ if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
+ I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
+
+ /* WaSetClckGatingDisableMedia:bxt */
+ if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
+ I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
+ ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
+ }
+
/* WaDisableThreadStallDopClockGating:bxt */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
STALL_DOP_GATING_DISABLE);
/* WaDisableSbeCacheDispatchPortSharing:bxt */
- if (INTEL_REVID(dev) <= BXT_REVID_B0) {
+ if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
WA_SET_BIT_MASKED(
GEN7_HALF_SLICE_CHICKEN1,
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
if (mbox_reg != GEN6_NOSYNC) {
u32 seqno = i915_gem_request_get_seqno(signaller_req);
intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
- intel_ring_emit(signaller, mbox_reg);
+ intel_ring_emit_reg(signaller, mbox_reg);
intel_ring_emit(signaller, seqno);
}
}
void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
{
- iounmap(ringbuf->virtual_start);
+ if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
+ vunmap(ringbuf->virtual_start);
+ else
+ iounmap(ringbuf->virtual_start);
ringbuf->virtual_start = NULL;
i915_gem_object_ggtt_unpin(ringbuf->obj);
}
+static u32 *vmap_obj(struct drm_i915_gem_object *obj)
+{
+ struct sg_page_iter sg_iter;
+ struct page **pages;
+ void *addr;
+ int i;
+
+ pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
+ if (pages == NULL)
+ return NULL;
+
+ i = 0;
+ for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0)
+ pages[i++] = sg_page_iter_page(&sg_iter);
+
+ addr = vmap(pages, i, 0, PAGE_KERNEL);
+ drm_free_large(pages);
+
+ return addr;
+}
+
int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
struct intel_ringbuffer *ringbuf)
{
struct drm_i915_gem_object *obj = ringbuf->obj;
int ret;
- ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
- if (ret)
- return ret;
+ if (HAS_LLC(dev_priv) && !obj->stolen) {
+ ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 0);
+ if (ret)
+ return ret;
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret) {
- i915_gem_object_ggtt_unpin(obj);
- return ret;
- }
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ if (ret) {
+ i915_gem_object_ggtt_unpin(obj);
+ return ret;
+ }
- ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
- i915_gem_obj_ggtt_offset(obj), ringbuf->size);
- if (ringbuf->virtual_start == NULL) {
- i915_gem_object_ggtt_unpin(obj);
- return -EINVAL;
+ ringbuf->virtual_start = vmap_obj(obj);
+ if (ringbuf->virtual_start == NULL) {
+ i915_gem_object_ggtt_unpin(obj);
+ return -ENOMEM;
+ }
+ } else {
+ ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, true);
+ if (ret) {
+ i915_gem_object_ggtt_unpin(obj);
+ return ret;
+ }
+
+ ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
+ i915_gem_obj_ggtt_offset(obj), ringbuf->size);
+ if (ringbuf->virtual_start == NULL) {
+ i915_gem_object_ggtt_unpin(obj);
+ return -EINVAL;
+ }
}
return 0;
int ret;
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (ring == NULL)
+ if (ring == NULL) {
+ DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
+ engine->name);
return ERR_PTR(-ENOMEM);
+ }
ring->ring = engine;
+ list_add(&ring->link, &engine->buffers);
ring->size = size;
/* Workaround an erratum on the i830 which causes a hang if
ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
if (ret) {
- DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
- engine->name, ret);
+ DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
+ engine->name, ret);
+ list_del(&ring->link);
kfree(ring);
return ERR_PTR(ret);
}
intel_ringbuffer_free(struct intel_ringbuffer *ring)
{
intel_destroy_ringbuffer_obj(ring);
+ list_del(&ring->link);
kfree(ring);
}
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
INIT_LIST_HEAD(&ring->execlist_queue);
+ INIT_LIST_HEAD(&ring->buffers);
i915_gem_batch_pool_init(dev, &ring->batch_pool);
memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
GEN8_RING_SEMAPHORE_INIT;
}
} else if (INTEL_INFO(dev)->gen >= 6) {
+ ring->init_context = intel_rcs_ctx_init;
ring->add_request = gen6_add_request;
ring->flush = gen7_render_ring_flush;
if (INTEL_INFO(dev)->gen == 6)