X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=drivers%2Fgpu%2Fdrm%2Fi915%2Fintel_ringbuffer.c;h=75f29d81af1620e5b975c8213ee844174d4b97ae;hb=f92a9162208a4d4e3d28fa8d00b9fb210d63487b;hp=6adc7f11056844476cad0bc4726014c0ed3df87f;hpb=6def8fdd5d9528db7fb6dfebd994491f6ba45785;p=deliverable%2Flinux.git diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6adc7f110568..75f29d81af16 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -717,7 +717,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) struct drm_i915_private *dev_priv = dev->dev_private; struct i915_workarounds *w = &dev_priv->workarounds; - if (WARN_ON_ONCE(w->count == 0)) + if (w->count == 0) return 0; ring->gpu_caches_dirty = true; @@ -731,7 +731,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_ring_emit(ring, w->reg[i].addr); + intel_ring_emit_reg(ring, w->reg[i].addr); intel_ring_emit(ring, w->reg[i].value); } intel_ring_emit(ring, MI_NOOP); @@ -814,6 +814,16 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:bdw,chv */ + /* WaHdcDisableFetchWhenMasked:bdw,chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: * "The Hierarchical Z RAW Stall Optimization allows non-overlapping * polygons in the same 8x4 pixel/sample area to be processed without @@ -824,6 +834,21 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring) */ WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + /* Wa4x4STCOptimizationDisable:bdw,chv */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); + return 0; } @@ -847,36 +872,12 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceEnableNonCoherent:bdw */ - HDC_FORCE_NON_COHERENT | /* WaForceContextSaveRestoreNonCoherent:bdw */ HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaHdcDisableFetchWhenMasked:bdw */ - HDC_DONOT_FETCH_MEM_WHEN_MASKED | /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); - /* Wa4x4STCOptimizationDisable:bdw */ - WA_SET_BIT_MASKED(CACHE_MODE_1, - GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - return 0; } @@ -893,35 +894,9 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) /* WaDisableThreadStallDopClockGating:chv */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:chv */ - /* WaHdcDisableFetchWhenMasked:chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT | - HDC_DONOT_FETCH_MEM_WHEN_MASKED); - - /* Wa4x4STCOptimizationDisable:chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, - GEN8_4x4_STC_OPTIMIZATION_DISABLE); - /* Improve HiZ throughput on CHV. */ WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - return 0; } @@ -931,6 +906,14 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; uint32_t tmp; + /* WaEnableLbsSlaRetryTimerDecrement:skl */ + I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaDisableKillLogic:bxt,skl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + ECOCHK_DIS_TLB); + /* WaDisablePartialInstShootdown:skl,bxt */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); @@ -939,17 +922,15 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 || - INTEL_REVID(dev) == SKL_REVID_B0)) || - (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) { - /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */ + /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */ + if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) || + IS_BXT_REVID(dev, 0, BXT_REVID_A1)) WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_DG_MIRROR_FIX_ENABLE); - } - if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) || - (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) { - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */ + /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */ + if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) || + IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, GEN9_RHWO_OPTIMIZATION_DISABLE); /* @@ -959,12 +940,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) */ } - if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) || - IS_BROXTON(dev)) { - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */ + /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */ + if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER) || IS_BROXTON(dev)) WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, GEN9_ENABLE_YV12_BUGFIX); - } /* Wa4x4STCOptimizationDisable:skl,bxt */ /* WaDisablePartialResolveInVc:skl,bxt */ @@ -976,24 +955,22 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) GEN9_CCS_TLB_PREFETCH_ENABLE); /* WaDisableMaskBasedCammingInRCC:skl,bxt */ - if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) || - (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) + if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) || + IS_BXT_REVID(dev, 0, BXT_REVID_A1)) WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, PIXEL_MASK_CAMMING_DISABLE); /* WaForceContextSaveRestoreNonCoherent:skl,bxt */ tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT; - if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) || - (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0)) + if (IS_SKL_REVID(dev, SKL_REVID_F0, SKL_REVID_F0) || + IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER)) tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE; WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp); /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */ - if (IS_SKYLAKE(dev) || - (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_B0)) { + if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0)) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); - } /* WaDisableSTUnitPowerOptimization:skl,bxt */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); @@ -1043,7 +1020,6 @@ static int skl_tune_iz_hashing(struct intel_engine_cs *ring) return 0; } - static int skl_init_workarounds(struct intel_engine_cs *ring) { int ret; @@ -1054,12 +1030,36 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) if (ret) return ret; + if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) { + /* WaDisableHDCInvalidation:skl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); + + /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */ + I915_WRITE(FF_SLICE_CS_CHICKEN2, + _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE)); + } + + /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes + * involving this register should also be added to WA batch as required. + */ + if (IS_SKL_REVID(dev, 0, SKL_REVID_E0)) + /* WaDisableLSQCROPERFforOCL:skl */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_RO_PERF_DIS); + + /* WaEnableGapsTsvCreditFix:skl */ + if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) { + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + } + /* WaDisablePowerCompilerClockGating:skl */ - if (INTEL_REVID(dev) == SKL_REVID_B0) + if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0)) WA_SET_BIT_MASKED(HIZ_CHICKEN, BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); - if (INTEL_REVID(dev) <= SKL_REVID_D0) { + if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) { /* *Use Force Non-Coherent whenever executing a 3D context. This * is a workaround for a possible hang in the unlikely event @@ -1070,19 +1070,17 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) HDC_FORCE_NON_COHERENT); } - if (INTEL_REVID(dev) == SKL_REVID_C0 || - INTEL_REVID(dev) == SKL_REVID_D0) - /* WaBarrierPerformanceFixDisable:skl */ + /* WaBarrierPerformanceFixDisable:skl */ + if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0)) WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FENCE_DEST_SLM_DISABLE | HDC_BARRIER_PERFORMANCE_DISABLE); /* WaDisableSbeCacheDispatchPortSharing:skl */ - if (INTEL_REVID(dev) <= SKL_REVID_F0) { + if (IS_SKL_REVID(dev, 0, SKL_REVID_F0)) WA_SET_BIT_MASKED( GEN7_HALF_SLICE_CHICKEN1, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - } return skl_tune_iz_hashing(ring); } @@ -1097,12 +1095,23 @@ static int bxt_init_workarounds(struct intel_engine_cs *ring) if (ret) return ret; + /* WaStoreMultiplePTEenable:bxt */ + /* This is a requirement according to Hardware specification */ + if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); + + /* WaSetClckGatingDisableMedia:bxt */ + if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { + I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & + ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); + } + /* WaDisableThreadStallDopClockGating:bxt */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* WaDisableSbeCacheDispatchPortSharing:bxt */ - if (INTEL_REVID(dev) <= BXT_REVID_B0) { + if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) { WA_SET_BIT_MASKED( GEN7_HALF_SLICE_CHICKEN1, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); @@ -1304,7 +1313,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req, if (mbox_reg != GEN6_NOSYNC) { u32 seqno = i915_gem_request_get_seqno(signaller_req); intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit(signaller, mbox_reg); + intel_ring_emit_reg(signaller, mbox_reg); intel_ring_emit(signaller, seqno); } } @@ -1985,11 +1994,35 @@ static int init_phys_status_page(struct intel_engine_cs *ring) void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { - iounmap(ringbuf->virtual_start); + if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen) + vunmap(ringbuf->virtual_start); + else + iounmap(ringbuf->virtual_start); ringbuf->virtual_start = NULL; i915_gem_object_ggtt_unpin(ringbuf->obj); } +static u32 *vmap_obj(struct drm_i915_gem_object *obj) +{ + struct sg_page_iter sg_iter; + struct page **pages; + void *addr; + int i; + + pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages)); + if (pages == NULL) + return NULL; + + i = 0; + for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) + pages[i++] = sg_page_iter_page(&sg_iter); + + addr = vmap(pages, i, 0, PAGE_KERNEL); + drm_free_large(pages); + + return addr; +} + int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev, struct intel_ringbuffer *ringbuf) { @@ -1997,21 +2030,39 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev, struct drm_i915_gem_object *obj = ringbuf->obj; int ret; - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE); - if (ret) - return ret; + if (HAS_LLC(dev_priv) && !obj->stolen) { + ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 0); + if (ret) + return ret; - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) { - i915_gem_object_ggtt_unpin(obj); - return ret; - } + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) { + i915_gem_object_ggtt_unpin(obj); + return ret; + } - ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base + - i915_gem_obj_ggtt_offset(obj), ringbuf->size); - if (ringbuf->virtual_start == NULL) { - i915_gem_object_ggtt_unpin(obj); - return -EINVAL; + ringbuf->virtual_start = vmap_obj(obj); + if (ringbuf->virtual_start == NULL) { + i915_gem_object_ggtt_unpin(obj); + return -ENOMEM; + } + } else { + ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE); + if (ret) + return ret; + + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) { + i915_gem_object_ggtt_unpin(obj); + return ret; + } + + ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base + + i915_gem_obj_ggtt_offset(obj), ringbuf->size); + if (ringbuf->virtual_start == NULL) { + i915_gem_object_ggtt_unpin(obj); + return -EINVAL; + } } return 0; @@ -2051,10 +2102,14 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) int ret; ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (ring == NULL) + if (ring == NULL) { + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", + engine->name); return ERR_PTR(-ENOMEM); + } ring->ring = engine; + list_add(&ring->link, &engine->buffers); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -2070,8 +2125,9 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) ret = intel_alloc_ringbuffer_obj(engine->dev, ring); if (ret) { - DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", - engine->name, ret); + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n", + engine->name, ret); + list_del(&ring->link); kfree(ring); return ERR_PTR(ret); } @@ -2083,6 +2139,7 @@ void intel_ringbuffer_free(struct intel_ringbuffer *ring) { intel_destroy_ringbuffer_obj(ring); + list_del(&ring->link); kfree(ring); } @@ -2098,6 +2155,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); INIT_LIST_HEAD(&ring->execlist_queue); + INIT_LIST_HEAD(&ring->buffers); i915_gem_batch_pool_init(dev, &ring->batch_pool); memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); @@ -2648,6 +2706,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) GEN8_RING_SEMAPHORE_INIT; } } else if (INTEL_INFO(dev)->gen >= 6) { + ring->init_context = intel_rcs_ctx_init; ring->add_request = gen6_add_request; ring->flush = gen7_render_ring_flush; if (INTEL_INFO(dev)->gen == 6)