drm/i915: Update flush_all_caches() to take request structures
[deliverable/linux.git] / drivers / gpu / drm / i915 / intel_ringbuffer.c
index 441e2502b88946ff2d7455a9f26cc32faa87d8fc..48ca73e7aaa6b4a9832f21ec7289d8a13b56ca6c 100644 (file)
@@ -81,7 +81,7 @@ bool intel_ring_stopped(struct intel_engine_cs *ring)
        return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
 }
 
-void __intel_ring_advance(struct intel_engine_cs *ring)
+static void __intel_ring_advance(struct intel_engine_cs *ring)
 {
        struct intel_ringbuffer *ringbuf = ring->buffer;
        ringbuf->tail &= ringbuf->size - 1;
@@ -703,10 +703,10 @@ err:
        return ret;
 }
 
-static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
-                                      struct intel_context *ctx)
+static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
        int ret, i;
+       struct intel_engine_cs *ring = req->ring;
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_workarounds *w = &dev_priv->workarounds;
@@ -715,7 +715,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
                return 0;
 
        ring->gpu_caches_dirty = true;
-       ret = intel_ring_flush_all_caches(ring);
+       ret = intel_ring_flush_all_caches(req);
        if (ret)
                return ret;
 
@@ -733,7 +733,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
        intel_ring_advance(ring);
 
        ring->gpu_caches_dirty = true;
-       ret = intel_ring_flush_all_caches(ring);
+       ret = intel_ring_flush_all_caches(req);
        if (ret)
                return ret;
 
@@ -742,16 +742,15 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
        return 0;
 }
 
-static int intel_rcs_ctx_init(struct intel_engine_cs *ring,
-                             struct intel_context *ctx)
+static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
 {
        int ret;
 
-       ret = intel_ring_workarounds_emit(ring, ctx);
+       ret = intel_ring_workarounds_emit(req);
        if (ret != 0)
                return ret;
 
-       ret = i915_gem_render_state_init(ring);
+       ret = i915_gem_render_state_init(req);
        if (ret)
                DRM_ERROR("init render state: %d\n", ret);
 
@@ -800,6 +799,11 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 
+       WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+
+       /* WaDisableAsyncFlipPerfMode:bdw */
+       WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+
        /* WaDisablePartialInstShootdown:bdw */
        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
@@ -861,6 +865,11 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 
+       WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+
+       /* WaDisableAsyncFlipPerfMode:chv */
+       WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+
        /* WaDisablePartialInstShootdown:chv */
        /* WaDisableThreadStallDopClockGating:chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
@@ -901,13 +910,6 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
 
-       if (INTEL_REVID(dev) == SKL_REVID_C0 ||
-           INTEL_REVID(dev) == SKL_REVID_D0)
-               /* WaBarrierPerformanceFixDisable:skl */
-               WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                                 HDC_FENCE_DEST_SLM_DISABLE |
-                                 HDC_BARRIER_PERFORMANCE_DISABLE);
-
        return 0;
 }
 
@@ -915,57 +917,63 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
 {
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t tmp;
 
-       /* WaDisablePartialInstShootdown:skl */
+       /* WaDisablePartialInstShootdown:skl,bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
-       /* Syncing dependencies between camera and graphics */
+       /* Syncing dependencies between camera and graphics:skl,bxt */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                          GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
 
-       if (INTEL_REVID(dev) == SKL_REVID_A0 ||
-           INTEL_REVID(dev) == SKL_REVID_B0) {
-               /* WaDisableDgMirrorFixInHalfSliceChicken5:skl */
+       if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
+           INTEL_REVID(dev) == SKL_REVID_B0)) ||
+           (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
+               /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
                WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                                  GEN9_DG_MIRROR_FIX_ENABLE);
        }
 
-       if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) {
-               /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl */
+       if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
+           (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
+               /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
                WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
                                  GEN9_RHWO_OPTIMIZATION_DISABLE);
                WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0,
                                  DISABLE_PIXEL_MASK_CAMMING);
        }
 
-       if (INTEL_REVID(dev) >= SKL_REVID_C0) {
-               /* WaEnableYV12BugFixInHalfSliceChicken7:skl */
+       if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
+           IS_BROXTON(dev)) {
+               /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                                  GEN9_ENABLE_YV12_BUGFIX);
        }
 
-       if (INTEL_REVID(dev) <= SKL_REVID_D0) {
-               /*
-                *Use Force Non-Coherent whenever executing a 3D context. This
-                * is a workaround for a possible hang in the unlikely event
-                * a TLB invalidation occurs during a PSD flush.
-                */
-               /* WaForceEnableNonCoherent:skl */
-               WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                                 HDC_FORCE_NON_COHERENT);
-       }
-
-       /* Wa4x4STCOptimizationDisable:skl */
+       /* Wa4x4STCOptimizationDisable:skl,bxt */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 
-       /* WaDisablePartialResolveInVc:skl */
+       /* WaDisablePartialResolveInVc:skl,bxt */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
 
-       /* WaCcsTlbPrefetchDisable:skl */
+       /* WaCcsTlbPrefetchDisable:skl,bxt */
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                          GEN9_CCS_TLB_PREFETCH_ENABLE);
 
+       /* WaDisableMaskBasedCammingInRCC:skl,bxt */
+       if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
+           (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
+               WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
+                                 PIXEL_MASK_CAMMING_DISABLE);
+
+       /* WaForceContextSaveRestoreNonCoherent:skl,bxt */
+       tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
+       if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
+           (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
+               tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
+       WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
+
        return 0;
 }
 
@@ -1024,9 +1032,48 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
                WA_SET_BIT_MASKED(HIZ_CHICKEN,
                                  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
+       if (INTEL_REVID(dev) <= SKL_REVID_D0) {
+               /*
+                *Use Force Non-Coherent whenever executing a 3D context. This
+                * is a workaround for a possible hang in the unlikely event
+                * a TLB invalidation occurs during a PSD flush.
+                */
+               /* WaForceEnableNonCoherent:skl */
+               WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                                 HDC_FORCE_NON_COHERENT);
+       }
+
+       if (INTEL_REVID(dev) == SKL_REVID_C0 ||
+           INTEL_REVID(dev) == SKL_REVID_D0)
+               /* WaBarrierPerformanceFixDisable:skl */
+               WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                                 HDC_FENCE_DEST_SLM_DISABLE |
+                                 HDC_BARRIER_PERFORMANCE_DISABLE);
+
        return skl_tune_iz_hashing(ring);
 }
 
+static int bxt_init_workarounds(struct intel_engine_cs *ring)
+{
+       struct drm_device *dev = ring->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       gen9_init_workarounds(ring);
+
+       /* WaDisableThreadStallDopClockGating:bxt */
+       WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                         STALL_DOP_GATING_DISABLE);
+
+       /* WaDisableSbeCacheDispatchPortSharing:bxt */
+       if (INTEL_REVID(dev) <= BXT_REVID_B0) {
+               WA_SET_BIT_MASKED(
+                       GEN7_HALF_SLICE_CHICKEN1,
+                       GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+       }
+
+       return 0;
+}
+
 int init_workarounds_ring(struct intel_engine_cs *ring)
 {
        struct drm_device *dev = ring->dev;
@@ -1044,8 +1091,9 @@ int init_workarounds_ring(struct intel_engine_cs *ring)
 
        if (IS_SKYLAKE(dev))
                return skl_init_workarounds(ring);
-       else if (IS_GEN9(dev))
-               return gen9_init_workarounds(ring);
+
+       if (IS_BROXTON(dev))
+               return bxt_init_workarounds(ring);
 
        return 0;
 }
@@ -1066,9 +1114,9 @@ static int init_render_ring(struct intel_engine_cs *ring)
         * to use MI_WAIT_FOR_EVENT within the CS. It should already be
         * programmed to '1' on all products.
         *
-        * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
+        * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
         */
-       if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 9)
+       if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
                I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
 
        /* Required for the hardware to program scanline values for waiting */
@@ -1093,7 +1141,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
                           _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
        }
 
-       if (INTEL_INFO(dev)->gen >= 6)
+       if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
                I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
        if (HAS_L3_DPF(dev))
@@ -1972,6 +2020,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
        INIT_LIST_HEAD(&ring->active_list);
        INIT_LIST_HEAD(&ring->request_list);
        INIT_LIST_HEAD(&ring->execlist_queue);
+       i915_gem_batch_pool_init(dev, &ring->batch_pool);
        ringbuf->size = 32 * PAGE_SIZE;
        ringbuf->ring = ring;
        memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
@@ -2050,97 +2099,52 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
        cleanup_status_page(ring);
 
        i915_cmd_parser_fini_ring(ring);
+       i915_gem_batch_pool_fini(&ring->batch_pool);
 
        kfree(ringbuf);
        ring->buffer = NULL;
 }
 
-static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
+static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 {
        struct intel_ringbuffer *ringbuf = ring->buffer;
        struct drm_i915_gem_request *request;
+       unsigned space;
        int ret;
 
+       /* The whole point of reserving space is to not wait! */
+       WARN_ON(ringbuf->reserved_in_use);
+
        if (intel_ring_space(ringbuf) >= n)
                return 0;
 
        list_for_each_entry(request, &ring->request_list, list) {
-               if (__intel_ring_space(request->postfix, ringbuf->tail,
-                                      ringbuf->size) >= n) {
+               space = __intel_ring_space(request->postfix, ringbuf->tail,
+                                          ringbuf->size);
+               if (space >= n)
                        break;
-               }
        }
 
-       if (&request->list == &ring->request_list)
+       if (WARN_ON(&request->list == &ring->request_list))
                return -ENOSPC;
 
        ret = i915_wait_request(request);
        if (ret)
                return ret;
 
-       i915_gem_retire_requests_ring(ring);
-
+       ringbuf->space = space;
        return 0;
 }
 
-static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
-{
-       struct drm_device *dev = ring->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct intel_ringbuffer *ringbuf = ring->buffer;
-       unsigned long end;
-       int ret;
-
-       ret = intel_ring_wait_request(ring, n);
-       if (ret != -ENOSPC)
-               return ret;
-
-       /* force the tail write in case we have been skipping them */
-       __intel_ring_advance(ring);
-
-       /* With GEM the hangcheck timer should kick us out of the loop,
-        * leaving it early runs the risk of corrupting GEM state (due
-        * to running on almost untested codepaths). But on resume
-        * timers don't work yet, so prevent a complete hang in that
-        * case by choosing an insanely large timeout. */
-       end = jiffies + 60 * HZ;
-
-       ret = 0;
-       trace_i915_ring_wait_begin(ring);
-       do {
-               if (intel_ring_space(ringbuf) >= n)
-                       break;
-               ringbuf->head = I915_READ_HEAD(ring);
-               if (intel_ring_space(ringbuf) >= n)
-                       break;
-
-               msleep(1);
-
-               if (dev_priv->mm.interruptible && signal_pending(current)) {
-                       ret = -ERESTARTSYS;
-                       break;
-               }
-
-               ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-                                          dev_priv->mm.interruptible);
-               if (ret)
-                       break;
-
-               if (time_after(jiffies, end)) {
-                       ret = -EBUSY;
-                       break;
-               }
-       } while (1);
-       trace_i915_ring_wait_end(ring);
-       return ret;
-}
-
 static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 {
        uint32_t __iomem *virt;
        struct intel_ringbuffer *ringbuf = ring->buffer;
        int rem = ringbuf->size - ringbuf->tail;
 
+       /* Can't wrap if space has already been reserved! */
+       WARN_ON(ringbuf->reserved_in_use);
+
        if (ringbuf->space < rem) {
                int ret = ring_wait_for_space(ring, rem);
                if (ret)
@@ -2161,65 +2165,104 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 int intel_ring_idle(struct intel_engine_cs *ring)
 {
        struct drm_i915_gem_request *req;
-       int ret;
 
        /* We need to add any requests required to flush the objects and ring */
-       if (ring->outstanding_lazy_request) {
-               ret = i915_add_request(ring);
-               if (ret)
-                       return ret;
-       }
+       WARN_ON(ring->outstanding_lazy_request);
+       if (ring->outstanding_lazy_request)
+               i915_add_request(ring->outstanding_lazy_request);
 
        /* Wait upon the last request to be completed */
        if (list_empty(&ring->request_list))
                return 0;
 
        req = list_entry(ring->request_list.prev,
-                          struct drm_i915_gem_request,
-                          list);
+                       struct drm_i915_gem_request,
+                       list);
 
-       return i915_wait_request(req);
+       /* Make sure we do not trigger any retires */
+       return __i915_wait_request(req,
+                                  atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
+                                  to_i915(ring->dev)->mm.interruptible,
+                                  NULL, NULL);
 }
 
-static int
-intel_ring_alloc_request(struct intel_engine_cs *ring)
+int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
-       int ret;
-       struct drm_i915_gem_request *request;
-       struct drm_i915_private *dev_private = ring->dev->dev_private;
+       request->ringbuf = request->ring->buffer;
+       return 0;
+}
 
-       if (ring->outstanding_lazy_request)
-               return 0;
+void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
+{
+       /* NB: Until request management is fully tidied up and the OLR is
+        * removed, there are too many ways for get false hits on this
+        * anti-recursion check! */
+       /*WARN_ON(ringbuf->reserved_size);*/
+       WARN_ON(ringbuf->reserved_in_use);
 
-       request = kzalloc(sizeof(*request), GFP_KERNEL);
-       if (request == NULL)
-               return -ENOMEM;
+       ringbuf->reserved_size = size;
 
-       kref_init(&request->ref);
-       request->ring = ring;
-       request->ringbuf = ring->buffer;
-       request->uniq = dev_private->request_uniq++;
+       /*
+        * Really need to call _begin() here but that currently leads to
+        * recursion problems! This will be fixed later but for now just
+        * return and hope for the best. Note that there is only a real
+        * problem if the create of the request never actually calls _begin()
+        * but if they are not submitting any work then why did they create
+        * the request in the first place?
+        */
+}
 
-       ret = i915_gem_get_seqno(ring->dev, &request->seqno);
-       if (ret) {
-               kfree(request);
-               return ret;
-       }
+void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
+{
+       WARN_ON(ringbuf->reserved_in_use);
 
-       ring->outstanding_lazy_request = request;
-       return 0;
+       ringbuf->reserved_size   = 0;
+       ringbuf->reserved_in_use = false;
 }
 
-static int __intel_ring_prepare(struct intel_engine_cs *ring,
-                               int bytes)
+void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
+{
+       WARN_ON(ringbuf->reserved_in_use);
+
+       ringbuf->reserved_in_use = true;
+       ringbuf->reserved_tail   = ringbuf->tail;
+}
+
+void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
+{
+       WARN_ON(!ringbuf->reserved_in_use);
+       WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
+            "request reserved size too small: %d vs %d!\n",
+            ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+
+       ringbuf->reserved_size   = 0;
+       ringbuf->reserved_in_use = false;
+}
+
+static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
 {
        struct intel_ringbuffer *ringbuf = ring->buffer;
        int ret;
 
+       /*
+        * Add on the reserved size to the request to make sure that after
+        * the intended commands have been emitted, there is guaranteed to
+        * still be enough free space to send them to the hardware.
+        */
+       if (!ringbuf->reserved_in_use)
+               bytes += ringbuf->reserved_size;
+
        if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
                ret = intel_wrap_ring_buffer(ring);
                if (unlikely(ret))
                        return ret;
+
+               if(ringbuf->reserved_size) {
+                       uint32_t size = ringbuf->reserved_size;
+
+                       intel_ring_reserved_space_cancel(ringbuf);
+                       intel_ring_reserved_space_reserve(ringbuf, size);
+               }
        }
 
        if (unlikely(ringbuf->space < bytes)) {
@@ -2234,6 +2277,7 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring,
 int intel_ring_begin(struct intel_engine_cs *ring,
                     int num_dwords)
 {
+       struct drm_i915_gem_request *req;
        struct drm_i915_private *dev_priv = ring->dev->dev_private;
        int ret;
 
@@ -2247,7 +2291,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
                return ret;
 
        /* Preallocate the olr before touching the ring */
-       ret = intel_ring_alloc_request(ring);
+       ret = i915_gem_request_alloc(ring, ring->default_context, &req);
        if (ret)
                return ret;
 
@@ -2848,8 +2892,9 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 }
 
 int
-intel_ring_flush_all_caches(struct intel_engine_cs *ring)
+intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
 {
+       struct intel_engine_cs *ring = req->ring;
        int ret;
 
        if (!ring->gpu_caches_dirty)
@@ -2866,8 +2911,9 @@ intel_ring_flush_all_caches(struct intel_engine_cs *ring)
 }
 
 int
-intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
+intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
 {
+       struct intel_engine_cs *ring = req->ring;
        uint32_t flush_domains;
        int ret;
 
This page took 0.042023 seconds and 5 git commands to generate.