drm/i915: Fix context/engine cleanup order

[deliverable/linux.git] / drivers / gpu / drm / i915 / i915_gem.c
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index 47f6a8244151ac0f329290edf3a1e027b4e78cc4..799a53ad04f2a9cfbf5747d39735c777a940a90f 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
         return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
  }
  
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+       unsigned long t;
+
+       /* Cheaply and approximately convert from nanoseconds to microseconds.
+        * The result and subsequent calculations are also defined in the same
+        * approximate microseconds units. The principal source of timing
+        * error here is from the simple truncation.
+        *
+        * Note that local_clock() is only defined relative to the current CPU;
+        * the comparisons are no longer valid if we switch CPUs. Instead of
+        * blocking preemption for the entire busywait, we can detect the CPU
+        * switch and use that as indicator of system load and a reason to
+        * stop busywaiting, see busywait_stop().
+        */
+       *cpu = get_cpu();
+       t = local_clock() >> 10;
+       put_cpu();
+
+       return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+       unsigned this_cpu;
+
+       if (time_after(local_clock_us(&this_cpu), timeout))
+               return true;
+
+       return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
  {
         unsigned long timeout;
+       unsigned cpu;
+
+       /* When waiting for high frequency requests, e.g. during synchronous
+        * rendering split between the CPU and GPU, the finite amount of time
+        * required to set up the irq and wait upon it limits the response
+        * rate. By busywaiting on the request completion for a short while we
+        * can service the high frequency waits as quickly as possible. However,
+        * if it is a slow request, we want to sleep as quickly as possible.
+        * The tradeoff between waiting and sleeping is roughly the time it
+        * takes to sleep on a request, on the order of a microsecond.
+        */
  
-       if (i915_gem_request_get_ring(req)->irq_refcount)
+       if (req->ring->irq_refcount)
                 return -EBUSY;
  
-       timeout = jiffies + 1;
+       /* Only spin if we know the GPU is processing this request */
+       if (!i915_gem_request_started(req, true))
+               return -EAGAIN;
+
+       timeout = local_clock_us(&cpu) + 5;
         while (!need_resched()) {
                 if (i915_gem_request_completed(req, true))
                         return 0;
  
-               if (time_after_eq(jiffies, timeout))
+               if (signal_pending_state(state, current))
+                       break;
+
+               if (busywait_stop(timeout, cpu))
                         break;
  
                 cpu_relax_lowlatency();
         }
+
         if (i915_gem_request_completed(req, false))
                 return 0;
  
@@ -1197,9 +1248,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
         struct drm_i915_private *dev_priv = dev->dev_private;
         const bool irq_test_in_progress =
                 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+       int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
         DEFINE_WAIT(wait);
         unsigned long timeout_expire;
-       s64 before, now;
+       s64 before = 0; /* Only to silence a compiler warning. */
         int ret;
  
         WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
@@ -1210,18 +1262,29 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
         if (i915_gem_request_completed(req, true))
                 return 0;
  
-       timeout_expire = timeout ?
-               jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
+       timeout_expire = 0;
+       if (timeout) {
+               if (WARN_ON(*timeout < 0))
+                       return -EINVAL;
+
+               if (*timeout == 0)
+                       return -ETIME;
+
+               timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+
+               /*
+                * Record current time in case interrupted by signal, or wedged.
+                */
+               before = ktime_get_raw_ns();
+       }
  
         if (INTEL_INFO(dev_priv)->gen >= 6)
                 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
  
-       /* Record current time in case interrupted by signal, or wedged */
         trace_i915_gem_request_wait_begin(req);
-       before = ktime_get_raw_ns();
  
         /* Optimistic spin for the next jiffie before touching IRQs */
-       ret = __i915_spin_request(req);
+       ret = __i915_spin_request(req, state);
         if (ret == 0)
                 goto out;
  
@@ -1233,8 +1296,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
         for (;;) {
                 struct timer_list timer;
  
-               prepare_to_wait(&ring->irq_queue, &wait,
-                               interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+               prepare_to_wait(&ring->irq_queue, &wait, state);
  
                 /* We need to check whether any gpu reset happened in between
                  * the caller grabbing the seqno and now ... */
@@ -1252,7 +1314,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
                         break;
                 }
  
-               if (interruptible && signal_pending(current)) {
+               if (signal_pending_state(state, current)) {
                         ret = -ERESTARTSYS;
                         break;
                 }
@@ -1284,11 +1346,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
         finish_wait(&ring->irq_queue, &wait);
  
  out:
-       now = ktime_get_raw_ns();
         trace_i915_gem_request_wait_end(req);
  
         if (timeout) {
-               s64 tres = *timeout - (now - before);
+               s64 tres = *timeout - (ktime_get_raw_ns() - before);
  
                 *timeout = tres < 0 ? 0 : tres;
  
@@ -2546,6 +2607,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
         request->batch_obj = obj;
  
         request->emitted_jiffies = jiffies;
+       request->previous_seqno = ring->last_submitted_seqno;
         ring->last_submitted_seqno = request->seqno;
         list_add_tail(&request->list, &ring->request_list);
  
@@ -2617,10 +2679,8 @@ void i915_gem_request_free(struct kref *req_ref)
                 i915_gem_request_remove_from_client(req);
  
         if (ctx) {
-               if (i915.enable_execlists) {
-                       if (ctx != req->ring->default_context)
-                               intel_lr_context_unpin(req);
-               }
+               if (i915.enable_execlists && ctx != req->i915->kernel_context)
+                       intel_lr_context_unpin(req);
  
                 i915_gem_context_unreference(ctx);
         }
@@ -2628,9 +2688,10 @@ void i915_gem_request_free(struct kref *req_ref)
         kmem_cache_free(req->i915->requests, req);
  }
  
-int i915_gem_request_alloc(struct intel_engine_cs *ring,
-                          struct intel_context *ctx,
-                          struct drm_i915_gem_request **req_out)
+static inline int
+__i915_gem_request_alloc(struct intel_engine_cs *ring,
+                        struct intel_context *ctx,
+                        struct drm_i915_gem_request **req_out)
  {
         struct drm_i915_private *dev_priv = to_i915(ring->dev);
         struct drm_i915_gem_request *req;
@@ -2693,6 +2754,31 @@ err:
         return ret;
  }
  
+/**
+ * i915_gem_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *       This can be NULL if the request is not directly related to
+ *       any specific user context, in which case this function will
+ *       choose an appropriate context to use.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an ERR_PTR()-encoded error code if not.
+ */
+struct drm_i915_gem_request *
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+                      struct intel_context *ctx)
+{
+       struct drm_i915_gem_request *req;
+       int err;
+
+       if (ctx == NULL)
+               ctx = to_i915(engine->dev)->kernel_context;
+       err = __i915_gem_request_alloc(engine, ctx, &req);
+       return err ? ERR_PTR(err) : req;
+}
+
  void i915_gem_request_cancel(struct drm_i915_gem_request *req)
  {
         intel_ring_reserved_space_cancel(req->ringbuf);
@@ -2934,6 +3020,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
                 if (!list_empty(&ring->request_list))
                         return;
  
+       /* We should probably sync with hangcheck here, using cancel_work_sync.
+        * Locking also looks broken here: ring->request_list is protected by
+        * dev->struct_mutex. */
+
         intel_mark_idle(dev);
  
         if (mutex_trylock(&dev->struct_mutex)) {
@@ -3106,9 +3196,13 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
                         return 0;
  
                 if (*to_req == NULL) {
-                       ret = i915_gem_request_alloc(to, to->default_context, to_req);
-                       if (ret)
-                               return ret;
+                       struct drm_i915_gem_request *req;
+
+                       req = i915_gem_request_alloc(to, NULL);
+                       if (IS_ERR(req))
+                               return PTR_ERR(req);
+
+                       *to_req = req;
                 }
  
                 trace_i915_gem_ring_sync_to(*to_req, from, from_req);
@@ -3308,9 +3402,9 @@ int i915_gpu_idle(struct drm_device *dev)
                 if (!i915.enable_execlists) {
                         struct drm_i915_gem_request *req;
  
-                       ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-                       if (ret)
-                               return ret;
+                       req = i915_gem_request_alloc(ring, NULL);
+                       if (IS_ERR(req))
+                               return PTR_ERR(req);
  
                         ret = i915_switch_context(req);
                         if (ret) {
@@ -3424,7 +3518,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
         if (flags & PIN_MAPPABLE)
                 end = min_t(u64, end, dev_priv->gtt.mappable_end);
         if (flags & PIN_ZONE_4G)
-               end = min_t(u64, end, (1ULL << 32));
+               end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
  
         if (alignment == 0)
                 alignment = flags & PIN_MAPPABLE ? fence_alignment :
@@ -4102,6 +4196,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
         return false;
  }
  
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+       struct drm_i915_gem_object *obj = vma->obj;
+       bool mappable, fenceable;
+       u32 fence_size, fence_alignment;
+
+       fence_size = i915_gem_get_gtt_size(obj->base.dev,
+                                          obj->base.size,
+                                          obj->tiling_mode);
+       fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+                                                    obj->base.size,
+                                                    obj->tiling_mode,
+                                                    true);
+
+       fenceable = (vma->node.size == fence_size &&
+                    (vma->node.start & (fence_alignment - 1)) == 0);
+
+       mappable = (vma->node.start + fence_size <=
+                   to_i915(obj->base.dev)->gtt.mappable_end);
+
+       obj->map_and_fenceable = mappable && fenceable;
+}
+
  static int
  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
                        struct i915_address_space *vm,
@@ -4169,25 +4286,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
  
         if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
             (bound ^ vma->bound) & GLOBAL_BIND) {
-               bool mappable, fenceable;
-               u32 fence_size, fence_alignment;
-
-               fence_size = i915_gem_get_gtt_size(obj->base.dev,
-                                                  obj->base.size,
-                                                  obj->tiling_mode);
-               fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-                                                            obj->base.size,
-                                                            obj->tiling_mode,
-                                                            true);
-
-               fenceable = (vma->node.size == fence_size &&
-                            (vma->node.start & (fence_alignment - 1)) == 0);
-
-               mappable = (vma->node.start + fence_size <=
-                           dev_priv->gtt.mappable_end);
-
-               obj->map_and_fenceable = mappable && fenceable;
-
+               __i915_vma_set_map_and_fenceable(vma);
                 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
         }
  
@@ -4259,10 +4358,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
         if (ret)
                 goto unref;
  
-       BUILD_BUG_ON(I915_NUM_RINGS > 16);
-       args->busy = obj->active << 16;
-       if (obj->last_write_req)
-               args->busy |= obj->last_write_req->ring->id;
+       args->busy = 0;
+       if (obj->active) {
+               int i;
+
+               for (i = 0; i < I915_NUM_RINGS; i++) {
+                       struct drm_i915_gem_request *req;
+
+                       req = obj->last_read_req[i];
+                       if (req)
+                               args->busy |= 1 << (16 + req->ring->exec_id);
+               }
+               if (obj->last_write_req)
+                       args->busy |= obj->last_write_req->ring->exec_id;
+       }
  
  unref:
         drm_gem_object_unreference(&obj->base);
@@ -4763,7 +4872,7 @@ i915_gem_init_hw(struct drm_device *dev)
          */
         init_unused_rings(dev);
  
-       BUG_ON(!dev_priv->ring[RCS].default_context);
+       BUG_ON(!dev_priv->kernel_context);
  
         ret = i915_ppgtt_init_hw(dev);
         if (ret) {
@@ -4800,11 +4909,10 @@ i915_gem_init_hw(struct drm_device *dev)
         for_each_ring(ring, dev_priv, i) {
                 struct drm_i915_gem_request *req;
  
-               WARN_ON(!ring->default_context);
-
-               ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-               if (ret) {
-                       i915_gem_cleanup_ringbuffer(dev);
+               req = i915_gem_request_alloc(ring, NULL);
+               if (IS_ERR(req)) {
+                       ret = PTR_ERR(req);
+                       i915_gem_cleanup_engines(dev);
                         goto out;
                 }
  
@@ -4817,7 +4925,7 @@ i915_gem_init_hw(struct drm_device *dev)
                 if (ret && ret != -EIO) {
                         DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
                         i915_gem_request_cancel(req);
-                       i915_gem_cleanup_ringbuffer(dev);
+                       i915_gem_cleanup_engines(dev);
                         goto out;
                 }
  
@@ -4825,7 +4933,7 @@ i915_gem_init_hw(struct drm_device *dev)
                 if (ret && ret != -EIO) {
                         DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
                         i915_gem_request_cancel(req);
-                       i915_gem_cleanup_ringbuffer(dev);
+                       i915_gem_cleanup_engines(dev);
                         goto out;
                 }
  
@@ -4900,7 +5008,7 @@ out_unlock:
  }
  
  void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_device *dev)
  {
         struct drm_i915_private *dev_priv = dev->dev_private;
         struct intel_engine_cs *ring;
@@ -4909,13 +5017,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
         for_each_ring(ring, dev_priv, i)
                 dev_priv->gt.cleanup_ring(ring);
  
-    if (i915.enable_execlists)
-            /*
-             * Neither the BIOS, ourselves or any other kernel
-             * expects the system to be in execlists mode on startup,
-             * so we need to reset the GPU back to legacy mode.
-             */
-            intel_gpu_reset(dev);
+       if (i915.enable_execlists) {
+               /*
+                * Neither the BIOS, ourselves or any other kernel
+                * expects the system to be in execlists mode on startup,
+                * so we need to reset the GPU back to legacy mode.
+                */
+               intel_gpu_reset(dev);
+       }
  }
  
  static void
@@ -5043,6 +5152,8 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
         spin_lock_init(&file_priv->mm.lock);
         INIT_LIST_HEAD(&file_priv->mm.request_list);
  
+       file_priv->bsd_ring = -1;
+
         ret = i915_gem_context_open(dev, file);
         if (ret)
                 kfree(file_priv);