drm/i915: Fix context/engine cleanup order
[deliverable/linux.git] / drivers / gpu / drm / i915 / i915_gem.c
index 47f6a8244151ac0f329290edf3a1e027b4e78cc4..799a53ad04f2a9cfbf5747d39735c777a940a90f 100644 (file)
@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
        return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+       unsigned long t;
+
+       /* Cheaply and approximately convert from nanoseconds to microseconds
+        * (>> 10 divides by 1024, not 1000). The result and subsequent
+        * calculations are also defined in the same approximate microsecond
+        * units. The principal source of timing error is the truncation.
+        *
+        * Note that local_clock() is only defined wrt the current CPU; the
+        * comparisons are no longer valid if we switch CPUs. Instead of
+        * blocking preemption for the entire busywait, we can detect the CPU
+        * switch and use that as an indicator of system load and a reason to
+        * stop busywaiting, see busywait_stop().
+        */
+       *cpu = get_cpu(); /* report which CPU this timestamp belongs to */
+       t = local_clock() >> 10;
+       put_cpu();
+
+       return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+       unsigned this_cpu; /* CPU on which the fresh timestamp was sampled */
+
+       if (time_after(local_clock_us(&this_cpu), timeout)) /* deadline passed */
+               return true;
+
+       return this_cpu != cpu; /* CPU changed: timestamps not comparable, stop */
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 {
        unsigned long timeout;
+       unsigned cpu;
+
+       /* When waiting for high frequency requests, e.g. during synchronous
+        * rendering split between the CPU and GPU, the finite amount of time
+        * required to set up the irq and wait upon it limits the response
+        * rate. By busywaiting on the request completion for a short while we
+        * can service the high frequency waits as quick as possible. However,
+        * if it is a slow request, we want to sleep as quickly as possible.
+        * The tradeoff between waiting and sleeping is roughly the time it
+        * takes to sleep on a request, on the order of a microsecond.
+        */
 
-       if (i915_gem_request_get_ring(req)->irq_refcount)
+       if (req->ring->irq_refcount)
                return -EBUSY;
 
-       timeout = jiffies + 1;
+       /* Only spin if we know the GPU is processing this request */
+       if (!i915_gem_request_started(req, true))
+               return -EAGAIN;
+
+       timeout = local_clock_us(&cpu) + 5;
        while (!need_resched()) {
                if (i915_gem_request_completed(req, true))
                        return 0;
 
-               if (time_after_eq(jiffies, timeout))
+               if (signal_pending_state(state, current))
+                       break;
+
+               if (busywait_stop(timeout, cpu))
                        break;
 
                cpu_relax_lowlatency();
        }
+
        if (i915_gem_request_completed(req, false))
                return 0;
 
@@ -1197,9 +1248,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
        struct drm_i915_private *dev_priv = dev->dev_private;
        const bool irq_test_in_progress =
                ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+       int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
        DEFINE_WAIT(wait);
        unsigned long timeout_expire;
-       s64 before, now;
+       s64 before = 0; /* Only to silence a compiler warning. */
        int ret;
 
        WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
@@ -1210,18 +1262,29 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
        if (i915_gem_request_completed(req, true))
                return 0;
 
-       timeout_expire = timeout ?
-               jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
+       timeout_expire = 0;
+       if (timeout) {
+               if (WARN_ON(*timeout < 0))
+                       return -EINVAL;
+
+               if (*timeout == 0)
+                       return -ETIME;
+
+               timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+
+               /*
+                * Record current time in case interrupted by signal, or wedged.
+                */
+               before = ktime_get_raw_ns();
+       }
 
        if (INTEL_INFO(dev_priv)->gen >= 6)
                gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
 
-       /* Record current time in case interrupted by signal, or wedged */
        trace_i915_gem_request_wait_begin(req);
-       before = ktime_get_raw_ns();
 
        /* Optimistic spin for the next jiffie before touching IRQs */
-       ret = __i915_spin_request(req);
+       ret = __i915_spin_request(req, state);
        if (ret == 0)
                goto out;
 
@@ -1233,8 +1296,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
        for (;;) {
                struct timer_list timer;
 
-               prepare_to_wait(&ring->irq_queue, &wait,
-                               interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+               prepare_to_wait(&ring->irq_queue, &wait, state);
 
                /* We need to check whether any gpu reset happened in between
                 * the caller grabbing the seqno and now ... */
@@ -1252,7 +1314,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
                        break;
                }
 
-               if (interruptible && signal_pending(current)) {
+               if (signal_pending_state(state, current)) {
                        ret = -ERESTARTSYS;
                        break;
                }
@@ -1284,11 +1346,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
        finish_wait(&ring->irq_queue, &wait);
 
 out:
-       now = ktime_get_raw_ns();
        trace_i915_gem_request_wait_end(req);
 
        if (timeout) {
-               s64 tres = *timeout - (now - before);
+               s64 tres = *timeout - (ktime_get_raw_ns() - before);
 
                *timeout = tres < 0 ? 0 : tres;
 
@@ -2546,6 +2607,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
        request->batch_obj = obj;
 
        request->emitted_jiffies = jiffies;
+       request->previous_seqno = ring->last_submitted_seqno;
        ring->last_submitted_seqno = request->seqno;
        list_add_tail(&request->list, &ring->request_list);
 
@@ -2617,10 +2679,8 @@ void i915_gem_request_free(struct kref *req_ref)
                i915_gem_request_remove_from_client(req);
 
        if (ctx) {
-               if (i915.enable_execlists) {
-                       if (ctx != req->ring->default_context)
-                               intel_lr_context_unpin(req);
-               }
+               if (i915.enable_execlists && ctx != req->i915->kernel_context)
+                       intel_lr_context_unpin(req);
 
                i915_gem_context_unreference(ctx);
        }
@@ -2628,9 +2688,10 @@ void i915_gem_request_free(struct kref *req_ref)
        kmem_cache_free(req->i915->requests, req);
 }
 
-int i915_gem_request_alloc(struct intel_engine_cs *ring,
-                          struct intel_context *ctx,
-                          struct drm_i915_gem_request **req_out)
+static inline int
+__i915_gem_request_alloc(struct intel_engine_cs *ring,
+                        struct intel_context *ctx,
+                        struct drm_i915_gem_request **req_out)
 {
        struct drm_i915_private *dev_priv = to_i915(ring->dev);
        struct drm_i915_gem_request *req;
@@ -2693,6 +2754,31 @@ err:
        return ret;
 }
 
+/**
+ * i915_gem_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *       This can be NULL if the request is not directly related to
+ *       any specific user context, in which case the device's
+ *       kernel_context is used instead.
+ *
+ * Returns a pointer to the allocated request if successful, or an
+ * ERR_PTR()-encoded error code if not (callers must test with IS_ERR()).
+ */
+struct drm_i915_gem_request *
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+                      struct intel_context *ctx)
+{
+       struct drm_i915_gem_request *req;
+       int err;
+
+       if (ctx == NULL)
+               ctx = to_i915(engine->dev)->kernel_context;
+       err = __i915_gem_request_alloc(engine, ctx, &req);
+       return err ? ERR_PTR(err) : req;
+}
+
 void i915_gem_request_cancel(struct drm_i915_gem_request *req)
 {
        intel_ring_reserved_space_cancel(req->ringbuf);
@@ -2934,6 +3020,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
                if (!list_empty(&ring->request_list))
                        return;
 
+       /* We probably should sync with hangcheck here, using cancel_work_sync.
+        * Also, the locking looks broken: ring->request_list is protected by
+        * dev->struct_mutex, yet list_empty() above seems to run without it. */
+
        intel_mark_idle(dev);
 
        if (mutex_trylock(&dev->struct_mutex)) {
@@ -3106,9 +3196,13 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
                        return 0;
 
                if (*to_req == NULL) {
-                       ret = i915_gem_request_alloc(to, to->default_context, to_req);
-                       if (ret)
-                               return ret;
+                       struct drm_i915_gem_request *req;
+
+                       req = i915_gem_request_alloc(to, NULL);
+                       if (IS_ERR(req))
+                               return PTR_ERR(req);
+
+                       *to_req = req;
                }
 
                trace_i915_gem_ring_sync_to(*to_req, from, from_req);
@@ -3308,9 +3402,9 @@ int i915_gpu_idle(struct drm_device *dev)
                if (!i915.enable_execlists) {
                        struct drm_i915_gem_request *req;
 
-                       ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-                       if (ret)
-                               return ret;
+                       req = i915_gem_request_alloc(ring, NULL);
+                       if (IS_ERR(req))
+                               return PTR_ERR(req);
 
                        ret = i915_switch_context(req);
                        if (ret) {
@@ -3424,7 +3518,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
        if (flags & PIN_MAPPABLE)
                end = min_t(u64, end, dev_priv->gtt.mappable_end);
        if (flags & PIN_ZONE_4G)
-               end = min_t(u64, end, (1ULL << 32));
+               end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
 
        if (alignment == 0)
                alignment = flags & PIN_MAPPABLE ? fence_alignment :
@@ -4102,6 +4196,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
        return false;
 }
 
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+       struct drm_i915_gem_object *obj = vma->obj;
+       bool mappable, fenceable;
+       u32 fence_size, fence_alignment;
+
+       fence_size = i915_gem_get_gtt_size(obj->base.dev,
+                                          obj->base.size,
+                                          obj->tiling_mode);
+       fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+                                                    obj->base.size,
+                                                    obj->tiling_mode,
+                                                    true);
+
+       fenceable = (vma->node.size == fence_size && /* exactly fence sized */
+                    (vma->node.start & (fence_alignment - 1)) == 0); /* aligned; mask test assumes power-of-two alignment */
+
+       mappable = (vma->node.start + fence_size <= /* fence span ends within */
+                   to_i915(obj->base.dev)->gtt.mappable_end); /* the mappable range */
+
+       obj->map_and_fenceable = mappable && fenceable; /* result is stored on the object */
+}
+
 static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
                       struct i915_address_space *vm,
@@ -4169,25 +4286,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
        if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
            (bound ^ vma->bound) & GLOBAL_BIND) {
-               bool mappable, fenceable;
-               u32 fence_size, fence_alignment;
-
-               fence_size = i915_gem_get_gtt_size(obj->base.dev,
-                                                  obj->base.size,
-                                                  obj->tiling_mode);
-               fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-                                                            obj->base.size,
-                                                            obj->tiling_mode,
-                                                            true);
-
-               fenceable = (vma->node.size == fence_size &&
-                            (vma->node.start & (fence_alignment - 1)) == 0);
-
-               mappable = (vma->node.start + fence_size <=
-                           dev_priv->gtt.mappable_end);
-
-               obj->map_and_fenceable = mappable && fenceable;
-
+               __i915_vma_set_map_and_fenceable(vma);
                WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
        }
 
@@ -4259,10 +4358,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
        if (ret)
                goto unref;
 
-       BUILD_BUG_ON(I915_NUM_RINGS > 16);
-       args->busy = obj->active << 16;
-       if (obj->last_write_req)
-               args->busy |= obj->last_write_req->ring->id;
+       args->busy = 0;
+       if (obj->active) {
+               int i;
+
+               for (i = 0; i < I915_NUM_RINGS; i++) {
+                       struct drm_i915_gem_request *req;
+
+                       req = obj->last_read_req[i];
+                       if (req)
+                               args->busy |= 1 << (16 + req->ring->exec_id);
+               }
+               if (obj->last_write_req)
+                       args->busy |= obj->last_write_req->ring->exec_id;
+       }
 
 unref:
        drm_gem_object_unreference(&obj->base);
@@ -4763,7 +4872,7 @@ i915_gem_init_hw(struct drm_device *dev)
         */
        init_unused_rings(dev);
 
-       BUG_ON(!dev_priv->ring[RCS].default_context);
+       BUG_ON(!dev_priv->kernel_context);
 
        ret = i915_ppgtt_init_hw(dev);
        if (ret) {
@@ -4800,11 +4909,10 @@ i915_gem_init_hw(struct drm_device *dev)
        for_each_ring(ring, dev_priv, i) {
                struct drm_i915_gem_request *req;
 
-               WARN_ON(!ring->default_context);
-
-               ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-               if (ret) {
-                       i915_gem_cleanup_ringbuffer(dev);
+               req = i915_gem_request_alloc(ring, NULL);
+               if (IS_ERR(req)) {
+                       ret = PTR_ERR(req);
+                       i915_gem_cleanup_engines(dev);
                        goto out;
                }
 
@@ -4817,7 +4925,7 @@ i915_gem_init_hw(struct drm_device *dev)
                if (ret && ret != -EIO) {
                        DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
                        i915_gem_request_cancel(req);
-                       i915_gem_cleanup_ringbuffer(dev);
+                       i915_gem_cleanup_engines(dev);
                        goto out;
                }
 
@@ -4825,7 +4933,7 @@ i915_gem_init_hw(struct drm_device *dev)
                if (ret && ret != -EIO) {
                        DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
                        i915_gem_request_cancel(req);
-                       i915_gem_cleanup_ringbuffer(dev);
+                       i915_gem_cleanup_engines(dev);
                        goto out;
                }
 
@@ -4900,7 +5008,7 @@ out_unlock:
 }
 
 void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
@@ -4909,13 +5017,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
        for_each_ring(ring, dev_priv, i)
                dev_priv->gt.cleanup_ring(ring);
 
-    if (i915.enable_execlists)
-            /*
-             * Neither the BIOS, ourselves or any other kernel
-             * expects the system to be in execlists mode on startup,
-             * so we need to reset the GPU back to legacy mode.
-             */
-            intel_gpu_reset(dev);
+       if (i915.enable_execlists) {
+               /*
+                * Neither the BIOS, ourselves or any other kernel
+                * expects the system to be in execlists mode on startup,
+                * so we need to reset the GPU back to legacy mode.
+                */
+               intel_gpu_reset(dev);
+       }
 }
 
 static void
@@ -5043,6 +5152,8 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
        spin_lock_init(&file_priv->mm.lock);
        INIT_LIST_HEAD(&file_priv->mm.request_list);
 
+       file_priv->bsd_ring = -1;
+
        ret = i915_gem_context_open(dev, file);
        if (ret)
                kfree(file_priv);
This page took 0.162404 seconds and 5 git commands to generate.