drm/i915: Fix context/engine cleanup order
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b7d7cecdddf679f30f048f467087cde45bd65079..799a53ad04f2a9cfbf5747d39735c777a940a90f 100644
@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
        return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+       unsigned long t;
+
+       /* Cheaply and approximately convert from nanoseconds to microseconds.
+        * The result and subsequent calculations are also defined in the same
+        * approximate microseconds units. The principal source of timing
+        * error here is from the simple truncation.
+        *
+        * Note that local_clock() is only defined for the current CPU;
+        * the comparisons are no longer valid if we switch CPUs. Instead of
+        * blocking preemption for the entire busywait, we can detect the CPU
+        * switch and use that as indicator of system load and a reason to
+        * stop busywaiting, see busywait_stop().
+        */
+       *cpu = get_cpu();
+       t = local_clock() >> 10;
+       put_cpu();
+
+       return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+       unsigned this_cpu;
+
+       if (time_after(local_clock_us(&this_cpu), timeout))
+               return true;
+
+       return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 {
        unsigned long timeout;
+       unsigned cpu;
+
+       /* When waiting for high frequency requests, e.g. during synchronous
+        * rendering split between the CPU and GPU, the finite amount of time
+        * required to set up the irq and wait upon it limits the response
+        * rate. By busywaiting on the request completion for a short while we
+        * can service the high frequency waits with minimal latency. However,
+        * if it is a slow request, we want to sleep as quickly as possible.
+        * The tradeoff between waiting and sleeping is roughly the time it
+        * takes to sleep on a request, on the order of a microsecond.
+        */
 
-       if (i915_gem_request_get_ring(req)->irq_refcount)
+       if (req->ring->irq_refcount)
                return -EBUSY;
 
-       timeout = jiffies + 1;
+       /* Only spin if we know the GPU is processing this request */
+       if (!i915_gem_request_started(req, true))
+               return -EAGAIN;
+
+       timeout = local_clock_us(&cpu) + 5;
        while (!need_resched()) {
                if (i915_gem_request_completed(req, true))
                        return 0;
 
-               if (time_after_eq(jiffies, timeout))
+               if (signal_pending_state(state, current))
+                       break;
+
+               if (busywait_stop(timeout, cpu))
                        break;
 
                cpu_relax_lowlatency();
        }
+
        if (i915_gem_request_completed(req, false))
                return 0;
 
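The helpers above busy-wait for roughly five of these approximate microseconds (local_clock() counts nanoseconds, and the >>10 truncation divides by 1024 rather than 1000, so the budget is closer to 5.12 us) and bail out early on a pending signal, need_resched() or a CPU migration. As a rough user-space analogue of the same pattern, a sketch only, with clock_gettime() and sched_getcpu() standing in for the kernel primitives:

/* Rough user-space analogue of the bounded busywait above; purely an
 * illustrative sketch.  local_clock(), need_resched(), signal_pending_state()
 * and cpu_relax_lowlatency() have no direct user-space equivalents.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdbool.h>
#include <time.h>

static unsigned long long monotonic_us(int *cpu)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        *cpu = sched_getcpu();  /* stand-in for get_cpu()/put_cpu() */

        /* Same cheap ns->us truncation as local_clock_us(): >>10, not /1000. */
        return ((unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec) >> 10;
}

static bool spin_until(bool (*done)(void *), void *arg, unsigned budget_us)
{
        int cpu, this_cpu;
        unsigned long long timeout = monotonic_us(&cpu) + budget_us;

        do {
                if (done(arg))
                        return true;    /* completed while spinning */
        } while (monotonic_us(&this_cpu) <= timeout && this_cpu == cpu);

        /* Timed out or migrated to another CPU: one final check, then give up. */
        return done(arg);
}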
@@ -1197,9 +1248,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
        struct drm_i915_private *dev_priv = dev->dev_private;
        const bool irq_test_in_progress =
                ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+       int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
        DEFINE_WAIT(wait);
        unsigned long timeout_expire;
-       s64 before, now;
+       s64 before = 0; /* Only to silence a compiler warning. */
        int ret;
 
        WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
@@ -1219,17 +1271,20 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
                        return -ETIME;
 
                timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+
+               /*
+                * Record current time in case interrupted by signal, or wedged.
+                */
+               before = ktime_get_raw_ns();
        }
 
        if (INTEL_INFO(dev_priv)->gen >= 6)
                gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
 
-       /* Record current time in case interrupted by signal, or wedged */
        trace_i915_gem_request_wait_begin(req);
-       before = ktime_get_raw_ns();
 
        /* Optimistic spin for the next jiffie before touching IRQs */
-       ret = __i915_spin_request(req);
+       ret = __i915_spin_request(req, state);
        if (ret == 0)
                goto out;
 
@@ -1241,8 +1296,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
        for (;;) {
                struct timer_list timer;
 
-               prepare_to_wait(&ring->irq_queue, &wait,
-                               interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+               prepare_to_wait(&ring->irq_queue, &wait, state);
 
                /* We need to check whether any gpu reset happened in between
                 * the caller grabbing the seqno and now ... */
@@ -1260,7 +1314,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
                        break;
                }
 
-               if (interruptible && signal_pending(current)) {
+               if (signal_pending_state(state, current)) {
                        ret = -ERESTARTSYS;
                        break;
                }
@@ -1292,11 +1346,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
        finish_wait(&ring->irq_queue, &wait);
 
 out:
-       now = ktime_get_raw_ns();
        trace_i915_gem_request_wait_end(req);
 
        if (timeout) {
-               s64 tres = *timeout - (now - before);
+               s64 tres = *timeout - (ktime_get_raw_ns() - before);
 
                *timeout = tres < 0 ? 0 : tres;
 
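Since "before" is now sampled only inside the if (timeout) branch, the remaining-budget arithmetic above runs only for bounded waits. A worked example of that arithmetic, with illustrative values:

/* Illustrative values only: the caller asked for at most a 2 ms wait. */
s64 budget    = 2000000;                /* *timeout on entry, in nanoseconds  */
s64 elapsed   = 1500000;                /* ktime_get_raw_ns() - before        */
s64 tres      = budget - elapsed;       /* 500000 ns handed back via *timeout */
s64 remaining = tres < 0 ? 0 : tres;    /* an overrun is clamped to zero      */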
@@ -2554,6 +2607,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
        request->batch_obj = obj;
 
        request->emitted_jiffies = jiffies;
+       request->previous_seqno = ring->last_submitted_seqno;
        ring->last_submitted_seqno = request->seqno;
        list_add_tail(&request->list, &ring->request_list);
 
@@ -2625,10 +2679,8 @@ void i915_gem_request_free(struct kref *req_ref)
                i915_gem_request_remove_from_client(req);
 
        if (ctx) {
-               if (i915.enable_execlists) {
-                       if (ctx != req->ring->default_context)
-                               intel_lr_context_unpin(req);
-               }
+               if (i915.enable_execlists && ctx != req->i915->kernel_context)
+                       intel_lr_context_unpin(req);
 
                i915_gem_context_unreference(ctx);
        }
@@ -2636,9 +2688,10 @@ void i915_gem_request_free(struct kref *req_ref)
        kmem_cache_free(req->i915->requests, req);
 }
 
-int i915_gem_request_alloc(struct intel_engine_cs *ring,
-                          struct intel_context *ctx,
-                          struct drm_i915_gem_request **req_out)
+static inline int
+__i915_gem_request_alloc(struct intel_engine_cs *ring,
+                        struct intel_context *ctx,
+                        struct drm_i915_gem_request **req_out)
 {
        struct drm_i915_private *dev_priv = to_i915(ring->dev);
        struct drm_i915_gem_request *req;
@@ -2701,6 +2754,31 @@ err:
        return ret;
 }
 
+/**
+ * i915_gem_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *       This can be NULL if the request is not directly related to
+ *       any specific user context, in which case this function will
+ *       choose an appropriate context to use.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an ERR_PTR-encoded error on failure.
+ */
+struct drm_i915_gem_request *
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+                      struct intel_context *ctx)
+{
+       struct drm_i915_gem_request *req;
+       int err;
+
+       if (ctx == NULL)
+               ctx = to_i915(engine->dev)->kernel_context;
+       err = __i915_gem_request_alloc(engine, ctx, &req);
+       return err ? ERR_PTR(err) : req;
+}
+
 void i915_gem_request_cancel(struct drm_i915_gem_request *req)
 {
        intel_ring_reserved_space_cancel(req->ringbuf);
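The callers updated later in this patch (__i915_gem_object_sync(), i915_gpu_idle(), i915_gem_init_hw()) all follow the same shape: pass a NULL context to get the kernel context, test the result with IS_ERR(), and cancel the request if building it fails. A sketch of that calling convention, with a hypothetical helper name:

/* Sketch of a typical caller of the new interface; the helper name is
 * illustrative, "engine" is whichever ring the caller targets. */
static int example_submit_on_kernel_context(struct intel_engine_cs *engine)
{
        struct drm_i915_gem_request *req;
        int ret;

        req = i915_gem_request_alloc(engine, NULL);     /* NULL -> kernel_context */
        if (IS_ERR(req))
                return PTR_ERR(req);

        ret = i915_switch_context(req);                 /* build up the request */
        if (ret) {
                i915_gem_request_cancel(req);           /* abandon on failure */
                return ret;
        }

        i915_add_request(req);                          /* submit */
        return 0;
}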
@@ -2765,20 +2843,13 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 
        if (i915.enable_execlists) {
                spin_lock_irq(&ring->execlist_lock);
-               while (!list_empty(&ring->execlist_queue)) {
-                       struct drm_i915_gem_request *submit_req;
-
-                       submit_req = list_first_entry(&ring->execlist_queue,
-                                       struct drm_i915_gem_request,
-                                       execlist_link);
-                       list_del(&submit_req->execlist_link);
 
-                       if (submit_req->ctx != ring->default_context)
-                               intel_lr_context_unpin(submit_req);
+               /* list_splice_tail_init checks for empty lists */
+               list_splice_tail_init(&ring->execlist_queue,
+                                     &ring->execlist_retired_req_list);
 
-                       i915_gem_request_unreference(submit_req);
-               }
                spin_unlock_irq(&ring->execlist_lock);
+               intel_execlists_retire_requests(ring);
        }
 
        /*
@@ -3125,9 +3196,13 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
                        return 0;
 
                if (*to_req == NULL) {
-                       ret = i915_gem_request_alloc(to, to->default_context, to_req);
-                       if (ret)
-                               return ret;
+                       struct drm_i915_gem_request *req;
+
+                       req = i915_gem_request_alloc(to, NULL);
+                       if (IS_ERR(req))
+                               return PTR_ERR(req);
+
+                       *to_req = req;
                }
 
                trace_i915_gem_ring_sync_to(*to_req, from, from_req);
@@ -3327,9 +3402,9 @@ int i915_gpu_idle(struct drm_device *dev)
                if (!i915.enable_execlists) {
                        struct drm_i915_gem_request *req;
 
-                       ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-                       if (ret)
-                               return ret;
+                       req = i915_gem_request_alloc(ring, NULL);
+                       if (IS_ERR(req))
+                               return PTR_ERR(req);
 
                        ret = i915_switch_context(req);
                        if (ret) {
@@ -3443,7 +3518,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
        if (flags & PIN_MAPPABLE)
                end = min_t(u64, end, dev_priv->gtt.mappable_end);
        if (flags & PIN_ZONE_4G)
-               end = min_t(u64, end, (1ULL << 32));
+               end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
 
        if (alignment == 0)
                alignment = flags & PIN_MAPPABLE ? fence_alignment :
@@ -3480,30 +3555,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
        if (IS_ERR(vma))
                goto err_unpin;
 
-       if (flags & PIN_HIGH) {
-               search_flag = DRM_MM_SEARCH_BELOW;
-               alloc_flag = DRM_MM_CREATE_TOP;
+       if (flags & PIN_OFFSET_FIXED) {
+               uint64_t offset = flags & PIN_OFFSET_MASK;
+
+               if (offset & (alignment - 1) || offset + size > end) {
+                       ret = -EINVAL;
+                       goto err_free_vma;
+               }
+               vma->node.start = offset;
+               vma->node.size = size;
+               vma->node.color = obj->cache_level;
+               ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+               if (ret) {
+                       ret = i915_gem_evict_for_vma(vma);
+                       if (ret == 0)
+                               ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+               }
+               if (ret)
+                       goto err_free_vma;
        } else {
-               search_flag = DRM_MM_SEARCH_DEFAULT;
-               alloc_flag = DRM_MM_CREATE_DEFAULT;
-       }
+               if (flags & PIN_HIGH) {
+                       search_flag = DRM_MM_SEARCH_BELOW;
+                       alloc_flag = DRM_MM_CREATE_TOP;
+               } else {
+                       search_flag = DRM_MM_SEARCH_DEFAULT;
+                       alloc_flag = DRM_MM_CREATE_DEFAULT;
+               }
 
 search_free:
-       ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
-                                                 size, alignment,
-                                                 obj->cache_level,
-                                                 start, end,
-                                                 search_flag,
-                                                 alloc_flag);
-       if (ret) {
-               ret = i915_gem_evict_something(dev, vm, size, alignment,
-                                              obj->cache_level,
-                                              start, end,
-                                              flags);
-               if (ret == 0)
-                       goto search_free;
+               ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+                                                         size, alignment,
+                                                         obj->cache_level,
+                                                         start, end,
+                                                         search_flag,
+                                                         alloc_flag);
+               if (ret) {
+                       ret = i915_gem_evict_something(dev, vm, size, alignment,
+                                                      obj->cache_level,
+                                                      start, end,
+                                                      flags);
+                       if (ret == 0)
+                               goto search_free;
 
-               goto err_free_vma;
+                       goto err_free_vma;
+               }
        }
        if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
                ret = -EINVAL;
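The fixed-offset path expects the desired start address in the PIN_OFFSET_MASK bits of the pin flags, the same convention PIN_OFFSET_BIAS already uses; if the range is occupied, i915_gem_evict_for_vma() clears it and the reservation is retried, while a misaligned or out-of-zone offset fails with -EINVAL. A hypothetical caller might look like this (helper name and offset are illustrative):

/* Hypothetical helper (sketch): pin obj at a caller-chosen GGTT address.
 * The offset must satisfy the alignment and zone checks added above. */
static int example_pin_at_fixed_offset(struct drm_i915_gem_object *obj,
                                       u64 offset)
{
        int ret;

        ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_OFFSET_FIXED | offset);
        if (ret)
                DRM_DEBUG("fixed pin at %llx failed: %d\n",
                          (unsigned long long)offset, ret);

        return ret;
}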
@@ -4094,9 +4189,36 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
            vma->node.start < (flags & PIN_OFFSET_MASK))
                return true;
 
+       if (flags & PIN_OFFSET_FIXED &&
+           vma->node.start != (flags & PIN_OFFSET_MASK))
+               return true;
+
        return false;
 }
 
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+       struct drm_i915_gem_object *obj = vma->obj;
+       bool mappable, fenceable;
+       u32 fence_size, fence_alignment;
+
+       fence_size = i915_gem_get_gtt_size(obj->base.dev,
+                                          obj->base.size,
+                                          obj->tiling_mode);
+       fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+                                                    obj->base.size,
+                                                    obj->tiling_mode,
+                                                    true);
+
+       fenceable = (vma->node.size == fence_size &&
+                    (vma->node.start & (fence_alignment - 1)) == 0);
+
+       mappable = (vma->node.start + fence_size <=
+                   to_i915(obj->base.dev)->gtt.mappable_end);
+
+       obj->map_and_fenceable = mappable && fenceable;
+}
+
 static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
                       struct i915_address_space *vm,
@@ -4164,25 +4286,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
        if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
            (bound ^ vma->bound) & GLOBAL_BIND) {
-               bool mappable, fenceable;
-               u32 fence_size, fence_alignment;
-
-               fence_size = i915_gem_get_gtt_size(obj->base.dev,
-                                                  obj->base.size,
-                                                  obj->tiling_mode);
-               fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-                                                            obj->base.size,
-                                                            obj->tiling_mode,
-                                                            true);
-
-               fenceable = (vma->node.size == fence_size &&
-                            (vma->node.start & (fence_alignment - 1)) == 0);
-
-               mappable = (vma->node.start + fence_size <=
-                           dev_priv->gtt.mappable_end);
-
-               obj->map_and_fenceable = mappable && fenceable;
-
+               __i915_vma_set_map_and_fenceable(vma);
                WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
        }
 
@@ -4254,10 +4358,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
        if (ret)
                goto unref;
 
-       BUILD_BUG_ON(I915_NUM_RINGS > 16);
-       args->busy = obj->active << 16;
-       if (obj->last_write_req)
-               args->busy |= obj->last_write_req->ring->id;
+       args->busy = 0;
+       if (obj->active) {
+               int i;
+
+               for (i = 0; i < I915_NUM_RINGS; i++) {
+                       struct drm_i915_gem_request *req;
+
+                       req = obj->last_read_req[i];
+                       if (req)
+                               args->busy |= 1 << (16 + req->ring->exec_id);
+               }
+               if (obj->last_write_req)
+                       args->busy |= obj->last_write_req->ring->exec_id;
+       }
 
 unref:
        drm_gem_object_unreference(&obj->base);
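Instead of the old "obj->active << 16 | write ring id" report, the busy word now packs a bitmask of reading engines (keyed by exec_id) into the upper 16 bits and the writing engine's exec_id into the lower 16 bits. A user-space consumer could decode it roughly like this (struct and field names are illustrative):

/* Illustrative user-space decode of drm_i915_gem_busy.busy (sketch). */
#include <stdint.h>

struct busy_info {
        uint16_t write_engine;  /* exec_id of the engine still writing, 0 if none */
        uint16_t read_engines;  /* bitmask of exec_ids with outstanding reads     */
};

static struct busy_info decode_busy(uint32_t busy)
{
        struct busy_info info = {
                .write_engine = busy & 0xffff,
                .read_engines = busy >> 16,
        };

        return info;
}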
@@ -4758,7 +4872,7 @@ i915_gem_init_hw(struct drm_device *dev)
         */
        init_unused_rings(dev);
 
-       BUG_ON(!dev_priv->ring[RCS].default_context);
+       BUG_ON(!dev_priv->kernel_context);
 
        ret = i915_ppgtt_init_hw(dev);
        if (ret) {
@@ -4795,11 +4909,10 @@ i915_gem_init_hw(struct drm_device *dev)
        for_each_ring(ring, dev_priv, i) {
                struct drm_i915_gem_request *req;
 
-               WARN_ON(!ring->default_context);
-
-               ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-               if (ret) {
-                       i915_gem_cleanup_ringbuffer(dev);
+               req = i915_gem_request_alloc(ring, NULL);
+               if (IS_ERR(req)) {
+                       ret = PTR_ERR(req);
+                       i915_gem_cleanup_engines(dev);
                        goto out;
                }
 
@@ -4812,7 +4925,7 @@ i915_gem_init_hw(struct drm_device *dev)
                if (ret && ret != -EIO) {
                        DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
                        i915_gem_request_cancel(req);
-                       i915_gem_cleanup_ringbuffer(dev);
+                       i915_gem_cleanup_engines(dev);
                        goto out;
                }
 
@@ -4820,7 +4933,7 @@ i915_gem_init_hw(struct drm_device *dev)
                if (ret && ret != -EIO) {
                        DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
                        i915_gem_request_cancel(req);
-                       i915_gem_cleanup_ringbuffer(dev);
+                       i915_gem_cleanup_engines(dev);
                        goto out;
                }
 
@@ -4842,14 +4955,6 @@ int i915_gem_init(struct drm_device *dev)
 
        mutex_lock(&dev->struct_mutex);
 
-       if (IS_VALLEYVIEW(dev)) {
-               /* VLVA0 (potential hack), BIOS isn't actually waking us */
-               I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
-               if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
-                             VLV_GTLC_ALLOWWAKEACK), 10))
-                       DRM_DEBUG_DRIVER("allow wake ack timed out\n");
-       }
-
        if (!i915.enable_execlists) {
                dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
                dev_priv->gt.init_rings = i915_gem_init_rings;
@@ -4903,7 +5008,7 @@ out_unlock:
 }
 
 void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
@@ -4912,13 +5017,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
        for_each_ring(ring, dev_priv, i)
                dev_priv->gt.cleanup_ring(ring);
 
-    if (i915.enable_execlists)
-            /*
-             * Neither the BIOS, ourselves or any other kernel
-             * expects the system to be in execlists mode on startup,
-             * so we need to reset the GPU back to legacy mode.
-             */
-            intel_gpu_reset(dev);
+       if (i915.enable_execlists) {
+               /*
+                * Neither the BIOS, ourselves or any other kernel
+                * expects the system to be in execlists mode on startup,
+                * so we need to reset the GPU back to legacy mode.
+                */
+               intel_gpu_reset(dev);
+       }
 }
 
 static void
@@ -4967,7 +5073,7 @@ i915_gem_load(struct drm_device *dev)
 
        dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
 
-       if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
+       if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev))
                dev_priv->num_fence_regs = 32;
        else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
                dev_priv->num_fence_regs = 16;
@@ -5046,6 +5152,8 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
        spin_lock_init(&file_priv->mm.lock);
        INIT_LIST_HEAD(&file_priv->mm.request_list);
 
+       file_priv->bsd_ring = -1;
+
        ret = i915_gem_context_open(dev, file);
        if (ret)
                kfree(file_priv);
@@ -5188,6 +5296,21 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
        return false;
 }
 
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
+{
+       struct page *page;
+
+       /* Only default objects have per-page dirty tracking */
+       if (WARN_ON(obj->ops != &i915_gem_object_ops))
+               return NULL;
+
+       page = i915_gem_object_get_page(obj, n);
+       set_page_dirty(page);
+       return page;
+}
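Marking the page dirty here ensures that a CPU write through the returned page is written back to the shmemfs backing store when the object's pages are eventually released. A hypothetical caller, assuming the write stays within a single page:

/* Hypothetical caller (sketch): patch a few bytes of an object via the CPU.
 * Assumes the write does not cross a page boundary. */
static int example_patch_object(struct drm_i915_gem_object *obj,
                                unsigned long offset,
                                const void *src, size_t len)
{
        struct page *page;
        void *vaddr;

        page = i915_gem_object_get_dirty_page(obj, offset >> PAGE_SHIFT);
        if (page == NULL)
                return -EINVAL;         /* not a default shmemfs-backed object */

        vaddr = kmap(page);
        memcpy(vaddr + offset_in_page(offset), src, len);
        kunmap(page);

        return 0;
}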
+
 /* Allocate a new GEM object and fill it with the supplied data */
 struct drm_i915_gem_object *
 i915_gem_object_create_from_data(struct drm_device *dev,
@@ -5213,6 +5336,7 @@ i915_gem_object_create_from_data(struct drm_device *dev,
        i915_gem_object_pin_pages(obj);
        sg = obj->pages;
        bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
+       obj->dirty = 1;         /* Backing store is now out of date */
        i915_gem_object_unpin_pages(obj);
 
        if (WARN_ON(bytes != size)) {