/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
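/* Core driver declarations (requests, engines, ringbuffers) used below. */
#include "i915_drv.h"

/*
 * Track each request on the submitting client's list
 * (file_priv->mm.request_list) so the driver can find a client's
 * outstanding work again, e.g. when the file is closed.
 */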
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
                                   struct drm_file *file)
{
        struct drm_i915_private *dev_private;
        struct drm_i915_file_private *file_priv;

        WARN_ON(!req || !file || req->file_priv);

        if (!req || !file)
                return -EINVAL;

        if (req->file_priv)
                return -EINVAL;

        dev_private = req->i915;
        file_priv = file->driver_priv;

        spin_lock(&file_priv->mm.lock);
        req->file_priv = file_priv;
        list_add_tail(&req->client_list, &file_priv->mm.request_list);
        spin_unlock(&file_priv->mm.lock);

        req->pid = get_pid(task_pid(current));

        return 0;
}
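/*
 * Undo i915_gem_request_add_to_client(): unlink the request from the owning
 * client's list under the per-file lock and drop the pid reference taken at
 * submission.
 */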
static void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
        struct drm_i915_file_private *file_priv = request->file_priv;

        if (!file_priv)
                return;

        spin_lock(&file_priv->mm.lock);
        list_del(&request->client_list);
        request->file_priv = NULL;
        spin_unlock(&file_priv->mm.lock);

        put_pid(request->pid);
        request->pid = NULL;
}
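/*
 * A request is retired once the GPU is known to have finished with it: it is
 * removed from the engine's list, the ring space it consumed is reclaimed and
 * its context and request references are dropped.
 */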
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
        trace_i915_gem_request_retire(request);
        list_del_init(&request->list);

        /* We know the GPU must have read the request to have
         * sent us the seqno + interrupt, so use the position
         * of tail of the request to update the last known position
         * of the GPU head.
         *
         * Note this requires that we are always called in request
         * completion order.
         */
        request->ringbuf->last_retired_head = request->postfix;

        i915_gem_request_remove_from_client(request);

        if (request->previous_context) {
                if (i915.enable_execlists)
                        intel_lr_context_unpin(request->previous_context,
                                               request->engine);
        }

        i915_gem_context_unreference(request->ctx);
        i915_gem_request_unreference(request);
}
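/*
 * Retire every request on @req's engine that was submitted before @req, and
 * finally @req itself. Requests must be retired in submission order, hence
 * the walk from the head of the engine's request list.
 */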
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
{
        struct intel_engine_cs *engine = req->engine;
        struct drm_i915_gem_request *tmp;

        lockdep_assert_held(&req->i915->drm.struct_mutex);

        if (list_empty(&req->list))
                return;

        do {
                tmp = list_first_entry(&engine->request_list,
                                       typeof(*tmp), list);

                i915_gem_request_retire(tmp);
        } while (tmp != req);

        WARN_ON(i915_verify_lists(engine->dev));
}
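/*
 * Translate the current GPU reset state into an errno: -EIO if the GPU is
 * terminally wedged, -EAGAIN (or -EIO for callers that cannot restart) while
 * a reset is in progress, and 0 otherwise.
 */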
static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
{
        if (__i915_terminally_wedged(reset_counter))
                return -EIO;

        if (__i915_reset_in_progress(reset_counter)) {
                /* Non-interruptible callers can't handle -EAGAIN, hence return
                 * -EIO unconditionally for these.
                 */
                if (!interruptible)
                        return -EIO;

                return -EAGAIN;
        }

        return 0;
}
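/*
 * Prepare both software and hardware state for a new seqno value: idle and
 * retire all outstanding work so nothing still refers to the old numbering,
 * then program every engine with the new value.
 */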
static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
{
        struct intel_engine_cs *engine;
        int ret;

        /* Carefully retire all requests without writing to the rings */
        for_each_engine(engine, dev_priv) {
                ret = intel_engine_idle(engine);
                if (ret)
                        return ret;
        }
        i915_gem_retire_requests(dev_priv);

        /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
        if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
                while (intel_kick_waiters(dev_priv) ||
                       intel_kick_signalers(dev_priv))
                        yield();
        }

        /* Finally reset hw state */
        for_each_engine(engine, dev_priv)
                intel_ring_init_seqno(engine, seqno);

        return 0;
}
int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
{
        struct drm_i915_private *dev_priv = to_i915(dev);
        int ret;

        if (seqno == 0)
                return -EINVAL;

        /* HWS page needs to be set less than what we
         * will inject to ring
         */
        ret = i915_gem_init_seqno(dev_priv, seqno - 1);
        if (ret)
                return ret;

        /* Carefully set the last_seqno value so that wrap
         * detection still works
         */
        dev_priv->next_seqno = seqno;
        dev_priv->last_seqno = seqno - 1;
        if (dev_priv->last_seqno == 0)
                dev_priv->last_seqno--;

        return 0;
}
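/*
 * Hand out the next seqno, reinitialising the counters first if next_seqno
 * is about to wrap to the reserved value 0.
 */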
static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
{
        /* reserve 0 for non-seqno */
        if (unlikely(dev_priv->next_seqno == 0)) {
                int ret;

                ret = i915_gem_init_seqno(dev_priv, 0);
                if (ret)
                        return ret;

                dev_priv->next_seqno = 1;
        }

        *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;

        return 0;
}
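/*
 * Allocate and minimally initialise a request for @engine and @ctx: refuse if
 * the GPU is wedged, recycle the oldest completed request on the engine,
 * assign a seqno and reserve enough ring space that the final
 * i915_add_request() cannot fail.
 */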
static int
__i915_gem_request_alloc(struct intel_engine_cs *engine,
                         struct i915_gem_context *ctx,
                         struct drm_i915_gem_request **req_out)
{
        struct drm_i915_private *dev_priv = engine->i915;
        unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
        struct drm_i915_gem_request *req;
        int ret;

        if (!req_out)
                return -EINVAL;

        *req_out = NULL;

        /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
         * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
         * and restart.
         */
        ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
        if (ret)
                return ret;

        /* Move the oldest request to the slab-cache (if not in use!) */
        if (!list_empty(&engine->request_list)) {
                req = list_first_entry(&engine->request_list,
                                       typeof(*req), list);
                if (i915_gem_request_completed(req))
                        i915_gem_request_retire(req);
        }

        req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
        if (!req)
                return -ENOMEM;

        ret = i915_gem_get_seqno(dev_priv, &req->seqno);
        if (ret)
                goto err;

        kref_init(&req->ref);
        req->i915 = dev_priv;
        req->engine = engine;
        req->ctx = ctx;
        i915_gem_context_reference(ctx);

        /*
         * Reserve space in the ring buffer for all the commands required to
         * eventually emit this request. This is to guarantee that the
         * i915_add_request() call can't fail. Note that the reserve may need
         * to be redone if the request is not actually submitted straight
         * away, e.g. because a GPU scheduler has deferred it.
         */
        req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;

        if (i915.enable_execlists)
                ret = intel_logical_ring_alloc_request_extras(req);
        else
                ret = intel_ring_alloc_request_extras(req);
        if (ret)
                goto err_ctx;

        *req_out = req;
        return 0;

err_ctx:
        i915_gem_context_unreference(ctx);
err:
        kmem_cache_free(dev_priv->requests, req);
        return ret;
}
/**
 * i915_gem_request_alloc - allocate a request structure
 *
 * @engine: engine that we wish to issue the request on.
 * @ctx: context that the request will be associated with.
 *       This can be NULL if the request is not directly related to
 *       any specific user context, in which case this function will
 *       choose an appropriate context to use.
 *
 * Returns a pointer to the allocated request if successful,
 * or an error code if not.
 */
struct drm_i915_gem_request *
i915_gem_request_alloc(struct intel_engine_cs *engine,
                       struct i915_gem_context *ctx)
{
        struct drm_i915_gem_request *req;
        int err;

        if (!ctx)
                ctx = engine->i915->kernel_context;
        err = __i915_gem_request_alloc(engine, ctx, &req);
        return err ? ERR_PTR(err) : req;
}
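/*
 * Called when a request is committed to hardware: mark the engine as active
 * and, on the idle->busy transition, take a runtime-pm wakeref, re-enable
 * powersave/RPS and schedule the retire worker.
 */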
static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
{
        struct drm_i915_private *dev_priv = engine->i915;

        dev_priv->gt.active_engines |= intel_engine_flag(engine);
        if (dev_priv->gt.awake)
                return;

        intel_runtime_pm_get_noresume(dev_priv);
        dev_priv->gt.awake = true;

        intel_enable_gt_powersave(dev_priv);
        i915_update_gfx_val(dev_priv);
        if (INTEL_GEN(dev_priv) >= 6)
                gen6_rps_busy(dev_priv);

        queue_delayed_work(dev_priv->wq,
                           &dev_priv->gt.retire_work,
                           round_jiffies_up_relative(HZ));
}
/*
 * NB: This function is not allowed to fail. Doing so would mean that the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
void __i915_add_request(struct drm_i915_gem_request *request,
                        struct drm_i915_gem_object *obj,
                        bool flush_caches)
{
        struct intel_engine_cs *engine;
        struct intel_ringbuffer *ringbuf;
        u32 request_start;
        u32 reserved_tail;
        int ret;

        if (WARN_ON(!request))
                return;

        engine = request->engine;
        ringbuf = request->ringbuf;

        /*
         * To ensure that this call will not fail, space for its emissions
         * should already have been reserved in the ring buffer. Let the ring
         * know that it is time to use that space up.
         */
        request_start = intel_ring_get_tail(ringbuf);
        reserved_tail = request->reserved_space;
        request->reserved_space = 0;

        /*
         * Emit any outstanding flushes - execbuf can fail to emit the flush
         * after having emitted the batchbuffer command. Hence we need to fix
         * things up similar to emitting the lazy request. The difference here
         * is that the flush _must_ happen before the next request, no matter
         * what.
         */
        if (flush_caches) {
                if (i915.enable_execlists)
                        ret = logical_ring_flush_all_caches(request);
                else
                        ret = intel_ring_flush_all_caches(request);
                /* Not allowed to fail! */
                WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
        }

        trace_i915_gem_request_add(request);

        request->head = request_start;

        /* Whilst this request exists, batch_obj will be on the
         * active_list, and so will hold the active reference. Only when this
         * request is retired will the batch_obj be moved onto the
         * inactive_list and lose its active reference. Hence we do not need
         * to explicitly hold another reference here.
         */
        request->batch_obj = obj;

        /* Seal the request and mark it as pending execution. Note that
         * we may inspect this state, without holding any locks, during
         * hangcheck. Hence we apply the barrier to ensure that we do not
         * see a more recent value in the hws than we are tracking.
         */
        request->emitted_jiffies = jiffies;
        request->previous_seqno = engine->last_submitted_seqno;
        smp_store_mb(engine->last_submitted_seqno, request->seqno);
        list_add_tail(&request->list, &engine->request_list);

        /* Record the position of the start of the request so that
         * should we detect the updated seqno part-way through the
         * GPU processing the request, we never over-estimate the
         * position of the head.
         */
        request->postfix = intel_ring_get_tail(ringbuf);

        if (i915.enable_execlists) {
                ret = engine->emit_request(request);
        } else {
                ret = engine->add_request(request);

                request->tail = intel_ring_get_tail(ringbuf);
        }
        /* Not allowed to fail! */
        WARN(ret, "emit|add_request failed: %d!\n", ret);
        /* Sanity check that the reserved size was large enough. */
        ret = intel_ring_get_tail(ringbuf) - request_start;
        if (ret < 0)
                ret += ringbuf->size;
        WARN_ONCE(ret > reserved_tail,
                  "Not enough space reserved (%d bytes) "
                  "for adding the request (%d bytes)\n",
                  reserved_tail, ret);

        i915_gem_mark_busy(engine);
}
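/*
 * Approximate microsecond timestamp used by the busywait loop below; also
 * reports which CPU the sample was taken on so that busywait_stop() can
 * detect a CPU switch.
 */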
static unsigned long local_clock_us(unsigned int *cpu)
{
        unsigned long t;

        /* Cheaply and approximately convert from nanoseconds to microseconds.
         * The result and subsequent calculations are also defined in the same
         * approximate microseconds units. The principal source of timing
         * error here is from the simple truncation.
         *
         * Note that local_clock() is only defined wrt to the current CPU;
         * the comparisons are no longer valid if we switch CPUs. Instead of
         * blocking preemption for the entire busywait, we can detect the CPU
         * switch and use that as indicator of system load and a reason to
         * stop busywaiting, see busywait_stop().
         */
        *cpu = get_cpu();
        t = local_clock() >> 10;
        put_cpu();

        return t;
}
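/* Stop busywaiting once the timeout expires or we have migrated to another CPU. */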
static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
        unsigned int this_cpu;

        if (time_after(local_clock_us(&this_cpu), timeout))
                return true;

        return this_cpu != cpu;
}
bool __i915_spin_request(const struct drm_i915_gem_request *req,
                         int state, unsigned long timeout_us)
{
        unsigned int cpu;

        /* When waiting for high frequency requests, e.g. during synchronous
         * rendering split between the CPU and GPU, the finite amount of time
         * required to set up the irq and wait upon it limits the response
         * rate. By busywaiting on the request completion for a short while we
         * can service the high frequency waits as quickly as possible. However,
         * if it is a slow request, we want to sleep as quickly as possible.
         * The tradeoff between waiting and sleeping is roughly the time it
         * takes to sleep on a request, on the order of a microsecond.
         */

        timeout_us += local_clock_us(&cpu);
        do {
                if (i915_gem_request_completed(req))
                        return true;

                if (signal_pending_state(state, current))
                        break;

                if (busywait_stop(timeout_us, cpu))
                        break;

                cpu_relax_lowlatency();
        } while (!need_resched());

        return false;
}
/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: the request to wait upon
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 * @rps: client to charge for RPS boosting
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
                        bool interruptible,
                        s64 *timeout,
                        struct intel_rps_client *rps)
{
        int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
        DEFINE_WAIT(reset);
        struct intel_wait wait;
        unsigned long timeout_remain;
        int ret = 0;

        might_sleep();

        if (list_empty(&req->list))
                return 0;

        if (i915_gem_request_completed(req))
                return 0;

        timeout_remain = MAX_SCHEDULE_TIMEOUT;
        if (timeout) {
                if (WARN_ON(*timeout < 0))
                        return -EINVAL;

                if (*timeout == 0)
                        return -ETIME;

                /* Record current time in case interrupted, or wedged */
                timeout_remain = nsecs_to_jiffies_timeout(*timeout);
                *timeout += ktime_get_raw_ns();
        }

        trace_i915_gem_request_wait_begin(req);

        /* This client is about to stall waiting for the GPU. In many cases
         * this is undesirable and limits the throughput of the system, as
         * many clients cannot continue processing user input/output whilst
         * blocked. RPS autotuning may take tens of milliseconds to respond
         * to the GPU load and thus incurs additional latency for the client.
         * We can circumvent that by promoting the GPU frequency to maximum
         * before we wait. This makes the GPU throttle up much more quickly
         * (good for benchmarks and user experience, e.g. window animations),
         * but at a cost of spending more power processing the workload
         * (bad for battery). Not all clients even want their results
         * immediately and for them we should just let the GPU select its own
         * frequency to maximise efficiency. To prevent a single client from
         * forcing the clocks too high for the whole system, we only allow
         * each client to waitboost once in a busy period.
         */
        if (INTEL_GEN(req->i915) >= 6)
                gen6_rps_boost(req->i915, rps, req->emitted_jiffies);

        /* Optimistic spin for the next ~jiffie before touching IRQs */
        if (i915_spin_request(req, state, 5))
                goto complete;

        set_current_state(state);
        add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

        intel_wait_init(&wait, req->seqno);
        if (intel_engine_add_wait(req->engine, &wait))
                /* In order to check that we haven't missed the interrupt
                 * as we enabled it, we need to kick ourselves to do a
                 * coherent check on the seqno before we sleep.
                 */
                goto wakeup;

        for (;;) {
                if (signal_pending_state(state, current)) {
                        ret = -ERESTARTSYS;
                        break;
                }

                timeout_remain = io_schedule_timeout(timeout_remain);
                if (timeout_remain == 0) {
                        ret = -ETIME;
                        break;
                }

                if (intel_wait_complete(&wait))
                        break;

                set_current_state(state);

wakeup:
                /* Carefully check if the request is complete, giving time
                 * for the seqno to be visible following the interrupt.
                 * We also have to check in case we are kicked by the GPU
                 * reset in order to drop the struct_mutex.
                 */
                if (__i915_request_irq_complete(req))
                        break;

                /* Only spin if we know the GPU is processing this request */
                if (i915_spin_request(req, state, 2))
                        break;
        }
        remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

        intel_engine_remove_wait(req->engine, &wait);
        __set_current_state(TASK_RUNNING);
complete:
        trace_i915_gem_request_wait_end(req);

        if (timeout) {
                *timeout -= ktime_get_raw_ns();
                if (*timeout < 0)
                        *timeout = 0;

                /*
                 * Apparently ktime isn't accurate enough and occasionally has a
                 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
                 * things up to make the test happy. We allow up to 1 jiffy.
                 *
                 * This is a regression from the timespec->ktime conversion.
                 */
                if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
                        *timeout = 0;
        }

        if (rps && req->seqno == req->engine->last_submitted_seqno) {
                /* The GPU is now idle and this client has stalled.
                 * Since no other client has submitted a request in the
                 * meantime, assume that this client is the only one
                 * supplying work to the GPU but is unable to keep that
                 * work supplied because it is waiting. Since the GPU is
                 * then never kept fully busy, RPS autoclocking will
                 * keep the clocks relatively low, causing further delays.
                 * Compensate by giving the synchronous client credit for
                 * a waitboost next time.
                 */
                spin_lock(&req->i915->rps.client_lock);
                list_del_init(&rps->link);
                spin_unlock(&req->i915->rps.client_lock);
        }

        return ret;
}
/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int i915_wait_request(struct drm_i915_gem_request *req)
{
        int ret;

        lockdep_assert_held(&req->i915->drm.struct_mutex);

        ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL);
        if (ret)
                return ret;

        /* If the GPU hung, we want to keep the requests to find the guilty. */
        if (!i915_reset_in_progress(&req->i915->gpu_error))
                i915_gem_request_retire_upto(req);

        return 0;
}
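/* Final kref release callback: return the request to its slab cache. */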
void i915_gem_request_free(struct kref *req_ref)
{
        struct drm_i915_gem_request *req =
                container_of(req_ref, typeof(*req), ref);
        kmem_cache_free(req->i915->requests, req);
}