/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "i915_drv.h"

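/*
 * Associate a request with the drm_file that submitted it: add the request
 * to the client's list of outstanding requests and record the submitting
 * task's pid.
 */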
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_private *dev_private;
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	dev_private = req->i915;
	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

	req->pid = get_pid(task_pid(current));

	return 0;
}

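/*
 * Undo i915_gem_request_add_to_client(): unlink the request from the
 * client's list and release the pid reference. Requests that were never
 * added to a client are ignored.
 */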
static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);

	put_pid(request->pid);
	request->pid = NULL;
}

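/*
 * Retire a completed request: unlink it from the engine's request list,
 * update the ring's last known head position, detach it from its client,
 * and drop the references it holds on its context and on itself.
 */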
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);
	list_del_init(&request->list);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	request->ringbuf->last_retired_head = request->postfix;

	i915_gem_request_remove_from_client(request);

	if (request->previous_context) {
		if (i915.enable_execlists)
			intel_lr_context_unpin(request->previous_context,
					       request->engine);
	}

	i915_gem_context_unreference(request->ctx);
	i915_gem_request_unreference(request);
}

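/*
 * Retire all requests on the engine up to and including @req, in
 * submission order. The caller must hold struct_mutex.
 */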
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&req->i915->drm.struct_mutex);

	if (list_empty(&req->list))
		return;

	do {
		tmp = list_first_entry(&engine->request_list,
				       typeof(*tmp), list);

		i915_gem_request_retire(tmp);
	} while (tmp != req);

	WARN_ON(i915_verify_lists(engine->dev));
}

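/*
 * Report the GPU error state to a prospective waiter: -EIO if the GPU is
 * terminally wedged, -EAGAIN (or -EIO for non-interruptible callers) whilst
 * a reset is in progress, otherwise 0.
 */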
static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
{
	if (__i915_terminally_wedged(reset_counter))
		return -EIO;

	if (__i915_reset_in_progress(reset_counter)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these.
		 */
		if (!interruptible)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

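/*
 * Prepare for (re)setting the global seqno: idle every engine and retire
 * all outstanding requests, flush any remaining breadcrumb waiters and
 * signalers if the seqno is about to wrap, then write the new value into
 * each ring.
 */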
static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
{
	struct intel_engine_cs *engine;
	int ret;

	/* Carefully retire all requests without writing to the rings */
	for_each_engine(engine, dev_priv) {
		ret = intel_engine_idle(engine);
		if (ret)
			return ret;
	}
	i915_gem_retire_requests(dev_priv);

	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
		while (intel_kick_waiters(dev_priv) ||
		       intel_kick_signalers(dev_priv))
			yield();
	}

	/* Finally reset hw state */
	for_each_engine(engine, dev_priv)
		intel_ring_init_seqno(engine, seqno);

	return 0;
}

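/*
 * Force the driver's seqno counter to @seqno. Zero is reserved and rejected
 * with -EINVAL; the rings are primed with @seqno - 1 so that wrap detection
 * continues to work.
 */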
int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	if (seqno == 0)
		return -EINVAL;

	/* The HWS page needs to be set less than what we will inject to
	 * the ring.
	 */
	ret = i915_gem_init_seqno(dev_priv, seqno - 1);
	if (ret)
		return ret;

	/* Carefully set the last_seqno value so that wrap
	 * detection still works.
	 */
	dev_priv->next_seqno = seqno;
	dev_priv->last_seqno = seqno - 1;
	if (dev_priv->last_seqno == 0)
		dev_priv->last_seqno--;

	return 0;
}

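/*
 * Allocate the next seqno, reinitialising the counters if the value would
 * wrap to the reserved 0.
 */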
static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
{
	/* reserve 0 for non-seqno */
	if (unlikely(dev_priv->next_seqno == 0)) {
		int ret;

		ret = i915_gem_init_seqno(dev_priv, 0);
		if (ret)
			return ret;

		dev_priv->next_seqno = 1;
	}

	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
	return 0;
}

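/*
 * Allocate and minimally initialise a request for @engine/@ctx: check for a
 * wedged GPU, assign a seqno, take a reference on the context and reserve
 * ring space for the eventual request emission. On success the new request
 * is returned in @req_out.
 */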
static inline int
__i915_gem_request_alloc(struct intel_engine_cs *engine,
			 struct i915_gem_context *ctx,
			 struct drm_i915_gem_request **req_out)
{
	struct drm_i915_private *dev_priv = engine->i915;
	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
	struct drm_i915_gem_request *req;
	int ret;

	if (!req_out)
		return -EINVAL;

	*req_out = NULL;

	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
	 * and restart.
	 */
	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
	if (ret)
		return ret;

	/* Move the oldest request to the slab-cache (if not in use!) */
	if (!list_empty(&engine->request_list)) {
		req = list_first_entry(&engine->request_list,
				       typeof(*req), list);
		if (i915_gem_request_completed(req))
			i915_gem_request_retire(req);
	}

	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	ret = i915_gem_get_seqno(dev_priv, &req->seqno);
	if (ret)
		goto err;

	kref_init(&req->ref);
	req->i915 = dev_priv;
	req->engine = engine;
	req->ctx = ctx;
	i915_gem_context_reference(ctx);

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_add_request() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 */
	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;

	if (i915.enable_execlists)
		ret = intel_logical_ring_alloc_request_extras(req);
	else
		ret = intel_ring_alloc_request_extras(req);
	if (ret)
		goto err_ctx;

	*req_out = req;
	return 0;

err_ctx:
	i915_gem_context_unreference(ctx);
err:
	kmem_cache_free(dev_priv->requests, req);
	return ret;
}

/**
 * i915_gem_request_alloc - allocate a request structure
 *
 * @engine: engine that we wish to issue the request on.
 * @ctx: context that the request will be associated with.
 *       This can be NULL if the request is not directly related to
 *       any specific user context, in which case this function will
 *       choose an appropriate context to use.
 *
 * Returns a pointer to the allocated request if successful,
 * or an error code if not.
 */
struct drm_i915_gem_request *
i915_gem_request_alloc(struct intel_engine_cs *engine,
		       struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *req;
	int err;

	if (!ctx)
		ctx = engine->i915->kernel_context;
	err = __i915_gem_request_alloc(engine, ctx, &req);
	return err ? ERR_PTR(err) : req;
}

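/*
 * Note that @engine now has work pending: mark the GT as awake (taking a
 * runtime-pm reference for the first request), enable powersave/RPS and
 * schedule the retire worker.
 */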
static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->gt.active_engines |= intel_engine_flag(engine);
	if (dev_priv->gt.awake)
		return;

	intel_runtime_pm_get_noresume(dev_priv);
	dev_priv->gt.awake = true;

	intel_enable_gt_powersave(dev_priv);
	i915_update_gfx_val(dev_priv);
	if (INTEL_GEN(dev_priv) >= 6)
		gen6_rps_busy(dev_priv);

	queue_delayed_work(dev_priv->wq,
			   &dev_priv->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}

/*
 * NB: This function is not allowed to fail. Doing so would mean that the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
void __i915_add_request(struct drm_i915_gem_request *request,
			struct drm_i915_gem_object *obj,
			bool flush_caches)
{
	struct intel_engine_cs *engine;
	struct intel_ringbuffer *ringbuf;
	u32 request_start;
	u32 reserved_tail;
	int ret;

	if (WARN_ON(!request))
		return;

	engine = request->engine;
	ringbuf = request->ringbuf;

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	request_start = intel_ring_get_tail(ringbuf);
	reserved_tail = request->reserved_space;
	request->reserved_space = 0;

	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	if (flush_caches) {
		if (i915.enable_execlists)
			ret = logical_ring_flush_all_caches(request);
		else
			ret = intel_ring_flush_all_caches(request);
		/* Not allowed to fail! */
		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
	}

	trace_i915_gem_request_add(request);

	request->head = request_start;

	/* Whilst this request exists, batch_obj will be on the
	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
	 * inactive_list and lose its active reference. Hence we do not need
	 * to explicitly hold another reference here.
	 */
	request->batch_obj = obj;

	/* Seal the request and mark it as pending execution. Note that
	 * we may inspect this state, without holding any locks, during
	 * hangcheck. Hence we apply the barrier to ensure that we do not
	 * see a more recent value in the hws than we are tracking.
	 */
	request->emitted_jiffies = jiffies;
	request->previous_seqno = engine->last_submitted_seqno;
	smp_store_mb(engine->last_submitted_seqno, request->seqno);
	list_add_tail(&request->list, &engine->request_list);

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	request->postfix = intel_ring_get_tail(ringbuf);

	if (i915.enable_execlists) {
		ret = engine->emit_request(request);
	} else {
		ret = engine->add_request(request);

		request->tail = intel_ring_get_tail(ringbuf);
	}
	/* Not allowed to fail! */
	WARN(ret, "emit|add_request failed: %d!\n", ret);
	/* Sanity check that the reserved size was large enough. */
	ret = intel_ring_get_tail(ringbuf) - request_start;
	if (ret < 0)
		ret += ringbuf->size;
	WARN_ONCE(ret > reserved_tail,
		  "Not enough space reserved (%d bytes) "
		  "for adding the request (%d bytes)\n",
		  reserved_tail, ret);

	i915_gem_mark_busy(engine);
}

static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/* Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as an indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

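/*
 * Stop busywaiting either when the timeout expires or when we have been
 * migrated to another CPU, which invalidates the local_clock() comparison
 * and hints at system load.
 */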
static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

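/*
 * Optimistically busywait for up to @timeout_us microseconds for the
 * request to complete before falling back to an interrupt-driven wait.
 * Returns true if the request completed during the spin.
 */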
bool __i915_spin_request(const struct drm_i915_gem_request *req,
			 int state, unsigned long timeout_us)
{
	unsigned int cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while
	 * we can service the high frequency waits as quickly as possible.
	 * However, if it is a slow request, we want to sleep as quickly as
	 * possible. The tradeoff between waiting and sleeping is roughly the
	 * time it takes to sleep on a request, on the order of a microsecond.
	 */

	timeout_us += local_clock_us(&cpu);
	do {
		if (i915_gem_request_completed(req))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax_lowlatency();
	} while (!need_resched());

	return false;
}

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: the request to wait upon
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 * @rps: client to charge for RPS boosting
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request completed within the allotted time. Else returns
 * the errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(reset);
	struct intel_wait wait;
	unsigned long timeout_remain;
	int ret = 0;

	might_sleep();

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req))
		return 0;

	timeout_remain = MAX_SCHEDULE_TIMEOUT;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		/* Record current time in case interrupted, or wedged */
		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
		*timeout += ktime_get_raw_ns();
	}

	trace_i915_gem_request_wait_begin(req);

	/* This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (INTEL_GEN(req->i915) >= 6)
		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);

	/* Optimistic spin for the next ~jiffie before touching IRQs */
	if (i915_spin_request(req, state, 5))
		goto complete;

	set_current_state(state);
	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

	intel_wait_init(&wait, req->seqno);
	if (intel_engine_add_wait(req->engine, &wait))
		/* In order to check that we haven't missed the interrupt
		 * as we enabled it, we need to kick ourselves to do a
		 * coherent check on the seqno before we sleep.
		 */
		goto wakeup;

	for (;;) {
		if (signal_pending_state(state, current)) {
			ret = -ERESTARTSYS;
			break;
		}

		timeout_remain = io_schedule_timeout(timeout_remain);
		if (timeout_remain == 0) {
			ret = -ETIME;
			break;
		}

		if (intel_wait_complete(&wait))
			break;

		set_current_state(state);

wakeup:
		/* Carefully check if the request is complete, giving time
		 * for the seqno to be visible following the interrupt.
		 * We also have to check in case we are kicked by the GPU
		 * reset in order to drop the struct_mutex.
		 */
		if (__i915_request_irq_complete(req))
			break;

		/* Only spin if we know the GPU is processing this request */
		if (i915_spin_request(req, state, 2))
			break;
	}
	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

	intel_engine_remove_wait(req->engine, &wait);
	__set_current_state(TASK_RUNNING);
complete:
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		*timeout -= ktime_get_raw_ns();
		if (*timeout < 0)
			*timeout = 0;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has
		 * a bit of mismatch in the jiffies<->nsecs<->ktime loop. So
		 * patch things up to make the test happy. We allow up to 1
		 * jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	if (rps && req->seqno == req->engine->last_submitted_seqno) {
		/* The GPU is now idle and this client has stalled.
		 * Since no other client has submitted a request in the
		 * meantime, assume that this client is the only one
		 * supplying work to the GPU but is unable to keep that
		 * work supplied because it is waiting. Since the GPU is
		 * then never kept fully busy, RPS autoclocking will
		 * keep the clocks relatively low, causing further delays.
		 * Compensate by giving the synchronous client credit for
		 * a waitboost next time.
		 */
		spin_lock(&req->i915->rps.client_lock);
		list_del_init(&rps->link);
		spin_unlock(&req->i915->rps.client_lock);
	}

	return ret;
}

/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int i915_wait_request(struct drm_i915_gem_request *req)
{
	int ret;

	GEM_BUG_ON(!req);
	lockdep_assert_held(&req->i915->drm.struct_mutex);

	ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL);
	if (ret)
		return ret;

	/* If the GPU hung, we want to keep the requests to find the guilty. */
	if (!i915_reset_in_progress(&req->i915->gpu_error))
		i915_gem_request_retire_upto(req);

	return 0;
}

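/* Final kref release callback: return the request to the slab cache. */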
void i915_gem_request_free(struct kref *req_ref)
{
	struct drm_i915_gem_request *req =
		container_of(req_ref, typeof(*req), ref);

	kmem_cache_free(req->i915->requests, req);
}