/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

#define RQ_BUG_ON(expr)

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       EXIT_COND,
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}
#undef EXIT_COND

	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(obj->base.dev);

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static int
drop_pages(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma, *next;
	int ret;

	drm_gem_object_reference(&obj->base);
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
		if (i915_vma_unbind(vma))
			break;

	ret = i915_gem_object_put_pages(obj);
	drm_gem_object_unreference(&obj->base);

	return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

	if (obj->base.filp == NULL)
		return -EINVAL;

	ret = drop_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
	obj->ops = &i915_gem_phys_ops;

	return i915_gem_object_get_pages(obj);
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(dev);

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

	if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
		return -EINVAL;

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(ggtt->mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_flush;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

40123c1f 895static int
e244a443
DV
896i915_gem_shmem_pwrite(struct drm_device *dev,
897 struct drm_i915_gem_object *obj,
898 struct drm_i915_gem_pwrite *args,
899 struct drm_file *file)
40123c1f 900{
40123c1f 901 ssize_t remain;
8c59967c
DV
902 loff_t offset;
903 char __user *user_data;
eb2c0c81 904 int shmem_page_offset, page_length, ret = 0;
8c59967c 905 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
e244a443 906 int hit_slowpath = 0;
58642885
DV
907 int needs_clflush_after = 0;
908 int needs_clflush_before = 0;
67d5a50c 909 struct sg_page_iter sg_iter;
40123c1f 910
2bb4629a 911 user_data = to_user_ptr(args->data_ptr);
40123c1f
EA
912 remain = args->size;
913
8c59967c 914 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
40123c1f 915
58642885
DV
916 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
917 /* If we're not in the cpu write domain, set ourself into the gtt
918 * write domain and manually flush cachelines (if required). This
919 * optimizes for the case when the gpu will use the data
920 * right away and we therefore have to clflush anyway. */
2c22569b 921 needs_clflush_after = cpu_write_needs_clflush(obj);
23f54483
BW
922 ret = i915_gem_object_wait_rendering(obj, false);
923 if (ret)
924 return ret;
58642885 925 }
c76ce038
CW
926 /* Same trick applies to invalidate partially written cachelines read
927 * before writing. */
928 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
929 needs_clflush_before =
930 !cpu_cache_is_coherent(dev, obj->cache_level);
58642885 931
755d2218
CW
932 ret = i915_gem_object_get_pages(obj);
933 if (ret)
934 return ret;
935
77a0d1ca 936 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
063e4e6b 937
755d2218
CW
938 i915_gem_object_pin_pages(obj);
939
673a394b 940 offset = args->offset;
05394f39 941 obj->dirty = 1;
673a394b 942
67d5a50c
ID
943 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
944 offset >> PAGE_SHIFT) {
2db76d7c 945 struct page *page = sg_page_iter_page(&sg_iter);
58642885 946 int partial_cacheline_write;
e5281ccd 947
9da3da66
CW
948 if (remain <= 0)
949 break;
950
40123c1f
EA
951 /* Operation in this page
952 *
40123c1f 953 * shmem_page_offset = offset within page in shmem file
40123c1f
EA
954 * page_length = bytes to copy for this page
955 */
c8cbbb8b 956 shmem_page_offset = offset_in_page(offset);
40123c1f
EA
957
958 page_length = remain;
959 if ((shmem_page_offset + page_length) > PAGE_SIZE)
960 page_length = PAGE_SIZE - shmem_page_offset;
40123c1f 961
58642885
DV
962 /* If we don't overwrite a cacheline completely we need to be
963 * careful to have up-to-date data by first clflushing. Don't
964 * overcomplicate things and flush the entire patch. */
965 partial_cacheline_write = needs_clflush_before &&
966 ((shmem_page_offset | page_length)
967 & (boot_cpu_data.x86_clflush_size - 1));
968
8c59967c
DV
969 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
970 (page_to_phys(page) & (1 << 17)) != 0;
971
d174bd64
DV
972 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
973 user_data, page_do_bit17_swizzling,
974 partial_cacheline_write,
975 needs_clflush_after);
976 if (ret == 0)
977 goto next_page;
e244a443
DV
978
979 hit_slowpath = 1;
e244a443 980 mutex_unlock(&dev->struct_mutex);
d174bd64
DV
981 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
982 user_data, page_do_bit17_swizzling,
983 partial_cacheline_write,
984 needs_clflush_after);
40123c1f 985
e244a443 986 mutex_lock(&dev->struct_mutex);
755d2218 987
755d2218 988 if (ret)
8c59967c 989 goto out;
8c59967c 990
17793c9a 991next_page:
40123c1f 992 remain -= page_length;
8c59967c 993 user_data += page_length;
40123c1f 994 offset += page_length;
673a394b
EA
995 }
996
fbd5a26d 997out:
755d2218
CW
998 i915_gem_object_unpin_pages(obj);
999
e244a443 1000 if (hit_slowpath) {
8dcf015e
DV
1001 /*
1002 * Fixup: Flush cpu caches in case we didn't flush the dirty
1003 * cachelines in-line while writing and the object moved
1004 * out of the cpu write domain while we've dropped the lock.
1005 */
1006 if (!needs_clflush_after &&
1007 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
000433b6 1008 if (i915_gem_clflush_object(obj, obj->pin_display))
ed75a55b 1009 needs_clflush_after = true;
e244a443 1010 }
8c59967c 1011 }
673a394b 1012
58642885 1013 if (needs_clflush_after)
e76e9aeb 1014 i915_gem_chipset_flush(dev);
ed75a55b
VS
1015 else
1016 obj->cache_dirty = true;
58642885 1017
de152b62 1018 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
40123c1f 1019 return ret;
673a394b
EA
1020}
1021
1022/**
1023 * Writes data to the object referenced by handle.
1024 *
1025 * On error, the contents of the buffer that were to be modified are undefined.
1026 */
1027int
1028i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
fbd5a26d 1029 struct drm_file *file)
673a394b 1030{
5d77d9c5 1031 struct drm_i915_private *dev_priv = dev->dev_private;
673a394b 1032 struct drm_i915_gem_pwrite *args = data;
05394f39 1033 struct drm_i915_gem_object *obj;
51311d0a
CW
1034 int ret;
1035
1036 if (args->size == 0)
1037 return 0;
1038
1039 if (!access_ok(VERIFY_READ,
2bb4629a 1040 to_user_ptr(args->data_ptr),
51311d0a
CW
1041 args->size))
1042 return -EFAULT;
1043
d330a953 1044 if (likely(!i915.prefault_disable)) {
0b74b508
XZ
1045 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1046 args->size);
1047 if (ret)
1048 return -EFAULT;
1049 }
673a394b 1050
5d77d9c5
ID
1051 intel_runtime_pm_get(dev_priv);
1052
fbd5a26d 1053 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1054 if (ret)
5d77d9c5 1055 goto put_rpm;
1d7cfea1 1056
05394f39 1057 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 1058 if (&obj->base == NULL) {
1d7cfea1
CW
1059 ret = -ENOENT;
1060 goto unlock;
fbd5a26d 1061 }
673a394b 1062
7dcd2499 1063 /* Bounds check destination. */
05394f39
CW
1064 if (args->offset > obj->base.size ||
1065 args->size > obj->base.size - args->offset) {
ce9d419d 1066 ret = -EINVAL;
35b62a89 1067 goto out;
ce9d419d
CW
1068 }
1069
1286ff73
DV
1070 /* prime objects have no backing filp to GEM pread/pwrite
1071 * pages from.
1072 */
1073 if (!obj->base.filp) {
1074 ret = -EINVAL;
1075 goto out;
1076 }
1077
db53a302
CW
1078 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1079
935aaa69 1080 ret = -EFAULT;
673a394b
EA
1081 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1082 * it would end up going through the fenced access, and we'll get
1083 * different detiling behavior between reading and writing.
1084 * pread/pwrite currently are reading and writing from the CPU
1085 * perspective, requiring manual detiling by the client.
1086 */
2c22569b
CW
1087 if (obj->tiling_mode == I915_TILING_NONE &&
1088 obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1089 cpu_write_needs_clflush(obj)) {
fbd5a26d 1090 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
935aaa69
DV
1091 /* Note that the gtt paths might fail with non-page-backed user
1092 * pointers (e.g. gtt mappings when moving data between
1093 * textures). Fallback to the shmem path in that case. */
fbd5a26d 1094 }
673a394b 1095
6a2c4232
CW
1096 if (ret == -EFAULT || ret == -ENOSPC) {
1097 if (obj->phys_handle)
1098 ret = i915_gem_phys_pwrite(obj, args, file);
1099 else
1100 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1101 }
5c0480f2 1102
35b62a89 1103out:
05394f39 1104 drm_gem_object_unreference(&obj->base);
1d7cfea1 1105unlock:
fbd5a26d 1106 mutex_unlock(&dev->struct_mutex);
5d77d9c5
ID
1107put_rpm:
1108 intel_runtime_pm_put(dev_priv);
1109
673a394b
EA
1110 return ret;
1111}
1112
b361237b 1113int
33196ded 1114i915_gem_check_wedge(struct i915_gpu_error *error,
b361237b
CW
1115 bool interruptible)
1116{
1f83fee0 1117 if (i915_reset_in_progress(error)) {
b361237b
CW
1118 /* Non-interruptible callers can't handle -EAGAIN, hence return
1119 * -EIO unconditionally for these. */
1120 if (!interruptible)
1121 return -EIO;
1122
1f83fee0
DV
1123 /* Recovery complete, but the reset failed ... */
1124 if (i915_terminally_wedged(error))
b361237b
CW
1125 return -EIO;
1126
6689c167
MA
1127 /*
1128 * Check if GPU Reset is in progress - we need intel_ring_begin
1129 * to work properly to reinit the hw state while the gpu is
1130 * still marked as reset-in-progress. Handle this with a flag.
1131 */
1132 if (!error->reload_in_reset)
1133 return -EAGAIN;
b361237b
CW
1134 }
1135
1136 return 0;
1137}
1138
094f9a54
CW
1139static void fake_irq(unsigned long data)
1140{
1141 wake_up_process((struct task_struct *)data);
1142}
1143
1144static bool missed_irq(struct drm_i915_private *dev_priv,
0bc40be8 1145 struct intel_engine_cs *engine)
094f9a54 1146{
0bc40be8 1147 return test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings);
094f9a54
CW
1148}
1149
ca5b721e
CW
1150static unsigned long local_clock_us(unsigned *cpu)
1151{
1152 unsigned long t;
1153
1154 /* Cheaply and approximately convert from nanoseconds to microseconds.
1155 * The result and subsequent calculations are also defined in the same
1156 * approximate microseconds units. The principal source of timing
1157 * error here is from the simple truncation.
1158 *
1159 * Note that local_clock() is only defined wrt to the current CPU;
1160 * the comparisons are no longer valid if we switch CPUs. Instead of
1161 * blocking preemption for the entire busywait, we can detect the CPU
1162 * switch and use that as indicator of system load and a reason to
1163 * stop busywaiting, see busywait_stop().
1164 */
1165 *cpu = get_cpu();
1166 t = local_clock() >> 10;
1167 put_cpu();
1168
1169 return t;
1170}
1171
1172static bool busywait_stop(unsigned long timeout, unsigned cpu)
1173{
1174 unsigned this_cpu;
1175
1176 if (time_after(local_clock_us(&this_cpu), timeout))
1177 return true;
1178
1179 return this_cpu != cpu;
1180}
1181
91b0c352 1182static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
b29c19b6 1183{
2def4ad9 1184 unsigned long timeout;
ca5b721e
CW
1185 unsigned cpu;
1186
1187 /* When waiting for high frequency requests, e.g. during synchronous
1188 * rendering split between the CPU and GPU, the finite amount of time
1189 * required to set up the irq and wait upon it limits the response
1190 * rate. By busywaiting on the request completion for a short while we
1191 * can service the high frequency waits as quick as possible. However,
1192 * if it is a slow request, we want to sleep as quickly as possible.
1193 * The tradeoff between waiting and sleeping is roughly the time it
1194 * takes to sleep on a request, on the order of a microsecond.
1195 */
2def4ad9 1196
4a570db5 1197 if (req->engine->irq_refcount)
2def4ad9
CW
1198 return -EBUSY;
1199
821485dc
CW
1200 /* Only spin if we know the GPU is processing this request */
1201 if (!i915_gem_request_started(req, true))
1202 return -EAGAIN;
1203
ca5b721e 1204 timeout = local_clock_us(&cpu) + 5;
2def4ad9 1205 while (!need_resched()) {
eed29a5b 1206 if (i915_gem_request_completed(req, true))
2def4ad9
CW
1207 return 0;
1208
91b0c352
CW
1209 if (signal_pending_state(state, current))
1210 break;
1211
ca5b721e 1212 if (busywait_stop(timeout, cpu))
2def4ad9 1213 break;
b29c19b6 1214
2def4ad9
CW
1215 cpu_relax_lowlatency();
1216 }
821485dc 1217
eed29a5b 1218 if (i915_gem_request_completed(req, false))
2def4ad9
CW
1219 return 0;
1220
1221 return -EAGAIN;
b29c19b6
CW
1222}
1223
b361237b 1224/**
9c654818
JH
1225 * __i915_wait_request - wait until execution of request has finished
1226 * @req: duh!
1227 * @reset_counter: reset sequence associated with the given request
b361237b
CW
1228 * @interruptible: do an interruptible wait (normally yes)
1229 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1230 *
f69061be
DV
1231 * Note: It is of utmost importance that the passed in seqno and reset_counter
1232 * values have been read by the caller in an smp safe manner. Where read-side
1233 * locks are involved, it is sufficient to read the reset_counter before
1234 * unlocking the lock that protects the seqno. For lockless tricks, the
1235 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1236 * inserted.
1237 *
9c654818 1238 * Returns 0 if the request was found within the alloted time. Else returns the
b361237b
CW
1239 * errno with remaining time filled in timeout argument.
1240 */
9c654818 1241int __i915_wait_request(struct drm_i915_gem_request *req,
f69061be 1242 unsigned reset_counter,
b29c19b6 1243 bool interruptible,
5ed0bdf2 1244 s64 *timeout,
2e1b8730 1245 struct intel_rps_client *rps)
b361237b 1246{
666796da 1247 struct intel_engine_cs *engine = i915_gem_request_get_engine(req);
e2f80391 1248 struct drm_device *dev = engine->dev;
3e31c6c0 1249 struct drm_i915_private *dev_priv = dev->dev_private;
168c3f21 1250 const bool irq_test_in_progress =
666796da 1251 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine);
91b0c352 1252 int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
094f9a54 1253 DEFINE_WAIT(wait);
47e9766d 1254 unsigned long timeout_expire;
e0313db0 1255 s64 before = 0; /* Only to silence a compiler warning. */
b361237b
CW
1256 int ret;
1257
9df7575f 1258 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
c67a470b 1259
b4716185
CW
1260 if (list_empty(&req->list))
1261 return 0;
1262
1b5a433a 1263 if (i915_gem_request_completed(req, true))
b361237b
CW
1264 return 0;
1265
bb6d1984
CW
1266 timeout_expire = 0;
1267 if (timeout) {
1268 if (WARN_ON(*timeout < 0))
1269 return -EINVAL;
1270
1271 if (*timeout == 0)
1272 return -ETIME;
1273
1274 timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
e0313db0
TU
1275
1276 /*
1277 * Record current time in case interrupted by signal, or wedged.
1278 */
1279 before = ktime_get_raw_ns();
bb6d1984 1280 }
b361237b 1281
2e1b8730 1282 if (INTEL_INFO(dev_priv)->gen >= 6)
e61b9958 1283 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
b361237b 1284
74328ee5 1285 trace_i915_gem_request_wait_begin(req);
2def4ad9
CW
1286
1287 /* Optimistic spin for the next jiffie before touching IRQs */
91b0c352 1288 ret = __i915_spin_request(req, state);
2def4ad9
CW
1289 if (ret == 0)
1290 goto out;
1291
e2f80391 1292 if (!irq_test_in_progress && WARN_ON(!engine->irq_get(engine))) {
2def4ad9
CW
1293 ret = -ENODEV;
1294 goto out;
1295 }
1296
094f9a54
CW
1297 for (;;) {
1298 struct timer_list timer;
b361237b 1299
e2f80391 1300 prepare_to_wait(&engine->irq_queue, &wait, state);
b361237b 1301
f69061be
DV
1302 /* We need to check whether any gpu reset happened in between
1303 * the caller grabbing the seqno and now ... */
094f9a54
CW
1304 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1305 /* ... but upgrade the -EAGAIN to an -EIO if the gpu
1306 * is truely gone. */
1307 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1308 if (ret == 0)
1309 ret = -EAGAIN;
1310 break;
1311 }
f69061be 1312
1b5a433a 1313 if (i915_gem_request_completed(req, false)) {
094f9a54
CW
1314 ret = 0;
1315 break;
1316 }
b361237b 1317
91b0c352 1318 if (signal_pending_state(state, current)) {
094f9a54
CW
1319 ret = -ERESTARTSYS;
1320 break;
1321 }
1322
47e9766d 1323 if (timeout && time_after_eq(jiffies, timeout_expire)) {
094f9a54
CW
1324 ret = -ETIME;
1325 break;
1326 }
1327
1328 timer.function = NULL;
e2f80391 1329 if (timeout || missed_irq(dev_priv, engine)) {
47e9766d
MK
1330 unsigned long expire;
1331
094f9a54 1332 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
e2f80391 1333 expire = missed_irq(dev_priv, engine) ? jiffies + 1 : timeout_expire;
094f9a54
CW
1334 mod_timer(&timer, expire);
1335 }
1336
5035c275 1337 io_schedule();
094f9a54 1338
094f9a54
CW
1339 if (timer.function) {
1340 del_singleshot_timer_sync(&timer);
1341 destroy_timer_on_stack(&timer);
1342 }
1343 }
168c3f21 1344 if (!irq_test_in_progress)
e2f80391 1345 engine->irq_put(engine);
094f9a54 1346
e2f80391 1347 finish_wait(&engine->irq_queue, &wait);
b361237b 1348
2def4ad9 1349out:
2def4ad9
CW
1350 trace_i915_gem_request_wait_end(req);
1351
b361237b 1352 if (timeout) {
e0313db0 1353 s64 tres = *timeout - (ktime_get_raw_ns() - before);
5ed0bdf2
TG
1354
1355 *timeout = tres < 0 ? 0 : tres;
9cca3068
DV
1356
1357 /*
1358 * Apparently ktime isn't accurate enough and occasionally has a
1359 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1360 * things up to make the test happy. We allow up to 1 jiffy.
1361 *
1362 * This is a regrssion from the timespec->ktime conversion.
1363 */
1364 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1365 *timeout = 0;
b361237b
CW
1366 }
1367
094f9a54 1368 return ret;
b361237b
CW
1369}
1370
fcfa423c
JH
1371int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1372 struct drm_file *file)
1373{
fcfa423c
JH
1374 struct drm_i915_file_private *file_priv;
1375
1376 WARN_ON(!req || !file || req->file_priv);
1377
1378 if (!req || !file)
1379 return -EINVAL;
1380
1381 if (req->file_priv)
1382 return -EINVAL;
1383
fcfa423c
JH
1384 file_priv = file->driver_priv;
1385
1386 spin_lock(&file_priv->mm.lock);
1387 req->file_priv = file_priv;
1388 list_add_tail(&req->client_list, &file_priv->mm.request_list);
1389 spin_unlock(&file_priv->mm.lock);
1390
1391 req->pid = get_pid(task_pid(current));
1392
1393 return 0;
1394}
1395
b4716185
CW
1396static inline void
1397i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1398{
1399 struct drm_i915_file_private *file_priv = request->file_priv;
1400
1401 if (!file_priv)
1402 return;
1403
1404 spin_lock(&file_priv->mm.lock);
1405 list_del(&request->client_list);
1406 request->file_priv = NULL;
1407 spin_unlock(&file_priv->mm.lock);
fcfa423c
JH
1408
1409 put_pid(request->pid);
1410 request->pid = NULL;
b4716185
CW
1411}
1412
1413static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1414{
1415 trace_i915_gem_request_retire(request);
1416
1417 /* We know the GPU must have read the request to have
1418 * sent us the seqno + interrupt, so use the position
1419 * of tail of the request to update the last known position
1420 * of the GPU head.
1421 *
1422 * Note this requires that we are always called in request
1423 * completion order.
1424 */
1425 request->ringbuf->last_retired_head = request->postfix;
1426
1427 list_del_init(&request->list);
1428 i915_gem_request_remove_from_client(request);
1429
b4716185
CW
1430 i915_gem_request_unreference(request);
1431}
1432
1433static void
1434__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1435{
4a570db5 1436 struct intel_engine_cs *engine = req->engine;
b4716185
CW
1437 struct drm_i915_gem_request *tmp;
1438
1439 lockdep_assert_held(&engine->dev->struct_mutex);
1440
1441 if (list_empty(&req->list))
1442 return;
1443
1444 do {
1445 tmp = list_first_entry(&engine->request_list,
1446 typeof(*tmp), list);
1447
1448 i915_gem_request_retire(tmp);
1449 } while (tmp != req);
1450
1451 WARN_ON(i915_verify_lists(engine->dev));
1452}
1453
b361237b 1454/**
a4b3a571 1455 * Waits for a request to be signaled, and cleans up the
b361237b
CW
1456 * request and object lists appropriately for that event.
1457 */
1458int
a4b3a571 1459i915_wait_request(struct drm_i915_gem_request *req)
b361237b 1460{
a4b3a571
DV
1461 struct drm_device *dev;
1462 struct drm_i915_private *dev_priv;
1463 bool interruptible;
b361237b
CW
1464 int ret;
1465
a4b3a571
DV
1466 BUG_ON(req == NULL);
1467
4a570db5 1468 dev = req->engine->dev;
a4b3a571
DV
1469 dev_priv = dev->dev_private;
1470 interruptible = dev_priv->mm.interruptible;
1471
b361237b 1472 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
b361237b 1473
33196ded 1474 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
b361237b
CW
1475 if (ret)
1476 return ret;
1477
b4716185
CW
1478 ret = __i915_wait_request(req,
1479 atomic_read(&dev_priv->gpu_error.reset_counter),
9c654818 1480 interruptible, NULL, NULL);
b4716185
CW
1481 if (ret)
1482 return ret;
d26e3af8 1483
b4716185 1484 __i915_gem_request_retire__upto(req);
d26e3af8
CW
1485 return 0;
1486}
1487
b361237b
CW
1488/**
1489 * Ensures that all rendering to the object has completed and the object is
1490 * safe to unbind from the GTT or access from the CPU.
1491 */
2e2f351d 1492int
b361237b
CW
1493i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1494 bool readonly)
1495{
b4716185 1496 int ret, i;
b361237b 1497
b4716185 1498 if (!obj->active)
b361237b
CW
1499 return 0;
1500
b4716185
CW
1501 if (readonly) {
1502 if (obj->last_write_req != NULL) {
1503 ret = i915_wait_request(obj->last_write_req);
1504 if (ret)
1505 return ret;
b361237b 1506
4a570db5 1507 i = obj->last_write_req->engine->id;
b4716185
CW
1508 if (obj->last_read_req[i] == obj->last_write_req)
1509 i915_gem_object_retire__read(obj, i);
1510 else
1511 i915_gem_object_retire__write(obj);
1512 }
1513 } else {
666796da 1514 for (i = 0; i < I915_NUM_ENGINES; i++) {
b4716185
CW
1515 if (obj->last_read_req[i] == NULL)
1516 continue;
1517
1518 ret = i915_wait_request(obj->last_read_req[i]);
1519 if (ret)
1520 return ret;
1521
1522 i915_gem_object_retire__read(obj, i);
1523 }
1524 RQ_BUG_ON(obj->active);
1525 }
1526
1527 return 0;
1528}
1529
1530static void
1531i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1532 struct drm_i915_gem_request *req)
1533{
4a570db5 1534 int ring = req->engine->id;
b4716185
CW
1535
1536 if (obj->last_read_req[ring] == req)
1537 i915_gem_object_retire__read(obj, ring);
1538 else if (obj->last_write_req == req)
1539 i915_gem_object_retire__write(obj);
1540
1541 __i915_gem_request_retire__upto(req);
b361237b
CW
1542}
1543
3236f57a
CW
1544/* A nonblocking variant of the above wait. This is a highly dangerous routine
1545 * as the object state may change during this call.
1546 */
1547static __must_check int
1548i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
2e1b8730 1549 struct intel_rps_client *rps,
3236f57a
CW
1550 bool readonly)
1551{
1552 struct drm_device *dev = obj->base.dev;
1553 struct drm_i915_private *dev_priv = dev->dev_private;
666796da 1554 struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
f69061be 1555 unsigned reset_counter;
b4716185 1556 int ret, i, n = 0;
3236f57a
CW
1557
1558 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1559 BUG_ON(!dev_priv->mm.interruptible);
1560
b4716185 1561 if (!obj->active)
3236f57a
CW
1562 return 0;
1563
33196ded 1564 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
3236f57a
CW
1565 if (ret)
1566 return ret;
1567
f69061be 1568 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
b4716185
CW
1569
1570 if (readonly) {
1571 struct drm_i915_gem_request *req;
1572
1573 req = obj->last_write_req;
1574 if (req == NULL)
1575 return 0;
1576
b4716185
CW
1577 requests[n++] = i915_gem_request_reference(req);
1578 } else {
666796da 1579 for (i = 0; i < I915_NUM_ENGINES; i++) {
b4716185
CW
1580 struct drm_i915_gem_request *req;
1581
1582 req = obj->last_read_req[i];
1583 if (req == NULL)
1584 continue;
1585
b4716185
CW
1586 requests[n++] = i915_gem_request_reference(req);
1587 }
1588 }
1589
3236f57a 1590 mutex_unlock(&dev->struct_mutex);
b4716185
CW
1591 for (i = 0; ret == 0 && i < n; i++)
1592 ret = __i915_wait_request(requests[i], reset_counter, true,
2e1b8730 1593 NULL, rps);
3236f57a
CW
1594 mutex_lock(&dev->struct_mutex);
1595
b4716185
CW
1596 for (i = 0; i < n; i++) {
1597 if (ret == 0)
1598 i915_gem_object_retire_request(obj, requests[i]);
1599 i915_gem_request_unreference(requests[i]);
1600 }
1601
1602 return ret;
3236f57a
CW
1603}
1604
2e1b8730
CW
1605static struct intel_rps_client *to_rps_client(struct drm_file *file)
1606{
1607 struct drm_i915_file_private *fpriv = file->driver_priv;
1608 return &fpriv->rps;
1609}
1610
673a394b 1611/**
2ef7eeaa
EA
1612 * Called when user space prepares to use an object with the CPU, either
1613 * through the mmap ioctl's mapping or a GTT mapping.
673a394b
EA
1614 */
1615int
1616i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
05394f39 1617 struct drm_file *file)
673a394b
EA
1618{
1619 struct drm_i915_gem_set_domain *args = data;
05394f39 1620 struct drm_i915_gem_object *obj;
2ef7eeaa
EA
1621 uint32_t read_domains = args->read_domains;
1622 uint32_t write_domain = args->write_domain;
673a394b
EA
1623 int ret;
1624
2ef7eeaa 1625 /* Only handle setting domains to types used by the CPU. */
21d509e3 1626 if (write_domain & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1627 return -EINVAL;
1628
21d509e3 1629 if (read_domains & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1630 return -EINVAL;
1631
1632 /* Having something in the write domain implies it's in the read
1633 * domain, and only that read domain. Enforce that in the request.
1634 */
1635 if (write_domain != 0 && read_domains != write_domain)
1636 return -EINVAL;
1637
76c1dec1 1638 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1639 if (ret)
76c1dec1 1640 return ret;
1d7cfea1 1641
05394f39 1642 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 1643 if (&obj->base == NULL) {
1d7cfea1
CW
1644 ret = -ENOENT;
1645 goto unlock;
76c1dec1 1646 }
673a394b 1647
3236f57a
CW
1648 /* Try to flush the object off the GPU without holding the lock.
1649 * We will repeat the flush holding the lock in the normal manner
1650 * to catch cases where we are gazumped.
1651 */
6e4930f6 1652 ret = i915_gem_object_wait_rendering__nonblocking(obj,
2e1b8730 1653 to_rps_client(file),
6e4930f6 1654 !write_domain);
3236f57a
CW
1655 if (ret)
1656 goto unref;
1657
43566ded 1658 if (read_domains & I915_GEM_DOMAIN_GTT)
2ef7eeaa 1659 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
43566ded 1660 else
e47c68e9 1661 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2ef7eeaa 1662
031b698a
DV
1663 if (write_domain != 0)
1664 intel_fb_obj_invalidate(obj,
1665 write_domain == I915_GEM_DOMAIN_GTT ?
1666 ORIGIN_GTT : ORIGIN_CPU);
1667
3236f57a 1668unref:
05394f39 1669 drm_gem_object_unreference(&obj->base);
1d7cfea1 1670unlock:
673a394b
EA
1671 mutex_unlock(&dev->struct_mutex);
1672 return ret;
1673}
1674
1675/**
1676 * Called when user space has done writes to this buffer
1677 */
1678int
1679i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
05394f39 1680 struct drm_file *file)
673a394b
EA
1681{
1682 struct drm_i915_gem_sw_finish *args = data;
05394f39 1683 struct drm_i915_gem_object *obj;
673a394b
EA
1684 int ret = 0;
1685
76c1dec1 1686 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1687 if (ret)
76c1dec1 1688 return ret;
1d7cfea1 1689
05394f39 1690 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 1691 if (&obj->base == NULL) {
1d7cfea1
CW
1692 ret = -ENOENT;
1693 goto unlock;
673a394b
EA
1694 }
1695
673a394b 1696 /* Pinned buffers may be scanout, so flush the cache */
2c22569b 1697 if (obj->pin_display)
e62b59e4 1698 i915_gem_object_flush_cpu_write_domain(obj);
e47c68e9 1699
05394f39 1700 drm_gem_object_unreference(&obj->base);
1d7cfea1 1701unlock:
673a394b
EA
1702 mutex_unlock(&dev->struct_mutex);
1703 return ret;
1704}
1705
1706/**
1707 * Maps the contents of an object, returning the address it is mapped
1708 * into.
1709 *
1710 * While the mapping holds a reference on the contents of the object, it doesn't
1711 * imply a ref on the object itself.
34367381
DV
1712 *
1713 * IMPORTANT:
1714 *
1715 * DRM driver writers who look a this function as an example for how to do GEM
1716 * mmap support, please don't implement mmap support like here. The modern way
1717 * to implement DRM mmap support is with an mmap offset ioctl (like
1718 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1719 * That way debug tooling like valgrind will understand what's going on, hiding
1720 * the mmap call in a driver private ioctl will break that. The i915 driver only
1721 * does cpu mmaps this way because we didn't know better.
673a394b
EA
1722 */
1723int
1724i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
05394f39 1725 struct drm_file *file)
673a394b
EA
1726{
1727 struct drm_i915_gem_mmap *args = data;
1728 struct drm_gem_object *obj;
673a394b
EA
1729 unsigned long addr;
1730
1816f923
AG
1731 if (args->flags & ~(I915_MMAP_WC))
1732 return -EINVAL;
1733
1734 if (args->flags & I915_MMAP_WC && !cpu_has_pat)
1735 return -ENODEV;
1736
05394f39 1737 obj = drm_gem_object_lookup(dev, file, args->handle);
673a394b 1738 if (obj == NULL)
bf79cb91 1739 return -ENOENT;
673a394b 1740
1286ff73
DV
1741 /* prime objects have no backing filp to GEM mmap
1742 * pages from.
1743 */
1744 if (!obj->filp) {
1745 drm_gem_object_unreference_unlocked(obj);
1746 return -EINVAL;
1747 }
1748
6be5ceb0 1749 addr = vm_mmap(obj->filp, 0, args->size,
673a394b
EA
1750 PROT_READ | PROT_WRITE, MAP_SHARED,
1751 args->offset);
1816f923
AG
1752 if (args->flags & I915_MMAP_WC) {
1753 struct mm_struct *mm = current->mm;
1754 struct vm_area_struct *vma;
1755
1756 down_write(&mm->mmap_sem);
1757 vma = find_vma(mm, addr);
1758 if (vma)
1759 vma->vm_page_prot =
1760 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1761 else
1762 addr = -ENOMEM;
1763 up_write(&mm->mmap_sem);
1764 }
bc9025bd 1765 drm_gem_object_unreference_unlocked(obj);
673a394b
EA
1766 if (IS_ERR((void *)addr))
1767 return addr;
1768
1769 args->addr_ptr = (uint64_t) addr;
1770
1771 return 0;
1772}
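
For reference, a minimal userspace sketch of driving the ioctl above (not from the kernel tree; gem_mmap_cpu, drm_fd and bo_handle are hypothetical names, and the fd and handle are assumed to come from an earlier open() and GEM create):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Ask i915 to mmap the object's shmem backing store on our behalf and
 * hand back the CPU address; request write-combining only on machines
 * with PAT, since the ioctl above rejects I915_MMAP_WC otherwise.
 */
static void *gem_mmap_cpu(int drm_fd, uint32_t bo_handle, uint64_t size, int wc)
{
	struct drm_i915_gem_mmap arg = {
		.handle = bo_handle,
		.offset = 0,
		.size = size,
		.flags = wc ? I915_MMAP_WC : 0,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
		return NULL;	/* errno holds the failure reason */

	return (void *)(uintptr_t)arg.addr_ptr;
}
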
1773
de151cf6
JB
1774/**
1775 * i915_gem_fault - fault a page into the GTT
d9072a3e
GT
1776 * @vma: VMA in question
1777 * @vmf: fault info
de151cf6
JB
1778 *
 1779 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1780 * from userspace. The fault handler takes care of binding the object to
1781 * the GTT (if needed), allocating and programming a fence register (again,
1782 * only if needed based on whether the old reg is still valid or the object
 1783 * is tiled) and inserting a new PTE into the faulting process's address space.
1784 *
1785 * Note that the faulting process may involve evicting existing objects
1786 * from the GTT and/or fence registers to make room. So performance may
1787 * suffer if the GTT working set is large or there are few fence registers
1788 * left.
1789 */
1790int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1791{
05394f39
CW
1792 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1793 struct drm_device *dev = obj->base.dev;
72e96d64
JL
1794 struct drm_i915_private *dev_priv = to_i915(dev);
1795 struct i915_ggtt *ggtt = &dev_priv->ggtt;
c5ad54cf 1796 struct i915_ggtt_view view = i915_ggtt_view_normal;
de151cf6
JB
1797 pgoff_t page_offset;
1798 unsigned long pfn;
1799 int ret = 0;
0f973f27 1800 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
de151cf6 1801
f65c9168
PZ
1802 intel_runtime_pm_get(dev_priv);
1803
de151cf6
JB
1804 /* We don't use vmf->pgoff since that has the fake offset */
1805 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1806 PAGE_SHIFT;
1807
d9bc7e9f
CW
1808 ret = i915_mutex_lock_interruptible(dev);
1809 if (ret)
1810 goto out;
a00b10c3 1811
db53a302
CW
1812 trace_i915_gem_object_fault(obj, page_offset, true, write);
1813
6e4930f6
CW
1814 /* Try to flush the object off the GPU first without holding the lock.
1815 * Upon reacquiring the lock, we will perform our sanity checks and then
1816 * repeat the flush holding the lock in the normal manner to catch cases
1817 * where we are gazumped.
1818 */
1819 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1820 if (ret)
1821 goto unlock;
1822
eb119bd6
CW
1823 /* Access to snoopable pages through the GTT is incoherent. */
1824 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
ddeff6ee 1825 ret = -EFAULT;
eb119bd6
CW
1826 goto unlock;
1827 }
1828
c5ad54cf 1829 /* Use a partial view if the object is bigger than the aperture. */
72e96d64 1830 if (obj->base.size >= ggtt->mappable_end &&
e7ded2d7 1831 obj->tiling_mode == I915_TILING_NONE) {
c5ad54cf 1832 static const unsigned int chunk_size = 256; // 1 MiB
e7ded2d7 1833
c5ad54cf
JL
1834 memset(&view, 0, sizeof(view));
1835 view.type = I915_GGTT_VIEW_PARTIAL;
1836 view.params.partial.offset = rounddown(page_offset, chunk_size);
1837 view.params.partial.size =
1838 min_t(unsigned int,
1839 chunk_size,
1840 (vma->vm_end - vma->vm_start)/PAGE_SIZE -
1841 view.params.partial.offset);
1842 }
1843
1844 /* Now pin it into the GTT if needed */
1845 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
c9839303
CW
1846 if (ret)
1847 goto unlock;
4a684a41 1848
c9839303
CW
1849 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1850 if (ret)
1851 goto unpin;
74898d7e 1852
06d98131 1853 ret = i915_gem_object_get_fence(obj);
d9e86c0e 1854 if (ret)
c9839303 1855 goto unpin;
7d1c4804 1856
b90b91d8 1857 /* Finally, remap it using the new GTT offset */
72e96d64 1858 pfn = ggtt->mappable_base +
c5ad54cf 1859 i915_gem_obj_ggtt_offset_view(obj, &view);
f343c5f6 1860 pfn >>= PAGE_SHIFT;
de151cf6 1861
c5ad54cf
JL
1862 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
1863 /* Overriding existing pages in partial view does not cause
1864 * us any trouble as TLBs are still valid because the fault
1865 * is due to userspace losing part of the mapping or never
 1866 * having accessed it before (at this partial view's range).
1867 */
1868 unsigned long base = vma->vm_start +
1869 (view.params.partial.offset << PAGE_SHIFT);
1870 unsigned int i;
b90b91d8 1871
c5ad54cf
JL
1872 for (i = 0; i < view.params.partial.size; i++) {
1873 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
b90b91d8
CW
1874 if (ret)
1875 break;
1876 }
1877
1878 obj->fault_mappable = true;
c5ad54cf
JL
1879 } else {
1880 if (!obj->fault_mappable) {
1881 unsigned long size = min_t(unsigned long,
1882 vma->vm_end - vma->vm_start,
1883 obj->base.size);
1884 int i;
1885
1886 for (i = 0; i < size >> PAGE_SHIFT; i++) {
1887 ret = vm_insert_pfn(vma,
1888 (unsigned long)vma->vm_start + i * PAGE_SIZE,
1889 pfn + i);
1890 if (ret)
1891 break;
1892 }
1893
1894 obj->fault_mappable = true;
1895 } else
1896 ret = vm_insert_pfn(vma,
1897 (unsigned long)vmf->virtual_address,
1898 pfn + page_offset);
1899 }
c9839303 1900unpin:
c5ad54cf 1901 i915_gem_object_ggtt_unpin_view(obj, &view);
c715089f 1902unlock:
de151cf6 1903 mutex_unlock(&dev->struct_mutex);
d9bc7e9f 1904out:
de151cf6 1905 switch (ret) {
d9bc7e9f 1906 case -EIO:
2232f031
DV
1907 /*
1908 * We eat errors when the gpu is terminally wedged to avoid
1909 * userspace unduly crashing (gl has no provisions for mmaps to
1910 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1911 * and so needs to be reported.
1912 */
1913 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
f65c9168
PZ
1914 ret = VM_FAULT_SIGBUS;
1915 break;
1916 }
045e769a 1917 case -EAGAIN:
571c608d
DV
1918 /*
1919 * EAGAIN means the gpu is hung and we'll wait for the error
1920 * handler to reset everything when re-faulting in
1921 * i915_mutex_lock_interruptible.
d9bc7e9f 1922 */
c715089f
CW
1923 case 0:
1924 case -ERESTARTSYS:
bed636ab 1925 case -EINTR:
e79e0fe3
DR
1926 case -EBUSY:
1927 /*
1928 * EBUSY is ok: this just means that another thread
1929 * already did the job.
1930 */
f65c9168
PZ
1931 ret = VM_FAULT_NOPAGE;
1932 break;
de151cf6 1933 case -ENOMEM:
f65c9168
PZ
1934 ret = VM_FAULT_OOM;
1935 break;
a7c2e1aa 1936 case -ENOSPC:
45d67817 1937 case -EFAULT:
f65c9168
PZ
1938 ret = VM_FAULT_SIGBUS;
1939 break;
de151cf6 1940 default:
a7c2e1aa 1941 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
f65c9168
PZ
1942 ret = VM_FAULT_SIGBUS;
1943 break;
de151cf6 1944 }
f65c9168
PZ
1945
1946 intel_runtime_pm_put(dev_priv);
1947 return ret;
de151cf6
JB
1948}
1949
901782b2
CW
1950/**
1951 * i915_gem_release_mmap - remove physical page mappings
1952 * @obj: obj in question
1953 *
af901ca1 1954 * Preserve the reservation of the mmap offset with the DRM core code, but
901782b2
CW
1955 * relinquish ownership of the pages back to the system.
1956 *
1957 * It is vital that we remove the page mapping if we have mapped a tiled
1958 * object through the GTT and then lose the fence register due to
1959 * resource pressure. Similarly if the object has been moved out of the
 1960 * aperture, then pages mapped into userspace must be revoked. Removing the
1961 * mapping will then trigger a page fault on the next user access, allowing
1962 * fixup by i915_gem_fault().
1963 */
d05ca301 1964void
05394f39 1965i915_gem_release_mmap(struct drm_i915_gem_object *obj)
901782b2 1966{
6299f992
CW
1967 if (!obj->fault_mappable)
1968 return;
901782b2 1969
6796cb16
DH
1970 drm_vma_node_unmap(&obj->base.vma_node,
1971 obj->base.dev->anon_inode->i_mapping);
6299f992 1972 obj->fault_mappable = false;
901782b2
CW
1973}
1974
eedd10f4
CW
1975void
1976i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1977{
1978 struct drm_i915_gem_object *obj;
1979
1980 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1981 i915_gem_release_mmap(obj);
1982}
1983
0fa87796 1984uint32_t
e28f8711 1985i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
92b88aeb 1986{
e28f8711 1987 uint32_t gtt_size;
92b88aeb
CW
1988
1989 if (INTEL_INFO(dev)->gen >= 4 ||
e28f8711
CW
1990 tiling_mode == I915_TILING_NONE)
1991 return size;
92b88aeb
CW
1992
1993 /* Previous chips need a power-of-two fence region when tiling */
1994 if (INTEL_INFO(dev)->gen == 3)
e28f8711 1995 gtt_size = 1024*1024;
92b88aeb 1996 else
e28f8711 1997 gtt_size = 512*1024;
92b88aeb 1998
e28f8711
CW
1999 while (gtt_size < size)
2000 gtt_size <<= 1;
92b88aeb 2001
e28f8711 2002 return gtt_size;
92b88aeb
CW
2003}
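
The fence sizing above is easy to reason about in isolation; a standalone sketch of the same rounding, with the per-generation minimum passed in explicitly (fence_region_size is a made-up name for illustration):

#include <stdint.h>

/* Pre-gen4 GPUs need a power-of-two fence region at least as large as
 * the object: round the object size up, starting from the per-gen
 * minimum (1 MiB on gen3, 512 KiB before that).
 */
static uint32_t fence_region_size(uint32_t obj_size, uint32_t min_size)
{
	uint32_t size = min_size;

	while (size < obj_size)
		size <<= 1;

	return size;
}

/* e.g. fence_region_size(3u << 20, 1u << 20) == 4 MiB */
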
2004
de151cf6
JB
2005/**
2006 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2007 * @obj: object to check
2008 *
2009 * Return the required GTT alignment for an object, taking into account
5e783301 2010 * potential fence register mapping.
de151cf6 2011 */
d865110c
ID
2012uint32_t
2013i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2014 int tiling_mode, bool fenced)
de151cf6 2015{
de151cf6
JB
2016 /*
2017 * Minimum alignment is 4k (GTT page size), but might be greater
2018 * if a fence register is needed for the object.
2019 */
d865110c 2020 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
e28f8711 2021 tiling_mode == I915_TILING_NONE)
de151cf6
JB
2022 return 4096;
2023
a00b10c3
CW
2024 /*
2025 * Previous chips need to be aligned to the size of the smallest
2026 * fence register that can contain the object.
2027 */
e28f8711 2028 return i915_gem_get_gtt_size(dev, size, tiling_mode);
a00b10c3
CW
2029}
2030
d8cb5086
CW
2031static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2032{
2033 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2034 int ret;
2035
da494d7c
DV
2036 dev_priv->mm.shrinker_no_lock_stealing = true;
2037
d8cb5086
CW
2038 ret = drm_gem_create_mmap_offset(&obj->base);
2039 if (ret != -ENOSPC)
da494d7c 2040 goto out;
d8cb5086
CW
2041
2042 /* Badly fragmented mmap space? The only way we can recover
2043 * space is by destroying unwanted objects. We can't randomly release
2044 * mmap_offsets as userspace expects them to be persistent for the
 2045 * lifetime of the objects. The closest we can do is to release the
 2046 * offsets on purgeable objects by truncating them and marking them purged,
2047 * which prevents userspace from ever using that object again.
2048 */
21ab4e74
CW
2049 i915_gem_shrink(dev_priv,
2050 obj->base.size >> PAGE_SHIFT,
2051 I915_SHRINK_BOUND |
2052 I915_SHRINK_UNBOUND |
2053 I915_SHRINK_PURGEABLE);
d8cb5086
CW
2054 ret = drm_gem_create_mmap_offset(&obj->base);
2055 if (ret != -ENOSPC)
da494d7c 2056 goto out;
d8cb5086
CW
2057
2058 i915_gem_shrink_all(dev_priv);
da494d7c
DV
2059 ret = drm_gem_create_mmap_offset(&obj->base);
2060out:
2061 dev_priv->mm.shrinker_no_lock_stealing = false;
2062
2063 return ret;
d8cb5086
CW
2064}
2065
2066static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2067{
d8cb5086
CW
2068 drm_gem_free_mmap_offset(&obj->base);
2069}
2070
da6b51d0 2071int
ff72145b
DA
2072i915_gem_mmap_gtt(struct drm_file *file,
2073 struct drm_device *dev,
da6b51d0 2074 uint32_t handle,
ff72145b 2075 uint64_t *offset)
de151cf6 2076{
05394f39 2077 struct drm_i915_gem_object *obj;
de151cf6
JB
2078 int ret;
2079
76c1dec1 2080 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 2081 if (ret)
76c1dec1 2082 return ret;
de151cf6 2083
ff72145b 2084 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
c8725226 2085 if (&obj->base == NULL) {
1d7cfea1
CW
2086 ret = -ENOENT;
2087 goto unlock;
2088 }
de151cf6 2089
05394f39 2090 if (obj->madv != I915_MADV_WILLNEED) {
bd9b6a4e 2091 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
8c99e57d 2092 ret = -EFAULT;
1d7cfea1 2093 goto out;
ab18282d
CW
2094 }
2095
d8cb5086
CW
2096 ret = i915_gem_object_create_mmap_offset(obj);
2097 if (ret)
2098 goto out;
de151cf6 2099
0de23977 2100 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
de151cf6 2101
1d7cfea1 2102out:
05394f39 2103 drm_gem_object_unreference(&obj->base);
1d7cfea1 2104unlock:
de151cf6 2105 mutex_unlock(&dev->struct_mutex);
1d7cfea1 2106 return ret;
de151cf6
JB
2107}
2108
ff72145b
DA
2109/**
2110 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2111 * @dev: DRM device
2112 * @data: GTT mapping ioctl data
2113 * @file: GEM object info
2114 *
2115 * Simply returns the fake offset to userspace so it can mmap it.
2116 * The mmap call will end up in drm_gem_mmap(), which will set things
2117 * up so we can get faults in the handler above.
2118 *
2119 * The fault handler will take care of binding the object into the GTT
2120 * (since it may have been evicted to make room for something), allocating
2121 * a fence register, and mapping the appropriate aperture address into
2122 * userspace.
2123 */
2124int
2125i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2126 struct drm_file *file)
2127{
2128 struct drm_i915_gem_mmap_gtt *args = data;
2129
da6b51d0 2130 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
ff72145b
DA
2131}
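
From userspace, the flow described in the comment above is just two calls; a minimal sketch assuming a valid DRM fd and GEM handle (function and parameter names here are hypothetical):

#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

static void *gem_mmap_gtt(int drm_fd, uint32_t bo_handle, uint64_t size)
{
	struct drm_i915_gem_mmap_gtt arg = { .handle = bo_handle };

	/* Fetch the fake offset reserved for this object... */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
		return MAP_FAILED;

	/* ...then mmap the DRM fd at that offset; subsequent faults are
	 * serviced by i915_gem_fault() above, which binds the object into
	 * the GTT and inserts the aperture PTEs.
	 */
	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    drm_fd, arg.offset);
}
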
2132
225067ee
DV
2133/* Immediately discard the backing storage */
2134static void
2135i915_gem_object_truncate(struct drm_i915_gem_object *obj)
e5281ccd 2136{
4d6294bf 2137 i915_gem_object_free_mmap_offset(obj);
1286ff73 2138
4d6294bf
CW
2139 if (obj->base.filp == NULL)
2140 return;
e5281ccd 2141
225067ee
DV
2142 /* Our goal here is to return as much of the memory as
2143 * is possible back to the system as we are called from OOM.
2144 * To do this we must instruct the shmfs to drop all of its
2145 * backing pages, *now*.
2146 */
5537252b 2147 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
225067ee
DV
2148 obj->madv = __I915_MADV_PURGED;
2149}
e5281ccd 2150
5537252b
CW
2151/* Try to discard unwanted pages */
2152static void
2153i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
225067ee 2154{
5537252b
CW
2155 struct address_space *mapping;
2156
2157 switch (obj->madv) {
2158 case I915_MADV_DONTNEED:
2159 i915_gem_object_truncate(obj);
2160 case __I915_MADV_PURGED:
2161 return;
2162 }
2163
2164 if (obj->base.filp == NULL)
2165 return;
2166
 2167 mapping = file_inode(obj->base.filp)->i_mapping;
2168 invalidate_mapping_pages(mapping, 0, (loff_t)-1);
e5281ccd
CW
2169}
2170
5cdf5881 2171static void
05394f39 2172i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
673a394b 2173{
90797e6d
ID
2174 struct sg_page_iter sg_iter;
2175 int ret;
1286ff73 2176
05394f39 2177 BUG_ON(obj->madv == __I915_MADV_PURGED);
673a394b 2178
6c085a72
CW
2179 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2180 if (ret) {
2181 /* In the event of a disaster, abandon all caches and
2182 * hope for the best.
2183 */
2184 WARN_ON(ret != -EIO);
2c22569b 2185 i915_gem_clflush_object(obj, true);
6c085a72
CW
2186 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2187 }
2188
e2273302
ID
2189 i915_gem_gtt_finish_object(obj);
2190
6dacfd2f 2191 if (i915_gem_object_needs_bit17_swizzle(obj))
280b713b
EA
2192 i915_gem_object_save_bit_17_swizzle(obj);
2193
05394f39
CW
2194 if (obj->madv == I915_MADV_DONTNEED)
2195 obj->dirty = 0;
3ef94daa 2196
90797e6d 2197 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2db76d7c 2198 struct page *page = sg_page_iter_page(&sg_iter);
9da3da66 2199
05394f39 2200 if (obj->dirty)
9da3da66 2201 set_page_dirty(page);
3ef94daa 2202
05394f39 2203 if (obj->madv == I915_MADV_WILLNEED)
9da3da66 2204 mark_page_accessed(page);
3ef94daa 2205
09cbfeaf 2206 put_page(page);
3ef94daa 2207 }
05394f39 2208 obj->dirty = 0;
673a394b 2209
9da3da66
CW
2210 sg_free_table(obj->pages);
2211 kfree(obj->pages);
37e680a1 2212}
6c085a72 2213
dd624afd 2214int
37e680a1
CW
2215i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2216{
2217 const struct drm_i915_gem_object_ops *ops = obj->ops;
2218
2f745ad3 2219 if (obj->pages == NULL)
37e680a1
CW
2220 return 0;
2221
a5570178
CW
2222 if (obj->pages_pin_count)
2223 return -EBUSY;
2224
9843877d 2225 BUG_ON(i915_gem_obj_bound_any(obj));
3e123027 2226
a2165e31
CW
2227 /* ->put_pages might need to allocate memory for the bit17 swizzle
2228 * array, hence protect them from being reaped by removing them from gtt
2229 * lists early. */
35c20a60 2230 list_del(&obj->global_list);
a2165e31 2231
0a798eb9 2232 if (obj->mapping) {
fb8621d3
CW
2233 if (is_vmalloc_addr(obj->mapping))
2234 vunmap(obj->mapping);
2235 else
2236 kunmap(kmap_to_page(obj->mapping));
0a798eb9
CW
2237 obj->mapping = NULL;
2238 }
2239
37e680a1 2240 ops->put_pages(obj);
05394f39 2241 obj->pages = NULL;
37e680a1 2242
5537252b 2243 i915_gem_object_invalidate(obj);
6c085a72
CW
2244
2245 return 0;
2246}
2247
37e680a1 2248static int
6c085a72 2249i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
e5281ccd 2250{
6c085a72 2251 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
e5281ccd
CW
2252 int page_count, i;
2253 struct address_space *mapping;
9da3da66
CW
2254 struct sg_table *st;
2255 struct scatterlist *sg;
90797e6d 2256 struct sg_page_iter sg_iter;
e5281ccd 2257 struct page *page;
90797e6d 2258 unsigned long last_pfn = 0; /* suppress gcc warning */
e2273302 2259 int ret;
6c085a72 2260 gfp_t gfp;
e5281ccd 2261
6c085a72
CW
2262 /* Assert that the object is not currently in any GPU domain. As it
2263 * wasn't in the GTT, there shouldn't be any way it could have been in
2264 * a GPU cache
2265 */
2266 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2267 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2268
9da3da66
CW
2269 st = kmalloc(sizeof(*st), GFP_KERNEL);
2270 if (st == NULL)
2271 return -ENOMEM;
2272
05394f39 2273 page_count = obj->base.size / PAGE_SIZE;
9da3da66 2274 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
9da3da66 2275 kfree(st);
e5281ccd 2276 return -ENOMEM;
9da3da66 2277 }
e5281ccd 2278
9da3da66
CW
2279 /* Get the list of pages out of our struct file. They'll be pinned
2280 * at this point until we release them.
2281 *
2282 * Fail silently without starting the shrinker
2283 */
496ad9aa 2284 mapping = file_inode(obj->base.filp)->i_mapping;
c62d2555 2285 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
d0164adc 2286 gfp |= __GFP_NORETRY | __GFP_NOWARN;
90797e6d
ID
2287 sg = st->sgl;
2288 st->nents = 0;
2289 for (i = 0; i < page_count; i++) {
6c085a72
CW
2290 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2291 if (IS_ERR(page)) {
21ab4e74
CW
2292 i915_gem_shrink(dev_priv,
2293 page_count,
2294 I915_SHRINK_BOUND |
2295 I915_SHRINK_UNBOUND |
2296 I915_SHRINK_PURGEABLE);
6c085a72
CW
2297 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2298 }
2299 if (IS_ERR(page)) {
2300 /* We've tried hard to allocate the memory by reaping
2301 * our own buffer, now let the real VM do its job and
2302 * go down in flames if truly OOM.
2303 */
6c085a72 2304 i915_gem_shrink_all(dev_priv);
f461d1be 2305 page = shmem_read_mapping_page(mapping, i);
e2273302
ID
2306 if (IS_ERR(page)) {
2307 ret = PTR_ERR(page);
6c085a72 2308 goto err_pages;
e2273302 2309 }
6c085a72 2310 }
426729dc
KRW
2311#ifdef CONFIG_SWIOTLB
2312 if (swiotlb_nr_tbl()) {
2313 st->nents++;
2314 sg_set_page(sg, page, PAGE_SIZE, 0);
2315 sg = sg_next(sg);
2316 continue;
2317 }
2318#endif
90797e6d
ID
2319 if (!i || page_to_pfn(page) != last_pfn + 1) {
2320 if (i)
2321 sg = sg_next(sg);
2322 st->nents++;
2323 sg_set_page(sg, page, PAGE_SIZE, 0);
2324 } else {
2325 sg->length += PAGE_SIZE;
2326 }
2327 last_pfn = page_to_pfn(page);
3bbbe706
DV
2328
2329 /* Check that the i965g/gm workaround works. */
2330 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
e5281ccd 2331 }
426729dc
KRW
2332#ifdef CONFIG_SWIOTLB
2333 if (!swiotlb_nr_tbl())
2334#endif
2335 sg_mark_end(sg);
74ce6b6c
CW
2336 obj->pages = st;
2337
e2273302
ID
2338 ret = i915_gem_gtt_prepare_object(obj);
2339 if (ret)
2340 goto err_pages;
2341
6dacfd2f 2342 if (i915_gem_object_needs_bit17_swizzle(obj))
e5281ccd
CW
2343 i915_gem_object_do_bit_17_swizzle(obj);
2344
656bfa3a
DV
2345 if (obj->tiling_mode != I915_TILING_NONE &&
2346 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2347 i915_gem_object_pin_pages(obj);
2348
e5281ccd
CW
2349 return 0;
2350
2351err_pages:
90797e6d
ID
2352 sg_mark_end(sg);
2353 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
09cbfeaf 2354 put_page(sg_page_iter_page(&sg_iter));
9da3da66
CW
2355 sg_free_table(st);
2356 kfree(st);
0820baf3
CW
2357
2358 /* shmemfs first checks if there is enough memory to allocate the page
2359 * and reports ENOSPC should there be insufficient, along with the usual
2360 * ENOMEM for a genuine allocation failure.
2361 *
2362 * We use ENOSPC in our driver to mean that we have run out of aperture
2363 * space and so want to translate the error from shmemfs back to our
2364 * usual understanding of ENOMEM.
2365 */
e2273302
ID
2366 if (ret == -ENOSPC)
2367 ret = -ENOMEM;
2368
2369 return ret;
673a394b
EA
2370}
2371
37e680a1
CW
2372/* Ensure that the associated pages are gathered from the backing storage
2373 * and pinned into our object. i915_gem_object_get_pages() may be called
2374 * multiple times before they are released by a single call to
2375 * i915_gem_object_put_pages() - once the pages are no longer referenced
2376 * either as a result of memory pressure (reaping pages under the shrinker)
2377 * or as the object is itself released.
2378 */
2379int
2380i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2381{
2382 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2383 const struct drm_i915_gem_object_ops *ops = obj->ops;
2384 int ret;
2385
2f745ad3 2386 if (obj->pages)
37e680a1
CW
2387 return 0;
2388
43e28f09 2389 if (obj->madv != I915_MADV_WILLNEED) {
bd9b6a4e 2390 DRM_DEBUG("Attempting to obtain a purgeable object\n");
8c99e57d 2391 return -EFAULT;
43e28f09
CW
2392 }
2393
a5570178
CW
2394 BUG_ON(obj->pages_pin_count);
2395
37e680a1
CW
2396 ret = ops->get_pages(obj);
2397 if (ret)
2398 return ret;
2399
35c20a60 2400 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
ee286370
CW
2401
2402 obj->get_page.sg = obj->pages->sgl;
2403 obj->get_page.last = 0;
2404
37e680a1 2405 return 0;
673a394b
EA
2406}
2407
0a798eb9
CW
2408void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
2409{
2410 int ret;
2411
2412 lockdep_assert_held(&obj->base.dev->struct_mutex);
2413
2414 ret = i915_gem_object_get_pages(obj);
2415 if (ret)
2416 return ERR_PTR(ret);
2417
2418 i915_gem_object_pin_pages(obj);
2419
2420 if (obj->mapping == NULL) {
0a798eb9 2421 struct page **pages;
0a798eb9 2422
fb8621d3
CW
2423 pages = NULL;
2424 if (obj->base.size == PAGE_SIZE)
2425 obj->mapping = kmap(sg_page(obj->pages->sgl));
2426 else
2427 pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT,
2428 sizeof(*pages),
2429 GFP_TEMPORARY);
0a798eb9 2430 if (pages != NULL) {
fb8621d3
CW
2431 struct sg_page_iter sg_iter;
2432 int n;
2433
0a798eb9
CW
2434 n = 0;
2435 for_each_sg_page(obj->pages->sgl, &sg_iter,
2436 obj->pages->nents, 0)
2437 pages[n++] = sg_page_iter_page(&sg_iter);
2438
2439 obj->mapping = vmap(pages, n, 0, PAGE_KERNEL);
2440 drm_free_large(pages);
2441 }
2442 if (obj->mapping == NULL) {
2443 i915_gem_object_unpin_pages(obj);
2444 return ERR_PTR(-ENOMEM);
2445 }
2446 }
2447
2448 return obj->mapping;
2449}
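
A sketch of how an in-kernel caller might use the helper above (fill_object_cpu is hypothetical and assumes the i915_drv.h environment; releasing the pin via i915_gem_object_unpin_pages() is an assumption of this sketch, since pin_map itself only pins the pages and caches the mapping on the object):

/* Copy CPU data into a whole-object kernel mapping. A real caller would
 * also move the object into the CPU write domain first; that step is
 * omitted here for brevity.
 */
static int fill_object_cpu(struct drm_i915_gem_object *obj,
			   const void *src, size_t len)
{
	void *vaddr;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	vaddr = i915_gem_object_pin_map(obj);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	memcpy(vaddr, src, len);

	i915_gem_object_unpin_pages(obj);
	return 0;
}
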
2450
b4716185 2451void i915_vma_move_to_active(struct i915_vma *vma,
b2af0376 2452 struct drm_i915_gem_request *req)
673a394b 2453{
b4716185 2454 struct drm_i915_gem_object *obj = vma->obj;
e2f80391 2455 struct intel_engine_cs *engine;
b2af0376 2456
666796da 2457 engine = i915_gem_request_get_engine(req);
673a394b
EA
2458
2459 /* Add a reference if we're newly entering the active list. */
b4716185 2460 if (obj->active == 0)
05394f39 2461 drm_gem_object_reference(&obj->base);
666796da 2462 obj->active |= intel_engine_flag(engine);
e35a41de 2463
117897f4 2464 list_move_tail(&obj->engine_list[engine->id], &engine->active_list);
e2f80391 2465 i915_gem_request_assign(&obj->last_read_req[engine->id], req);
caea7476 2466
1c7f4bca 2467 list_move_tail(&vma->vm_link, &vma->vm->active_list);
caea7476
CW
2468}
2469
b4716185
CW
2470static void
2471i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
e2d05a8b 2472{
b4716185 2473 RQ_BUG_ON(obj->last_write_req == NULL);
666796da 2474 RQ_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine)));
b4716185
CW
2475
2476 i915_gem_request_assign(&obj->last_write_req, NULL);
de152b62 2477 intel_fb_obj_flush(obj, true, ORIGIN_CS);
e2d05a8b
BW
2478}
2479
caea7476 2480static void
b4716185 2481i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
ce44b0ea 2482{
feb822cf 2483 struct i915_vma *vma;
ce44b0ea 2484
b4716185
CW
2485 RQ_BUG_ON(obj->last_read_req[ring] == NULL);
2486 RQ_BUG_ON(!(obj->active & (1 << ring)));
2487
117897f4 2488 list_del_init(&obj->engine_list[ring]);
b4716185
CW
2489 i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2490
4a570db5 2491 if (obj->last_write_req && obj->last_write_req->engine->id == ring)
b4716185
CW
2492 i915_gem_object_retire__write(obj);
2493
2494 obj->active &= ~(1 << ring);
2495 if (obj->active)
2496 return;
caea7476 2497
6c246959
CW
2498 /* Bump our place on the bound list to keep it roughly in LRU order
2499 * so that we don't steal from recently used but inactive objects
2500 * (unless we are forced to ofc!)
2501 */
2502 list_move_tail(&obj->global_list,
2503 &to_i915(obj->base.dev)->mm.bound_list);
2504
1c7f4bca
CW
2505 list_for_each_entry(vma, &obj->vma_list, obj_link) {
2506 if (!list_empty(&vma->vm_link))
2507 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
feb822cf 2508 }
caea7476 2509
97b2a6a1 2510 i915_gem_request_assign(&obj->last_fenced_req, NULL);
caea7476 2511 drm_gem_object_unreference(&obj->base);
c8725f3d
CW
2512}
2513
9d773091 2514static int
fca26bb4 2515i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
53d227f2 2516{
9d773091 2517 struct drm_i915_private *dev_priv = dev->dev_private;
e2f80391 2518 struct intel_engine_cs *engine;
29dcb570 2519 int ret;
53d227f2 2520
107f27a5 2521 /* Carefully retire all requests without writing to the rings */
b4ac5afc 2522 for_each_engine(engine, dev_priv) {
666796da 2523 ret = intel_engine_idle(engine);
107f27a5
CW
2524 if (ret)
2525 return ret;
9d773091 2526 }
9d773091 2527 i915_gem_retire_requests(dev);
107f27a5
CW
2528
2529 /* Finally reset hw state */
29dcb570 2530 for_each_engine(engine, dev_priv)
e2f80391 2531 intel_ring_init_seqno(engine, seqno);
498d2ac1 2532
9d773091 2533 return 0;
53d227f2
DV
2534}
2535
fca26bb4
MK
2536int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2537{
2538 struct drm_i915_private *dev_priv = dev->dev_private;
2539 int ret;
2540
2541 if (seqno == 0)
2542 return -EINVAL;
2543
 2544 /* The seqno in the HWS page needs to be set to one less than the
 2545 * value we will inject into the ring
2546 */
2547 ret = i915_gem_init_seqno(dev, seqno - 1);
2548 if (ret)
2549 return ret;
2550
2551 /* Carefully set the last_seqno value so that wrap
2552 * detection still works
2553 */
2554 dev_priv->next_seqno = seqno;
2555 dev_priv->last_seqno = seqno - 1;
2556 if (dev_priv->last_seqno == 0)
2557 dev_priv->last_seqno--;
2558
2559 return 0;
2560}
2561
9d773091
CW
2562int
2563i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
53d227f2 2564{
9d773091
CW
2565 struct drm_i915_private *dev_priv = dev->dev_private;
2566
2567 /* reserve 0 for non-seqno */
2568 if (dev_priv->next_seqno == 0) {
fca26bb4 2569 int ret = i915_gem_init_seqno(dev, 0);
9d773091
CW
2570 if (ret)
2571 return ret;
53d227f2 2572
9d773091
CW
2573 dev_priv->next_seqno = 1;
2574 }
53d227f2 2575
f72b3435 2576 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
9d773091 2577 return 0;
53d227f2
DV
2578}
2579
bf7dc5b7
JH
2580/*
 2581 * NB: This function is not allowed to fail. Doing so would mean that the
2582 * request is not being tracked for completion but the work itself is
2583 * going to happen on the hardware. This would be a Bad Thing(tm).
2584 */
75289874 2585void __i915_add_request(struct drm_i915_gem_request *request,
5b4a60c2
JH
2586 struct drm_i915_gem_object *obj,
2587 bool flush_caches)
673a394b 2588{
e2f80391 2589 struct intel_engine_cs *engine;
75289874 2590 struct drm_i915_private *dev_priv;
48e29f55 2591 struct intel_ringbuffer *ringbuf;
6d3d8274 2592 u32 request_start;
3cce469c
CW
2593 int ret;
2594
48e29f55 2595 if (WARN_ON(request == NULL))
bf7dc5b7 2596 return;
48e29f55 2597
4a570db5 2598 engine = request->engine;
39dabecd 2599 dev_priv = request->i915;
75289874
JH
2600 ringbuf = request->ringbuf;
2601
29b1b415
JH
2602 /*
2603 * To ensure that this call will not fail, space for its emissions
2604 * should already have been reserved in the ring buffer. Let the ring
2605 * know that it is time to use that space up.
2606 */
2607 intel_ring_reserved_space_use(ringbuf);
2608
48e29f55 2609 request_start = intel_ring_get_tail(ringbuf);
cc889e0f
DV
2610 /*
2611 * Emit any outstanding flushes - execbuf can fail to emit the flush
2612 * after having emitted the batchbuffer command. Hence we need to fix
2613 * things up similar to emitting the lazy request. The difference here
2614 * is that the flush _must_ happen before the next request, no matter
2615 * what.
2616 */
5b4a60c2
JH
2617 if (flush_caches) {
2618 if (i915.enable_execlists)
4866d729 2619 ret = logical_ring_flush_all_caches(request);
5b4a60c2 2620 else
4866d729 2621 ret = intel_ring_flush_all_caches(request);
5b4a60c2
JH
2622 /* Not allowed to fail! */
2623 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
2624 }
cc889e0f 2625
7c90b7de
CW
2626 trace_i915_gem_request_add(request);
2627
2628 request->head = request_start;
2629
2630 /* Whilst this request exists, batch_obj will be on the
2631 * active_list, and so will hold the active reference. Only when this
 2632 * request is retired will the batch_obj be moved onto the
2633 * inactive_list and lose its active reference. Hence we do not need
2634 * to explicitly hold another reference here.
2635 */
2636 request->batch_obj = obj;
2637
2638 /* Seal the request and mark it as pending execution. Note that
2639 * we may inspect this state, without holding any locks, during
2640 * hangcheck. Hence we apply the barrier to ensure that we do not
2641 * see a more recent value in the hws than we are tracking.
2642 */
2643 request->emitted_jiffies = jiffies;
2644 request->previous_seqno = engine->last_submitted_seqno;
2645 smp_store_mb(engine->last_submitted_seqno, request->seqno);
2646 list_add_tail(&request->list, &engine->request_list);
2647
a71d8d94
CW
2648 /* Record the position of the start of the request so that
2649 * should we detect the updated seqno part-way through the
2650 * GPU processing the request, we never over-estimate the
2651 * position of the head.
2652 */
6d3d8274 2653 request->postfix = intel_ring_get_tail(ringbuf);
a71d8d94 2654
bf7dc5b7 2655 if (i915.enable_execlists)
e2f80391 2656 ret = engine->emit_request(request);
bf7dc5b7 2657 else {
e2f80391 2658 ret = engine->add_request(request);
53292cdb
MT
2659
2660 request->tail = intel_ring_get_tail(ringbuf);
48e29f55 2661 }
bf7dc5b7
JH
2662 /* Not allowed to fail! */
2663 WARN(ret, "emit|add_request failed: %d!\n", ret);
673a394b 2664
e2f80391 2665 i915_queue_hangcheck(engine->dev);
10cd45b6 2666
87255483
DV
2667 queue_delayed_work(dev_priv->wq,
2668 &dev_priv->mm.retire_work,
2669 round_jiffies_up_relative(HZ));
2670 intel_mark_busy(dev_priv->dev);
cc889e0f 2671
29b1b415
JH
2672 /* Sanity check that the reserved size was large enough. */
2673 intel_ring_reserved_space_end(ringbuf);
673a394b
EA
2674}
2675
939fd762 2676static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
273497e5 2677 const struct intel_context *ctx)
be62acb4 2678{
44e2c070 2679 unsigned long elapsed;
be62acb4 2680
44e2c070
MK
2681 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2682
2683 if (ctx->hang_stats.banned)
be62acb4
MK
2684 return true;
2685
676fa572
CW
2686 if (ctx->hang_stats.ban_period_seconds &&
2687 elapsed <= ctx->hang_stats.ban_period_seconds) {
ccc7bed0 2688 if (!i915_gem_context_is_default(ctx)) {
3fac8978 2689 DRM_DEBUG("context hanging too fast, banning!\n");
ccc7bed0 2690 return true;
88b4aa87
MK
2691 } else if (i915_stop_ring_allow_ban(dev_priv)) {
2692 if (i915_stop_ring_allow_warn(dev_priv))
2693 DRM_ERROR("gpu hanging too fast, banning!\n");
ccc7bed0 2694 return true;
3fac8978 2695 }
be62acb4
MK
2696 }
2697
2698 return false;
2699}
2700
939fd762 2701static void i915_set_reset_status(struct drm_i915_private *dev_priv,
273497e5 2702 struct intel_context *ctx,
b6b0fac0 2703 const bool guilty)
aa60c664 2704{
44e2c070
MK
2705 struct i915_ctx_hang_stats *hs;
2706
2707 if (WARN_ON(!ctx))
2708 return;
aa60c664 2709
44e2c070
MK
2710 hs = &ctx->hang_stats;
2711
2712 if (guilty) {
939fd762 2713 hs->banned = i915_context_is_banned(dev_priv, ctx);
44e2c070
MK
2714 hs->batch_active++;
2715 hs->guilty_ts = get_seconds();
2716 } else {
2717 hs->batch_pending++;
aa60c664
MK
2718 }
2719}
2720
abfe262a
JH
2721void i915_gem_request_free(struct kref *req_ref)
2722{
2723 struct drm_i915_gem_request *req = container_of(req_ref,
2724 typeof(*req), ref);
2725 struct intel_context *ctx = req->ctx;
2726
fcfa423c
JH
2727 if (req->file_priv)
2728 i915_gem_request_remove_from_client(req);
2729
0794aed3 2730 if (ctx) {
e28e404c 2731 if (i915.enable_execlists && ctx != req->i915->kernel_context)
4a570db5 2732 intel_lr_context_unpin(ctx, req->engine);
abfe262a 2733
dcb4c12a
OM
2734 i915_gem_context_unreference(ctx);
2735 }
abfe262a 2736
efab6d8d 2737 kmem_cache_free(req->i915->requests, req);
0e50e96b
MK
2738}
2739
26827088 2740static inline int
0bc40be8 2741__i915_gem_request_alloc(struct intel_engine_cs *engine,
26827088
DG
2742 struct intel_context *ctx,
2743 struct drm_i915_gem_request **req_out)
6689cb2b 2744{
0bc40be8 2745 struct drm_i915_private *dev_priv = to_i915(engine->dev);
eed29a5b 2746 struct drm_i915_gem_request *req;
6689cb2b 2747 int ret;
6689cb2b 2748
217e46b5
JH
2749 if (!req_out)
2750 return -EINVAL;
2751
bccca494 2752 *req_out = NULL;
6689cb2b 2753
eed29a5b
DV
2754 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
2755 if (req == NULL)
6689cb2b
JH
2756 return -ENOMEM;
2757
0bc40be8 2758 ret = i915_gem_get_seqno(engine->dev, &req->seqno);
9a0c1e27
CW
2759 if (ret)
2760 goto err;
6689cb2b 2761
40e895ce
JH
2762 kref_init(&req->ref);
2763 req->i915 = dev_priv;
4a570db5 2764 req->engine = engine;
40e895ce
JH
2765 req->ctx = ctx;
2766 i915_gem_context_reference(req->ctx);
6689cb2b
JH
2767
2768 if (i915.enable_execlists)
40e895ce 2769 ret = intel_logical_ring_alloc_request_extras(req);
6689cb2b 2770 else
eed29a5b 2771 ret = intel_ring_alloc_request_extras(req);
40e895ce
JH
2772 if (ret) {
2773 i915_gem_context_unreference(req->ctx);
9a0c1e27 2774 goto err;
40e895ce 2775 }
6689cb2b 2776
29b1b415
JH
2777 /*
2778 * Reserve space in the ring buffer for all the commands required to
2779 * eventually emit this request. This is to guarantee that the
2780 * i915_add_request() call can't fail. Note that the reserve may need
2781 * to be redone if the request is not actually submitted straight
2782 * away, e.g. because a GPU scheduler has deferred it.
29b1b415 2783 */
ccd98fe4
JH
2784 if (i915.enable_execlists)
2785 ret = intel_logical_ring_reserve_space(req);
2786 else
2787 ret = intel_ring_reserve_space(req);
2788 if (ret) {
2789 /*
2790 * At this point, the request is fully allocated even if not
2791 * fully prepared. Thus it can be cleaned up using the proper
2792 * free code.
2793 */
2794 i915_gem_request_cancel(req);
2795 return ret;
2796 }
29b1b415 2797
bccca494 2798 *req_out = req;
6689cb2b 2799 return 0;
9a0c1e27
CW
2800
2801err:
2802 kmem_cache_free(dev_priv->requests, req);
2803 return ret;
0e50e96b
MK
2804}
2805
26827088
DG
2806/**
2807 * i915_gem_request_alloc - allocate a request structure
2808 *
2809 * @engine: engine that we wish to issue the request on.
2810 * @ctx: context that the request will be associated with.
2811 * This can be NULL if the request is not directly related to
2812 * any specific user context, in which case this function will
2813 * choose an appropriate context to use.
2814 *
2815 * Returns a pointer to the allocated request if successful,
2816 * or an error code if not.
2817 */
2818struct drm_i915_gem_request *
2819i915_gem_request_alloc(struct intel_engine_cs *engine,
2820 struct intel_context *ctx)
2821{
2822 struct drm_i915_gem_request *req;
2823 int err;
2824
2825 if (ctx == NULL)
ed54c1a1 2826 ctx = to_i915(engine->dev)->kernel_context;
26827088
DG
2827 err = __i915_gem_request_alloc(engine, ctx, &req);
2828 return err ? ERR_PTR(err) : req;
2829}
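
The allocate/submit/cancel contract spelled out above is easiest to see in a caller; this is condensed from the legacy (non-execlists) path of i915_gpu_idle() further down, wrapped in a hypothetical helper:

/* Every successfully allocated request must either be submitted (here
 * via i915_add_request_no_flush()) or cancelled, so that the ring space
 * reserved at allocation time is returned.
 */
static int emit_switch_request(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *req;
	int ret;

	req = i915_gem_request_alloc(engine, NULL);
	if (IS_ERR(req))
		return PTR_ERR(req);

	ret = i915_switch_context(req);
	if (ret) {
		i915_gem_request_cancel(req);
		return ret;
	}

	i915_add_request_no_flush(req);
	return 0;
}
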
2830
29b1b415
JH
2831void i915_gem_request_cancel(struct drm_i915_gem_request *req)
2832{
2833 intel_ring_reserved_space_cancel(req->ringbuf);
2834
2835 i915_gem_request_unreference(req);
2836}
2837
8d9fc7fd 2838struct drm_i915_gem_request *
0bc40be8 2839i915_gem_find_active_request(struct intel_engine_cs *engine)
9375e446 2840{
4db080f9
CW
2841 struct drm_i915_gem_request *request;
2842
0bc40be8 2843 list_for_each_entry(request, &engine->request_list, list) {
1b5a433a 2844 if (i915_gem_request_completed(request, false))
4db080f9 2845 continue;
aa60c664 2846
b6b0fac0 2847 return request;
4db080f9 2848 }
b6b0fac0
MK
2849
2850 return NULL;
2851}
2852
666796da 2853static void i915_gem_reset_engine_status(struct drm_i915_private *dev_priv,
0bc40be8 2854 struct intel_engine_cs *engine)
b6b0fac0
MK
2855{
2856 struct drm_i915_gem_request *request;
2857 bool ring_hung;
2858
0bc40be8 2859 request = i915_gem_find_active_request(engine);
b6b0fac0
MK
2860
2861 if (request == NULL)
2862 return;
2863
0bc40be8 2864 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
b6b0fac0 2865
939fd762 2866 i915_set_reset_status(dev_priv, request->ctx, ring_hung);
b6b0fac0 2867
0bc40be8 2868 list_for_each_entry_continue(request, &engine->request_list, list)
939fd762 2869 i915_set_reset_status(dev_priv, request->ctx, false);
4db080f9 2870}
aa60c664 2871
666796da 2872static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv,
0bc40be8 2873 struct intel_engine_cs *engine)
4db080f9 2874{
608c1a52
CW
2875 struct intel_ringbuffer *buffer;
2876
0bc40be8 2877 while (!list_empty(&engine->active_list)) {
05394f39 2878 struct drm_i915_gem_object *obj;
9375e446 2879
0bc40be8 2880 obj = list_first_entry(&engine->active_list,
05394f39 2881 struct drm_i915_gem_object,
117897f4 2882 engine_list[engine->id]);
9375e446 2883
0bc40be8 2884 i915_gem_object_retire__read(obj, engine->id);
673a394b 2885 }
1d62beea 2886
dcb4c12a
OM
2887 /*
2888 * Clear the execlists queue up before freeing the requests, as those
2889 * are the ones that keep the context and ringbuffer backing objects
2890 * pinned in place.
2891 */
dcb4c12a 2892
7de1691a 2893 if (i915.enable_execlists) {
27af5eea
TU
2894 /* Ensure irq handler finishes or is cancelled. */
2895 tasklet_kill(&engine->irq_tasklet);
1197b4f2 2896
27af5eea 2897 spin_lock_bh(&engine->execlist_lock);
c5baa566 2898 /* list_splice_tail_init checks for empty lists */
0bc40be8
TU
2899 list_splice_tail_init(&engine->execlist_queue,
2900 &engine->execlist_retired_req_list);
27af5eea 2901 spin_unlock_bh(&engine->execlist_lock);
1197b4f2 2902
0bc40be8 2903 intel_execlists_retire_requests(engine);
dcb4c12a
OM
2904 }
2905
1d62beea
BW
2906 /*
2907 * We must free the requests after all the corresponding objects have
 2908 * been moved off active lists, which is the same order the normal
 2909 * retire_requests path uses. This is important if objects hold
2910 * implicit references on things like e.g. ppgtt address spaces through
2911 * the request.
2912 */
0bc40be8 2913 while (!list_empty(&engine->request_list)) {
1d62beea
BW
2914 struct drm_i915_gem_request *request;
2915
0bc40be8 2916 request = list_first_entry(&engine->request_list,
1d62beea
BW
2917 struct drm_i915_gem_request,
2918 list);
2919
b4716185 2920 i915_gem_request_retire(request);
1d62beea 2921 }
608c1a52
CW
2922
2923 /* Having flushed all requests from all queues, we know that all
2924 * ringbuffers must now be empty. However, since we do not reclaim
2925 * all space when retiring the request (to prevent HEADs colliding
2926 * with rapid ringbuffer wraparound) the amount of available space
2927 * upon reset is less than when we start. Do one more pass over
2928 * all the ringbuffers to reset last_retired_head.
2929 */
0bc40be8 2930 list_for_each_entry(buffer, &engine->buffers, link) {
608c1a52
CW
2931 buffer->last_retired_head = buffer->tail;
2932 intel_ring_update_space(buffer);
2933 }
2ed53a94
CW
2934
2935 intel_ring_init_seqno(engine, engine->last_submitted_seqno);
673a394b
EA
2936}
2937
069efc1d 2938void i915_gem_reset(struct drm_device *dev)
673a394b 2939{
77f01230 2940 struct drm_i915_private *dev_priv = dev->dev_private;
e2f80391 2941 struct intel_engine_cs *engine;
673a394b 2942
4db080f9
CW
2943 /*
2944 * Before we free the objects from the requests, we need to inspect
 2945 * them to find the guilty party. As the requests only borrow
2946 * their reference to the objects, the inspection must be done first.
2947 */
b4ac5afc 2948 for_each_engine(engine, dev_priv)
666796da 2949 i915_gem_reset_engine_status(dev_priv, engine);
4db080f9 2950
b4ac5afc 2951 for_each_engine(engine, dev_priv)
666796da 2952 i915_gem_reset_engine_cleanup(dev_priv, engine);
dfaae392 2953
acce9ffa
BW
2954 i915_gem_context_reset(dev);
2955
19b2dbde 2956 i915_gem_restore_fences(dev);
b4716185
CW
2957
2958 WARN_ON(i915_verify_lists(dev));
673a394b
EA
2959}
2960
2961/**
2962 * This function clears the request list as sequence numbers are passed.
2963 */
1cf0ba14 2964void
0bc40be8 2965i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
673a394b 2966{
0bc40be8 2967 WARN_ON(i915_verify_lists(engine->dev));
673a394b 2968
832a3aad
CW
2969 /* Retire requests first as we use it above for the early return.
2970 * If we retire requests last, we may use a later seqno and so clear
2971 * the requests lists without clearing the active list, leading to
2972 * confusion.
e9103038 2973 */
0bc40be8 2974 while (!list_empty(&engine->request_list)) {
673a394b 2975 struct drm_i915_gem_request *request;
673a394b 2976
0bc40be8 2977 request = list_first_entry(&engine->request_list,
673a394b
EA
2978 struct drm_i915_gem_request,
2979 list);
673a394b 2980
1b5a433a 2981 if (!i915_gem_request_completed(request, true))
b84d5f0c
CW
2982 break;
2983
b4716185 2984 i915_gem_request_retire(request);
b84d5f0c 2985 }
673a394b 2986
832a3aad
CW
2987 /* Move any buffers on the active list that are no longer referenced
2988 * by the ringbuffer to the flushing/inactive lists as appropriate,
2989 * before we free the context associated with the requests.
2990 */
0bc40be8 2991 while (!list_empty(&engine->active_list)) {
832a3aad
CW
2992 struct drm_i915_gem_object *obj;
2993
0bc40be8
TU
2994 obj = list_first_entry(&engine->active_list,
2995 struct drm_i915_gem_object,
117897f4 2996 engine_list[engine->id]);
832a3aad 2997
0bc40be8 2998 if (!list_empty(&obj->last_read_req[engine->id]->list))
832a3aad
CW
2999 break;
3000
0bc40be8 3001 i915_gem_object_retire__read(obj, engine->id);
832a3aad
CW
3002 }
3003
0bc40be8
TU
3004 if (unlikely(engine->trace_irq_req &&
3005 i915_gem_request_completed(engine->trace_irq_req, true))) {
3006 engine->irq_put(engine);
3007 i915_gem_request_assign(&engine->trace_irq_req, NULL);
9d34e5db 3008 }
23bc5982 3009
0bc40be8 3010 WARN_ON(i915_verify_lists(engine->dev));
673a394b
EA
3011}
3012
b29c19b6 3013bool
b09a1fec
CW
3014i915_gem_retire_requests(struct drm_device *dev)
3015{
3e31c6c0 3016 struct drm_i915_private *dev_priv = dev->dev_private;
e2f80391 3017 struct intel_engine_cs *engine;
b29c19b6 3018 bool idle = true;
b09a1fec 3019
b4ac5afc 3020 for_each_engine(engine, dev_priv) {
e2f80391
TU
3021 i915_gem_retire_requests_ring(engine);
3022 idle &= list_empty(&engine->request_list);
c86ee3a9 3023 if (i915.enable_execlists) {
27af5eea 3024 spin_lock_bh(&engine->execlist_lock);
e2f80391 3025 idle &= list_empty(&engine->execlist_queue);
27af5eea 3026 spin_unlock_bh(&engine->execlist_lock);
c86ee3a9 3027
e2f80391 3028 intel_execlists_retire_requests(engine);
c86ee3a9 3029 }
b29c19b6
CW
3030 }
3031
3032 if (idle)
3033 mod_delayed_work(dev_priv->wq,
3034 &dev_priv->mm.idle_work,
3035 msecs_to_jiffies(100));
3036
3037 return idle;
b09a1fec
CW
3038}
3039
75ef9da2 3040static void
673a394b
EA
3041i915_gem_retire_work_handler(struct work_struct *work)
3042{
b29c19b6
CW
3043 struct drm_i915_private *dev_priv =
3044 container_of(work, typeof(*dev_priv), mm.retire_work.work);
3045 struct drm_device *dev = dev_priv->dev;
0a58705b 3046 bool idle;
673a394b 3047
891b48cf 3048 /* Come back later if the device is busy... */
b29c19b6
CW
3049 idle = false;
3050 if (mutex_trylock(&dev->struct_mutex)) {
3051 idle = i915_gem_retire_requests(dev);
3052 mutex_unlock(&dev->struct_mutex);
673a394b 3053 }
b29c19b6 3054 if (!idle)
bcb45086
CW
3055 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3056 round_jiffies_up_relative(HZ));
b29c19b6 3057}
0a58705b 3058
b29c19b6
CW
3059static void
3060i915_gem_idle_work_handler(struct work_struct *work)
3061{
3062 struct drm_i915_private *dev_priv =
3063 container_of(work, typeof(*dev_priv), mm.idle_work.work);
35c94185 3064 struct drm_device *dev = dev_priv->dev;
b4ac5afc 3065 struct intel_engine_cs *engine;
b29c19b6 3066
b4ac5afc
DG
3067 for_each_engine(engine, dev_priv)
3068 if (!list_empty(&engine->request_list))
423795cb 3069 return;
35c94185 3070
30ecad77 3071 /* we probably should sync with hangcheck here, using cancel_work_sync.
b4ac5afc 3072 * Also locking seems to be fubar here, engine->request_list is protected
30ecad77
DV
3073 * by dev->struct_mutex. */
3074
35c94185
CW
3075 intel_mark_idle(dev);
3076
3077 if (mutex_trylock(&dev->struct_mutex)) {
b4ac5afc 3078 for_each_engine(engine, dev_priv)
e2f80391 3079 i915_gem_batch_pool_fini(&engine->batch_pool);
b29c19b6 3080
35c94185
CW
3081 mutex_unlock(&dev->struct_mutex);
3082 }
673a394b
EA
3083}
3084
30dfebf3
DV
3085/**
3086 * Ensures that an object will eventually get non-busy by flushing any required
 3087 * write domains, emitting any outstanding lazy request and retiring any
3088 * completed requests.
3089 */
3090static int
3091i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3092{
a5ac0f90 3093 int i;
b4716185
CW
3094
3095 if (!obj->active)
3096 return 0;
30dfebf3 3097
666796da 3098 for (i = 0; i < I915_NUM_ENGINES; i++) {
b4716185 3099 struct drm_i915_gem_request *req;
41c52415 3100
b4716185
CW
3101 req = obj->last_read_req[i];
3102 if (req == NULL)
3103 continue;
3104
3105 if (list_empty(&req->list))
3106 goto retire;
3107
b4716185
CW
3108 if (i915_gem_request_completed(req, true)) {
3109 __i915_gem_request_retire__upto(req);
3110retire:
3111 i915_gem_object_retire__read(obj, i);
3112 }
30dfebf3
DV
3113 }
3114
3115 return 0;
3116}
3117
23ba4fd0
BW
3118/**
3119 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3120 * @DRM_IOCTL_ARGS: standard ioctl arguments
3121 *
3122 * Returns 0 if successful, else an error is returned with the remaining time in
3123 * the timeout parameter.
3124 * -ETIME: object is still busy after timeout
3125 * -ERESTARTSYS: signal interrupted the wait
 3126 * -ENOENT: object doesn't exist
3127 * Also possible, but rare:
3128 * -EAGAIN: GPU wedged
3129 * -ENOMEM: damn
3130 * -ENODEV: Internal IRQ fail
3131 * -E?: The add request failed
3132 *
3133 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3134 * non-zero timeout parameter the wait ioctl will wait for the given number of
3135 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 3136 * without holding struct_mutex, the object may become re-busied before this
 3137 * function completes. A similar but shorter race condition exists in the busy
3138 * ioctl
3139 */
3140int
3141i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3142{
3e31c6c0 3143 struct drm_i915_private *dev_priv = dev->dev_private;
23ba4fd0
BW
3144 struct drm_i915_gem_wait *args = data;
3145 struct drm_i915_gem_object *obj;
666796da 3146 struct drm_i915_gem_request *req[I915_NUM_ENGINES];
f69061be 3147 unsigned reset_counter;
b4716185
CW
3148 int i, n = 0;
3149 int ret;
23ba4fd0 3150
11b5d511
DV
3151 if (args->flags != 0)
3152 return -EINVAL;
3153
23ba4fd0
BW
3154 ret = i915_mutex_lock_interruptible(dev);
3155 if (ret)
3156 return ret;
3157
3158 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
3159 if (&obj->base == NULL) {
3160 mutex_unlock(&dev->struct_mutex);
3161 return -ENOENT;
3162 }
3163
30dfebf3
DV
3164 /* Need to make sure the object gets inactive eventually. */
3165 ret = i915_gem_object_flush_active(obj);
23ba4fd0
BW
3166 if (ret)
3167 goto out;
3168
b4716185 3169 if (!obj->active)
97b2a6a1 3170 goto out;
23ba4fd0 3171
23ba4fd0 3172 /* Do this after OLR check to make sure we make forward progress polling
762e4583 3173 * on this IOCTL with a timeout == 0 (like busy ioctl)
23ba4fd0 3174 */
762e4583 3175 if (args->timeout_ns == 0) {
23ba4fd0
BW
3176 ret = -ETIME;
3177 goto out;
3178 }
3179
3180 drm_gem_object_unreference(&obj->base);
f69061be 3181 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
b4716185 3182
666796da 3183 for (i = 0; i < I915_NUM_ENGINES; i++) {
b4716185
CW
3184 if (obj->last_read_req[i] == NULL)
3185 continue;
3186
3187 req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
3188 }
3189
23ba4fd0
BW
3190 mutex_unlock(&dev->struct_mutex);
3191
b4716185
CW
3192 for (i = 0; i < n; i++) {
3193 if (ret == 0)
3194 ret = __i915_wait_request(req[i], reset_counter, true,
3195 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
b6aa0873 3196 to_rps_client(file));
b4716185
CW
3197 i915_gem_request_unreference__unlocked(req[i]);
3198 }
ff865885 3199 return ret;
23ba4fd0
BW
3200
3201out:
3202 drm_gem_object_unreference(&obj->base);
3203 mutex_unlock(&dev->struct_mutex);
3204 return ret;
3205}
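
The ioctl above is driven from userspace like this; a minimal sketch, assuming a valid DRM fd and buffer handle (gem_bo_busy_or_wait is a made-up name):

#include <stdint.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Returns 1 when the object is idle, 0 when it is still busy after the
 * timeout, or a negative errno. A timeout_ns of 0 turns this into the
 * pure busy check described in the comment above.
 */
static int gem_bo_busy_or_wait(int drm_fd, uint32_t bo_handle, int64_t timeout_ns)
{
	struct drm_i915_gem_wait wait = {
		.bo_handle = bo_handle,
		.timeout_ns = timeout_ns,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_WAIT, &wait) == 0)
		return 1;

	return errno == ETIME ? 0 : -errno;
}
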
3206
b4716185
CW
3207static int
3208__i915_gem_object_sync(struct drm_i915_gem_object *obj,
3209 struct intel_engine_cs *to,
91af127f
JH
3210 struct drm_i915_gem_request *from_req,
3211 struct drm_i915_gem_request **to_req)
b4716185
CW
3212{
3213 struct intel_engine_cs *from;
3214 int ret;
3215
666796da 3216 from = i915_gem_request_get_engine(from_req);
b4716185
CW
3217 if (to == from)
3218 return 0;
3219
91af127f 3220 if (i915_gem_request_completed(from_req, true))
b4716185
CW
3221 return 0;
3222
b4716185 3223 if (!i915_semaphore_is_enabled(obj->base.dev)) {
a6f766f3 3224 struct drm_i915_private *i915 = to_i915(obj->base.dev);
91af127f 3225 ret = __i915_wait_request(from_req,
a6f766f3
CW
3226 atomic_read(&i915->gpu_error.reset_counter),
3227 i915->mm.interruptible,
3228 NULL,
3229 &i915->rps.semaphores);
b4716185
CW
3230 if (ret)
3231 return ret;
3232
91af127f 3233 i915_gem_object_retire_request(obj, from_req);
b4716185
CW
3234 } else {
3235 int idx = intel_ring_sync_index(from, to);
91af127f
JH
3236 u32 seqno = i915_gem_request_get_seqno(from_req);
3237
3238 WARN_ON(!to_req);
b4716185
CW
3239
3240 if (seqno <= from->semaphore.sync_seqno[idx])
3241 return 0;
3242
91af127f 3243 if (*to_req == NULL) {
26827088
DG
3244 struct drm_i915_gem_request *req;
3245
3246 req = i915_gem_request_alloc(to, NULL);
3247 if (IS_ERR(req))
3248 return PTR_ERR(req);
3249
3250 *to_req = req;
91af127f
JH
3251 }
3252
599d924c
JH
3253 trace_i915_gem_ring_sync_to(*to_req, from, from_req);
3254 ret = to->semaphore.sync_to(*to_req, from, seqno);
b4716185
CW
3255 if (ret)
3256 return ret;
3257
3258 /* We use last_read_req because sync_to()
3259 * might have just caused seqno wrap under
3260 * the radar.
3261 */
3262 from->semaphore.sync_seqno[idx] =
3263 i915_gem_request_get_seqno(obj->last_read_req[from->id]);
3264 }
3265
3266 return 0;
3267}
3268
5816d648
BW
3269/**
3270 * i915_gem_object_sync - sync an object to a ring.
3271 *
3272 * @obj: object which may be in use on another ring.
3273 * @to: ring we wish to use the object on. May be NULL.
91af127f
JH
3274 * @to_req: request we wish to use the object for. See below.
3275 * This will be allocated and returned if a request is
3276 * required but not passed in.
5816d648
BW
3277 *
3278 * This code is meant to abstract object synchronization with the GPU.
3279 * Calling with NULL implies synchronizing the object with the CPU
b4716185 3280 * rather than a particular GPU ring. Conceptually we serialise writes
91af127f 3281 * between engines inside the GPU. We only allow one engine to write
b4716185
CW
3282 * into a buffer at any time, but multiple readers. To ensure each has
3283 * a coherent view of memory, we must:
3284 *
3285 * - If there is an outstanding write request to the object, the new
3286 * request must wait for it to complete (either CPU or in hw, requests
3287 * on the same ring will be naturally ordered).
3288 *
3289 * - If we are a write request (pending_write_domain is set), the new
3290 * request must wait for outstanding read requests to complete.
5816d648 3291 *
91af127f
JH
3292 * For CPU synchronisation (NULL to) no request is required. For syncing with
3293 * rings to_req must be non-NULL. However, a request does not have to be
3294 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
3295 * request will be allocated automatically and returned through *to_req. Note
3296 * that it is not guaranteed that commands will be emitted (because the system
3297 * might already be idle). Hence there is no need to create a request that
3298 * might never have any work submitted. Note further that if a request is
3299 * returned in *to_req, it is the responsibility of the caller to submit
3300 * that request (after potentially adding more work to it).
3301 *
5816d648
BW
3302 * Returns 0 if successful, else propagates up the lower layer error.
3303 */
2911a35b
BW
3304int
3305i915_gem_object_sync(struct drm_i915_gem_object *obj,
91af127f
JH
3306 struct intel_engine_cs *to,
3307 struct drm_i915_gem_request **to_req)
2911a35b 3308{
b4716185 3309 const bool readonly = obj->base.pending_write_domain == 0;
666796da 3310 struct drm_i915_gem_request *req[I915_NUM_ENGINES];
b4716185 3311 int ret, i, n;
41c52415 3312
b4716185 3313 if (!obj->active)
2911a35b
BW
3314 return 0;
3315
b4716185
CW
3316 if (to == NULL)
3317 return i915_gem_object_wait_rendering(obj, readonly);
2911a35b 3318
b4716185
CW
3319 n = 0;
3320 if (readonly) {
3321 if (obj->last_write_req)
3322 req[n++] = obj->last_write_req;
3323 } else {
666796da 3324 for (i = 0; i < I915_NUM_ENGINES; i++)
b4716185
CW
3325 if (obj->last_read_req[i])
3326 req[n++] = obj->last_read_req[i];
3327 }
3328 for (i = 0; i < n; i++) {
91af127f 3329 ret = __i915_gem_object_sync(obj, to, req[i], to_req);
b4716185
CW
3330 if (ret)
3331 return ret;
3332 }
2911a35b 3333
b4716185 3334 return 0;
2911a35b
BW
3335}
3336
b5ffc9bc
CW
3337static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3338{
3339 u32 old_write_domain, old_read_domains;
3340
b5ffc9bc
CW
3341 /* Force a pagefault for domain tracking on next user access */
3342 i915_gem_release_mmap(obj);
3343
b97c3d9c
KP
3344 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3345 return;
3346
97c809fd
CW
3347 /* Wait for any direct GTT access to complete */
3348 mb();
3349
b5ffc9bc
CW
3350 old_read_domains = obj->base.read_domains;
3351 old_write_domain = obj->base.write_domain;
3352
3353 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3354 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3355
3356 trace_i915_gem_object_change_domain(obj,
3357 old_read_domains,
3358 old_write_domain);
3359}
3360
e9f24d5f 3361static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
673a394b 3362{
07fe0b12 3363 struct drm_i915_gem_object *obj = vma->obj;
3e31c6c0 3364 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
43e28f09 3365 int ret;
673a394b 3366
1c7f4bca 3367 if (list_empty(&vma->obj_link))
673a394b
EA
3368 return 0;
3369
0ff501cb
DV
3370 if (!drm_mm_node_allocated(&vma->node)) {
3371 i915_gem_vma_destroy(vma);
0ff501cb
DV
3372 return 0;
3373 }
433544bd 3374
d7f46fc4 3375 if (vma->pin_count)
31d8d651 3376 return -EBUSY;
673a394b 3377
c4670ad0
CW
3378 BUG_ON(obj->pages == NULL);
3379
e9f24d5f
TU
3380 if (wait) {
3381 ret = i915_gem_object_wait_rendering(obj, false);
3382 if (ret)
3383 return ret;
3384 }
a8198eea 3385
596c5923 3386 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
8b1bc9b4 3387 i915_gem_object_finish_gtt(obj);
5323fd04 3388
8b1bc9b4
DV
3389 /* release the fence reg _after_ flushing */
3390 ret = i915_gem_object_put_fence(obj);
3391 if (ret)
3392 return ret;
3393 }
96b47b65 3394
07fe0b12 3395 trace_i915_vma_unbind(vma);
db53a302 3396
777dc5bb 3397 vma->vm->unbind_vma(vma);
5e562f1d 3398 vma->bound = 0;
6f65e29a 3399
1c7f4bca 3400 list_del_init(&vma->vm_link);
596c5923 3401 if (vma->is_ggtt) {
fe14d5f4
TU
3402 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3403 obj->map_and_fenceable = false;
3404 } else if (vma->ggtt_view.pages) {
3405 sg_free_table(vma->ggtt_view.pages);
3406 kfree(vma->ggtt_view.pages);
fe14d5f4 3407 }
016a65a3 3408 vma->ggtt_view.pages = NULL;
fe14d5f4 3409 }
673a394b 3410
2f633156
BW
3411 drm_mm_remove_node(&vma->node);
3412 i915_gem_vma_destroy(vma);
3413
3414 /* Since the unbound list is global, only move to that list if
b93dab6e 3415 * no more VMAs exist. */
e2273302 3416 if (list_empty(&obj->vma_list))
2f633156 3417 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
673a394b 3418
70903c3b
CW
3419 /* And finally, now that the object is completely decoupled from this vma,
3420 * we can drop its hold on the backing storage and allow it to be
3421 * reaped by the shrinker.
3422 */
3423 i915_gem_object_unpin_pages(obj);
3424
88241785 3425 return 0;
54cf91dc
CW
3426}
3427
e9f24d5f
TU
3428int i915_vma_unbind(struct i915_vma *vma)
3429{
3430 return __i915_vma_unbind(vma, true);
3431}
3432
3433int __i915_vma_unbind_no_wait(struct i915_vma *vma)
3434{
3435 return __i915_vma_unbind(vma, false);
3436}
3437
b2da9fe5 3438int i915_gpu_idle(struct drm_device *dev)
4df2faf4 3439{
3e31c6c0 3440 struct drm_i915_private *dev_priv = dev->dev_private;
e2f80391 3441 struct intel_engine_cs *engine;
b4ac5afc 3442 int ret;
4df2faf4 3443
4df2faf4 3444 /* Flush everything onto the inactive list. */
b4ac5afc 3445 for_each_engine(engine, dev_priv) {
ecdb5fd8 3446 if (!i915.enable_execlists) {
73cfa865
JH
3447 struct drm_i915_gem_request *req;
3448
e2f80391 3449 req = i915_gem_request_alloc(engine, NULL);
26827088
DG
3450 if (IS_ERR(req))
3451 return PTR_ERR(req);
73cfa865 3452
ba01cc93 3453 ret = i915_switch_context(req);
73cfa865
JH
3454 if (ret) {
3455 i915_gem_request_cancel(req);
3456 return ret;
3457 }
3458
75289874 3459 i915_add_request_no_flush(req);
ecdb5fd8 3460 }
b6c7488d 3461
666796da 3462 ret = intel_engine_idle(engine);
1ec14ad3
CW
3463 if (ret)
3464 return ret;
3465 }
4df2faf4 3466
b4716185 3467 WARN_ON(i915_verify_lists(dev));
8a1a49f9 3468 return 0;
4df2faf4
DV
3469}
3470
4144f9b5 3471static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
42d6ab48
CW
3472 unsigned long cache_level)
3473{
4144f9b5 3474 struct drm_mm_node *gtt_space = &vma->node;
42d6ab48
CW
3475 struct drm_mm_node *other;
3476
4144f9b5
CW
3477 /*
3478 * On some machines we have to be careful when putting differing types
3479 * of snoopable memory together to avoid the prefetcher crossing memory
3480 * domains and dying. During vm initialisation, we decide whether or not
3481 * these constraints apply and set the drm_mm.color_adjust
3482 * appropriately.
42d6ab48 3483 */
4144f9b5 3484 if (vma->vm->mm.color_adjust == NULL)
42d6ab48
CW
3485 return true;
3486
c6cfb325 3487 if (!drm_mm_node_allocated(gtt_space))
42d6ab48
CW
3488 return true;
3489
3490 if (list_empty(&gtt_space->node_list))
3491 return true;
3492
3493 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3494 if (other->allocated && !other->hole_follows && other->color != cache_level)
3495 return false;
3496
3497 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3498 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3499 return false;
3500
3501 return true;
3502}
3503
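/*
 * Editor's note: the "color" compared above is the VMA's cache level
 * (vma->node.color is set from obj->cache_level at bind time), and the
 * guard-page behaviour comes from the drm_mm color_adjust hook that the GTT
 * setup code installs when this constraint applies. A hedged sketch of such
 * a hook; the name, the 4096-byte guard and the exact signature are
 * assumptions, not the driver's literal implementation:
 */
static void example_gtt_color_adjust(struct drm_mm_node *node,
				     unsigned long color,
				     u64 *start, u64 *end)
{
	/* Keep a one-page hole between nodes of differing cache levels. */
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node, node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}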
673a394b 3504/**
91e6711e
JL
3505 * Finds free space in the GTT aperture and binds the object or a view of it
3506 * there.
673a394b 3507 */
262de145 3508static struct i915_vma *
07fe0b12
BW
3509i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3510 struct i915_address_space *vm,
ec7adb6e 3511 const struct i915_ggtt_view *ggtt_view,
07fe0b12 3512 unsigned alignment,
ec7adb6e 3513 uint64_t flags)
673a394b 3514{
05394f39 3515 struct drm_device *dev = obj->base.dev;
72e96d64
JL
3516 struct drm_i915_private *dev_priv = to_i915(dev);
3517 struct i915_ggtt *ggtt = &dev_priv->ggtt;
65bd342f 3518 u32 fence_alignment, unfenced_alignment;
101b506a
MT
3519 u32 search_flag, alloc_flag;
3520 u64 start, end;
65bd342f 3521 u64 size, fence_size;
2f633156 3522 struct i915_vma *vma;
07f73f69 3523 int ret;
673a394b 3524
91e6711e
JL
3525 if (i915_is_ggtt(vm)) {
3526 u32 view_size;
3527
3528 if (WARN_ON(!ggtt_view))
3529 return ERR_PTR(-EINVAL);
ec7adb6e 3530
91e6711e
JL
3531 view_size = i915_ggtt_view_size(obj, ggtt_view);
3532
3533 fence_size = i915_gem_get_gtt_size(dev,
3534 view_size,
3535 obj->tiling_mode);
3536 fence_alignment = i915_gem_get_gtt_alignment(dev,
3537 view_size,
3538 obj->tiling_mode,
3539 true);
3540 unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3541 view_size,
3542 obj->tiling_mode,
3543 false);
3544 size = flags & PIN_MAPPABLE ? fence_size : view_size;
3545 } else {
3546 fence_size = i915_gem_get_gtt_size(dev,
3547 obj->base.size,
3548 obj->tiling_mode);
3549 fence_alignment = i915_gem_get_gtt_alignment(dev,
3550 obj->base.size,
3551 obj->tiling_mode,
3552 true);
3553 unfenced_alignment =
3554 i915_gem_get_gtt_alignment(dev,
3555 obj->base.size,
3556 obj->tiling_mode,
3557 false);
3558 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3559 }
a00b10c3 3560
101b506a
MT
3561 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3562 end = vm->total;
3563 if (flags & PIN_MAPPABLE)
72e96d64 3564 end = min_t(u64, end, ggtt->mappable_end);
101b506a 3565 if (flags & PIN_ZONE_4G)
48ea1e32 3566 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
101b506a 3567
673a394b 3568 if (alignment == 0)
1ec9e26d 3569 alignment = flags & PIN_MAPPABLE ? fence_alignment :
5e783301 3570 unfenced_alignment;
1ec9e26d 3571 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
91e6711e
JL
3572 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3573 ggtt_view ? ggtt_view->type : 0,
3574 alignment);
262de145 3575 return ERR_PTR(-EINVAL);
673a394b
EA
3576 }
3577
91e6711e
JL
3578 /* If binding the object/GGTT view requires more space than the entire
3579 * aperture has, reject it early before evicting everything in a vain
3580 * attempt to find space.
654fc607 3581 */
91e6711e 3582 if (size > end) {
65bd342f 3583 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
91e6711e
JL
3584 ggtt_view ? ggtt_view->type : 0,
3585 size,
1ec9e26d 3586 flags & PIN_MAPPABLE ? "mappable" : "total",
d23db88c 3587 end);
262de145 3588 return ERR_PTR(-E2BIG);
654fc607
CW
3589 }
3590
37e680a1 3591 ret = i915_gem_object_get_pages(obj);
6c085a72 3592 if (ret)
262de145 3593 return ERR_PTR(ret);
6c085a72 3594
fbdda6fb
CW
3595 i915_gem_object_pin_pages(obj);
3596
ec7adb6e
JL
3597 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3598 i915_gem_obj_lookup_or_create_vma(obj, vm);
3599
262de145 3600 if (IS_ERR(vma))
bc6bc15b 3601 goto err_unpin;
2f633156 3602
506a8e87
CW
3603 if (flags & PIN_OFFSET_FIXED) {
3604 uint64_t offset = flags & PIN_OFFSET_MASK;
3605
3606 if (offset & (alignment - 1) || offset + size > end) {
3607 ret = -EINVAL;
3608 goto err_free_vma;
3609 }
3610 vma->node.start = offset;
3611 vma->node.size = size;
3612 vma->node.color = obj->cache_level;
3613 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3614 if (ret) {
3615 ret = i915_gem_evict_for_vma(vma);
3616 if (ret == 0)
3617 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3618 }
3619 if (ret)
3620 goto err_free_vma;
101b506a 3621 } else {
506a8e87
CW
3622 if (flags & PIN_HIGH) {
3623 search_flag = DRM_MM_SEARCH_BELOW;
3624 alloc_flag = DRM_MM_CREATE_TOP;
3625 } else {
3626 search_flag = DRM_MM_SEARCH_DEFAULT;
3627 alloc_flag = DRM_MM_CREATE_DEFAULT;
3628 }
101b506a 3629
0a9ae0d7 3630search_free:
506a8e87
CW
3631 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3632 size, alignment,
3633 obj->cache_level,
3634 start, end,
3635 search_flag,
3636 alloc_flag);
3637 if (ret) {
3638 ret = i915_gem_evict_something(dev, vm, size, alignment,
3639 obj->cache_level,
3640 start, end,
3641 flags);
3642 if (ret == 0)
3643 goto search_free;
9731129c 3644
506a8e87
CW
3645 goto err_free_vma;
3646 }
673a394b 3647 }
4144f9b5 3648 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
2f633156 3649 ret = -EINVAL;
bc6bc15b 3650 goto err_remove_node;
673a394b
EA
3651 }
3652
fe14d5f4 3653 trace_i915_vma_bind(vma, flags);
0875546c 3654 ret = i915_vma_bind(vma, obj->cache_level, flags);
fe14d5f4 3655 if (ret)
e2273302 3656 goto err_remove_node;
fe14d5f4 3657
35c20a60 3658 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
1c7f4bca 3659 list_add_tail(&vma->vm_link, &vm->inactive_list);
bf1a1092 3660
262de145 3661 return vma;
2f633156 3662
bc6bc15b 3663err_remove_node:
6286ef9b 3664 drm_mm_remove_node(&vma->node);
bc6bc15b 3665err_free_vma:
2f633156 3666 i915_gem_vma_destroy(vma);
262de145 3667 vma = ERR_PTR(ret);
bc6bc15b 3668err_unpin:
2f633156 3669 i915_gem_object_unpin_pages(obj);
262de145 3670 return vma;
673a394b
EA
3671}
3672
000433b6 3673bool
2c22569b
CW
3674i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3675 bool force)
673a394b 3676{
673a394b
EA
3677 /* If we don't have a page list set up, then we're not pinned
3678 * to GPU, and we can ignore the cache flush because it'll happen
3679 * again at bind time.
3680 */
05394f39 3681 if (obj->pages == NULL)
000433b6 3682 return false;
673a394b 3683
769ce464
ID
3684 /*
3685 * Stolen memory is always coherent with the GPU as it is explicitly
3686 * marked as wc by the system, or the system is cache-coherent.
3687 */
6a2c4232 3688 if (obj->stolen || obj->phys_handle)
000433b6 3689 return false;
769ce464 3690
9c23f7fc
CW
3691 /* If the GPU is snooping the contents of the CPU cache,
3692 * we do not need to manually clear the CPU cache lines. However,
3693 * the caches are only snooped when the render cache is
3694 * flushed/invalidated. As we always have to emit invalidations
3695 * and flushes when moving into and out of the RENDER domain, correct
3696 * snooping behaviour occurs naturally as the result of our domain
3697 * tracking.
3698 */
0f71979a
CW
3699 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3700 obj->cache_dirty = true;
000433b6 3701 return false;
0f71979a 3702 }
9c23f7fc 3703
1c5d22f7 3704 trace_i915_gem_object_clflush(obj);
9da3da66 3705 drm_clflush_sg(obj->pages);
0f71979a 3706 obj->cache_dirty = false;
000433b6
CW
3707
3708 return true;
e47c68e9
EA
3709}
3710
3711/** Flushes the GTT write domain for the object if it's dirty. */
3712static void
05394f39 3713i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3714{
1c5d22f7
CW
3715 uint32_t old_write_domain;
3716
05394f39 3717 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
e47c68e9
EA
3718 return;
3719
63256ec5 3720 /* No actual flushing is required for the GTT write domain. Writes
e47c68e9
EA
3721 * to it immediately go to main memory as far as we know, so there's
3722 * no chipset flush. It also doesn't land in render cache.
63256ec5
CW
3723 *
3724 * However, we do have to enforce the order so that all writes through
3725 * the GTT land before any writes to the device, such as updates to
3726 * the GATT itself.
e47c68e9 3727 */
63256ec5
CW
3728 wmb();
3729
05394f39
CW
3730 old_write_domain = obj->base.write_domain;
3731 obj->base.write_domain = 0;
1c5d22f7 3732
de152b62 3733 intel_fb_obj_flush(obj, false, ORIGIN_GTT);
f99d7069 3734
1c5d22f7 3735 trace_i915_gem_object_change_domain(obj,
05394f39 3736 obj->base.read_domains,
1c5d22f7 3737 old_write_domain);
e47c68e9
EA
3738}
3739
3740/** Flushes the CPU write domain for the object if it's dirty. */
3741static void
e62b59e4 3742i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3743{
1c5d22f7 3744 uint32_t old_write_domain;
e47c68e9 3745
05394f39 3746 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
e47c68e9
EA
3747 return;
3748
e62b59e4 3749 if (i915_gem_clflush_object(obj, obj->pin_display))
000433b6
CW
3750 i915_gem_chipset_flush(obj->base.dev);
3751
05394f39
CW
3752 old_write_domain = obj->base.write_domain;
3753 obj->base.write_domain = 0;
1c5d22f7 3754
de152b62 3755 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
f99d7069 3756
1c5d22f7 3757 trace_i915_gem_object_change_domain(obj,
05394f39 3758 obj->base.read_domains,
1c5d22f7 3759 old_write_domain);
e47c68e9
EA
3760}
3761
2ef7eeaa
EA
3762/**
3763 * Moves a single object to the GTT read, and possibly write domain.
3764 *
3765 * This function returns when the move is complete, including waiting on
3766 * flushes to occur.
3767 */
79e53945 3768int
2021746e 3769i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2ef7eeaa 3770{
72e96d64
JL
3771 struct drm_device *dev = obj->base.dev;
3772 struct drm_i915_private *dev_priv = to_i915(dev);
3773 struct i915_ggtt *ggtt = &dev_priv->ggtt;
1c5d22f7 3774 uint32_t old_write_domain, old_read_domains;
43566ded 3775 struct i915_vma *vma;
e47c68e9 3776 int ret;
2ef7eeaa 3777
8d7e3de1
CW
3778 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3779 return 0;
3780
0201f1ec 3781 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3782 if (ret)
3783 return ret;
3784
43566ded
CW
3785 /* Flush and acquire obj->pages so that we are coherent through
3786 * direct access in memory with previous cached writes through
3787 * shmemfs and that our cache domain tracking remains valid.
3788 * For example, if the obj->filp was moved to swap without us
3789 * being notified and releasing the pages, we would mistakenly
3790 * continue to assume that the obj remained out of the CPU cached
3791 * domain.
3792 */
3793 ret = i915_gem_object_get_pages(obj);
3794 if (ret)
3795 return ret;
3796
e62b59e4 3797 i915_gem_object_flush_cpu_write_domain(obj);
1c5d22f7 3798
d0a57789
CW
3799 /* Serialise direct access to this object with the barriers for
3800 * coherent writes from the GPU, by effectively invalidating the
3801 * GTT domain upon first access.
3802 */
3803 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3804 mb();
3805
05394f39
CW
3806 old_write_domain = obj->base.write_domain;
3807 old_read_domains = obj->base.read_domains;
1c5d22f7 3808
e47c68e9
EA
3809 /* It should now be out of any other write domains, and we can update
3810 * the domain values for our changes.
3811 */
05394f39
CW
3812 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3813 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
e47c68e9 3814 if (write) {
05394f39
CW
3815 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3816 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3817 obj->dirty = 1;
2ef7eeaa
EA
3818 }
3819
1c5d22f7
CW
3820 trace_i915_gem_object_change_domain(obj,
3821 old_read_domains,
3822 old_write_domain);
3823
8325a09d 3824 /* And bump the LRU for this access */
43566ded
CW
3825 vma = i915_gem_obj_to_ggtt(obj);
3826 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
1c7f4bca 3827 list_move_tail(&vma->vm_link,
72e96d64 3828 &ggtt->base.inactive_list);
8325a09d 3829
e47c68e9
EA
3830 return 0;
3831}
3832
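/*
 * Editor's illustrative sketch (userspace side, not part of this file):
 * this path is reached from userspace through the SET_DOMAIN ioctl. A
 * minimal sketch, assuming the kernel uapi headers are available as
 * <drm/i915_drm.h>, that 'fd' is an open, authenticated DRM file descriptor
 * and 'handle' a valid GEM handle; the helper name is hypothetical.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int example_set_gtt_domain(int fd, uint32_t handle, int for_write)
{
	struct drm_i915_gem_set_domain sd;

	memset(&sd, 0, sizeof(sd));
	sd.handle = handle;
	sd.read_domains = I915_GEM_DOMAIN_GTT;
	sd.write_domain = for_write ? I915_GEM_DOMAIN_GTT : 0;

	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}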
ef55f92a
CW
3833/**
3834 * Changes the cache-level of an object across all VMA.
3835 *
3836 * After this function returns, the object will be in the new cache-level
3837 * across all GTT and the contents of the backing storage will be coherent,
3838 * with respect to the new cache-level. In order to keep the backing storage
3839 * coherent for all users, we only allow a single cache level to be set
3840 * globally on the object and prevent it from being changed whilst the
3841 * hardware is reading from the object. That is, if the object is currently
3842 * on the scanout, it will be set to uncached (or equivalent display
3843 * cache coherency) and all non-MOCS GPU access will also be uncached so
3844 * that all direct access to the scanout remains coherent.
3845 */
e4ffd173
CW
3846int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3847 enum i915_cache_level cache_level)
3848{
7bddb01f 3849 struct drm_device *dev = obj->base.dev;
df6f783a 3850 struct i915_vma *vma, *next;
ef55f92a 3851 bool bound = false;
ed75a55b 3852 int ret = 0;
e4ffd173
CW
3853
3854 if (obj->cache_level == cache_level)
ed75a55b 3855 goto out;
e4ffd173 3856
ef55f92a
CW
3857 /* Inspect the list of currently bound VMA and unbind any that would
3858 * be invalid given the new cache-level. This is principally to
3859 * catch the issue of the CS prefetch crossing page boundaries and
3860 * reading an invalid PTE on older architectures.
3861 */
1c7f4bca 3862 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
ef55f92a
CW
3863 if (!drm_mm_node_allocated(&vma->node))
3864 continue;
3865
3866 if (vma->pin_count) {
3867 DRM_DEBUG("can not change the cache level of pinned objects\n");
3868 return -EBUSY;
3869 }
3870
4144f9b5 3871 if (!i915_gem_valid_gtt_space(vma, cache_level)) {
07fe0b12 3872 ret = i915_vma_unbind(vma);
3089c6f2
BW
3873 if (ret)
3874 return ret;
ef55f92a
CW
3875 } else
3876 bound = true;
42d6ab48
CW
3877 }
3878
ef55f92a
CW
3879 /* We can reuse the existing drm_mm nodes but need to change the
3880 * cache-level on the PTE. We could simply unbind them all and
3881 * rebind with the correct cache-level on next use. However since
3882 * we already have a valid slot, dma mapping, pages etc, we may as
3883 * well rewrite the PTE in the belief that doing so tramples upon less
3884 * state and so involves less work.
3885 */
3886 if (bound) {
3887 /* Before we change the PTE, the GPU must not be accessing it.
3888 * If we wait upon the object, we know that all the bound
3889 * VMA are no longer active.
3890 */
2e2f351d 3891 ret = i915_gem_object_wait_rendering(obj, false);
e4ffd173
CW
3892 if (ret)
3893 return ret;
3894
ef55f92a
CW
3895 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
3896 /* Access to snoopable pages through the GTT is
3897 * incoherent and on some machines causes a hard
3898 * lockup. Relinquish the CPU mmap to force
3899 * userspace to refault in the pages and we can
3900 * then double check if the GTT mapping is still
3901 * valid for that pointer access.
3902 */
3903 i915_gem_release_mmap(obj);
3904
3905 /* As we no longer need a fence for GTT access,
3906 * we can relinquish it now (and so prevent having
3907 * to steal a fence from someone else on the next
3908 * fence request). Note GPU activity would have
3909 * dropped the fence as all snoopable access is
3910 * supposed to be linear.
3911 */
e4ffd173
CW
3912 ret = i915_gem_object_put_fence(obj);
3913 if (ret)
3914 return ret;
ef55f92a
CW
3915 } else {
3916 /* We either have incoherent backing store and
3917 * so no GTT access or the architecture is fully
3918 * coherent. In such cases, existing GTT mmaps
3919 * ignore the cache bit in the PTE and we can
3920 * rewrite it without confusing the GPU or having
3921 * to force userspace to fault back in its mmaps.
3922 */
e4ffd173
CW
3923 }
3924
1c7f4bca 3925 list_for_each_entry(vma, &obj->vma_list, obj_link) {
ef55f92a
CW
3926 if (!drm_mm_node_allocated(&vma->node))
3927 continue;
3928
3929 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3930 if (ret)
3931 return ret;
3932 }
e4ffd173
CW
3933 }
3934
1c7f4bca 3935 list_for_each_entry(vma, &obj->vma_list, obj_link)
2c22569b
CW
3936 vma->node.color = cache_level;
3937 obj->cache_level = cache_level;
3938
ed75a55b 3939out:
ef55f92a
CW
3940 /* Flush the dirty CPU caches to the backing storage so that the
3941 * object is now coherent at its new cache level (with respect
3942 * to the access domain).
3943 */
0f71979a
CW
3944 if (obj->cache_dirty &&
3945 obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
3946 cpu_write_needs_clflush(obj)) {
3947 if (i915_gem_clflush_object(obj, true))
3948 i915_gem_chipset_flush(obj->base.dev);
e4ffd173
CW
3949 }
3950
e4ffd173
CW
3951 return 0;
3952}
3953
199adf40
BW
3954int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3955 struct drm_file *file)
e6994aee 3956{
199adf40 3957 struct drm_i915_gem_caching *args = data;
e6994aee 3958 struct drm_i915_gem_object *obj;
e6994aee
CW
3959
3960 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
432be69d
CW
3961 if (&obj->base == NULL)
3962 return -ENOENT;
e6994aee 3963
651d794f
CW
3964 switch (obj->cache_level) {
3965 case I915_CACHE_LLC:
3966 case I915_CACHE_L3_LLC:
3967 args->caching = I915_CACHING_CACHED;
3968 break;
3969
4257d3ba
CW
3970 case I915_CACHE_WT:
3971 args->caching = I915_CACHING_DISPLAY;
3972 break;
3973
651d794f
CW
3974 default:
3975 args->caching = I915_CACHING_NONE;
3976 break;
3977 }
e6994aee 3978
432be69d
CW
3979 drm_gem_object_unreference_unlocked(&obj->base);
3980 return 0;
e6994aee
CW
3981}
3982
199adf40
BW
3983int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3984 struct drm_file *file)
e6994aee 3985{
fd0fe6ac 3986 struct drm_i915_private *dev_priv = dev->dev_private;
199adf40 3987 struct drm_i915_gem_caching *args = data;
e6994aee
CW
3988 struct drm_i915_gem_object *obj;
3989 enum i915_cache_level level;
3990 int ret;
3991
199adf40
BW
3992 switch (args->caching) {
3993 case I915_CACHING_NONE:
e6994aee
CW
3994 level = I915_CACHE_NONE;
3995 break;
199adf40 3996 case I915_CACHING_CACHED:
e5756c10
ID
3997 /*
3998 * Due to a HW issue on BXT A stepping, GPU stores via a
3999 * snooped mapping may leave stale data in a corresponding CPU
4000 * cacheline, whereas normally such cachelines would get
4001 * invalidated.
4002 */
ca377809 4003 if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
e5756c10
ID
4004 return -ENODEV;
4005
e6994aee
CW
4006 level = I915_CACHE_LLC;
4007 break;
4257d3ba
CW
4008 case I915_CACHING_DISPLAY:
4009 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4010 break;
e6994aee
CW
4011 default:
4012 return -EINVAL;
4013 }
4014
fd0fe6ac
ID
4015 intel_runtime_pm_get(dev_priv);
4016
3bc2913e
BW
4017 ret = i915_mutex_lock_interruptible(dev);
4018 if (ret)
fd0fe6ac 4019 goto rpm_put;
3bc2913e 4020
e6994aee
CW
4021 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4022 if (&obj->base == NULL) {
4023 ret = -ENOENT;
4024 goto unlock;
4025 }
4026
4027 ret = i915_gem_object_set_cache_level(obj, level);
4028
4029 drm_gem_object_unreference(&obj->base);
4030unlock:
4031 mutex_unlock(&dev->struct_mutex);
fd0fe6ac
ID
4032rpm_put:
4033 intel_runtime_pm_put(dev_priv);
4034
e6994aee
CW
4035 return ret;
4036}
4037
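/*
 * Editor's illustrative sketch (userspace side, not part of this file):
 * exercising the set-caching ioctl handled above, assuming the same headers
 * as the set-domain sketch earlier (<stdint.h>, <string.h>, <sys/ioctl.h>,
 * <drm/i915_drm.h>) and a valid DRM fd and GEM handle; the helper name is
 * hypothetical.
 */
static int example_set_caching(int fd, uint32_t handle, uint32_t caching)
{
	struct drm_i915_gem_caching arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.caching = caching;	/* I915_CACHING_NONE, _CACHED or _DISPLAY */

	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}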
b9241ea3 4038/*
2da3b9b9
CW
4039 * Prepare buffer for display plane (scanout, cursors, etc).
4040 * Can be called from an uninterruptible phase (modesetting) and allows
4041 * any flushes to be pipelined (for pageflips).
b9241ea3
ZW
4042 */
4043int
2da3b9b9
CW
4044i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4045 u32 alignment,
e6617330 4046 const struct i915_ggtt_view *view)
b9241ea3 4047{
2da3b9b9 4048 u32 old_read_domains, old_write_domain;
b9241ea3
ZW
4049 int ret;
4050
cc98b413
CW
4051 /* Mark the pin_display early so that we account for the
4052 * display coherency whilst setting up the cache domains.
4053 */
8a0c39b1 4054 obj->pin_display++;
cc98b413 4055
a7ef0640
EA
4056 /* The display engine is not coherent with the LLC cache on gen6. As
4057 * a result, we make sure that the pinning that is about to occur is
4058 * done with uncached PTEs. This is the lowest common denominator for all
4059 * chipsets.
4060 *
4061 * However for gen6+, we could do better by using the GFDT bit instead
4062 * of uncaching, which would allow us to flush all the LLC-cached data
4063 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4064 */
651d794f
CW
4065 ret = i915_gem_object_set_cache_level(obj,
4066 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
a7ef0640 4067 if (ret)
cc98b413 4068 goto err_unpin_display;
a7ef0640 4069
2da3b9b9
CW
4070 /* As the user may map the buffer once pinned in the display plane
4071 * (e.g. libkms for the bootup splash), we have to ensure that we
4072 * always use map_and_fenceable for all scanout buffers.
4073 */
50470bb0
TU
4074 ret = i915_gem_object_ggtt_pin(obj, view, alignment,
4075 view->type == I915_GGTT_VIEW_NORMAL ?
4076 PIN_MAPPABLE : 0);
2da3b9b9 4077 if (ret)
cc98b413 4078 goto err_unpin_display;
2da3b9b9 4079
e62b59e4 4080 i915_gem_object_flush_cpu_write_domain(obj);
b118c1e3 4081
2da3b9b9 4082 old_write_domain = obj->base.write_domain;
05394f39 4083 old_read_domains = obj->base.read_domains;
2da3b9b9
CW
4084
4085 /* It should now be out of any other write domains, and we can update
4086 * the domain values for our changes.
4087 */
e5f1d962 4088 obj->base.write_domain = 0;
05394f39 4089 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
4090
4091 trace_i915_gem_object_change_domain(obj,
4092 old_read_domains,
2da3b9b9 4093 old_write_domain);
b9241ea3
ZW
4094
4095 return 0;
cc98b413
CW
4096
4097err_unpin_display:
8a0c39b1 4098 obj->pin_display--;
cc98b413
CW
4099 return ret;
4100}
4101
4102void
e6617330
TU
4103i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
4104 const struct i915_ggtt_view *view)
cc98b413 4105{
8a0c39b1
TU
4106 if (WARN_ON(obj->pin_display == 0))
4107 return;
4108
e6617330
TU
4109 i915_gem_object_ggtt_unpin_view(obj, view);
4110
8a0c39b1 4111 obj->pin_display--;
b9241ea3
ZW
4112}
4113
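/*
 * Editor's illustrative sketch (not part of the original file): the
 * pin/unpin pairing a display caller is expected to follow around the two
 * helpers above. Assumes struct_mutex is held and that the normal GGTT view
 * is wanted; the helper name and the plane-programming step are
 * placeholders, not the modesetting code's actual sequence.
 */
static int example_pin_scanout(struct drm_i915_gem_object *obj, u32 alignment)
{
	struct i915_ggtt_view view = i915_ggtt_view_normal;
	int ret;

	ret = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
	if (ret)
		return ret;

	/* ... program the plane with the object's GGTT offset ... */

	i915_gem_object_unpin_from_display_plane(obj, &view);
	return 0;
}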
e47c68e9
EA
4114/**
4115 * Moves a single object to the CPU read, and possibly write domain.
4116 *
4117 * This function returns when the move is complete, including waiting on
4118 * flushes to occur.
4119 */
dabdfe02 4120int
919926ae 4121i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
e47c68e9 4122{
1c5d22f7 4123 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
4124 int ret;
4125
8d7e3de1
CW
4126 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4127 return 0;
4128
0201f1ec 4129 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
4130 if (ret)
4131 return ret;
4132
e47c68e9 4133 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 4134
05394f39
CW
4135 old_write_domain = obj->base.write_domain;
4136 old_read_domains = obj->base.read_domains;
1c5d22f7 4137
e47c68e9 4138 /* Flush the CPU cache if it's still invalid. */
05394f39 4139 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2c22569b 4140 i915_gem_clflush_object(obj, false);
2ef7eeaa 4141
05394f39 4142 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
4143 }
4144
4145 /* It should now be out of any other write domains, and we can update
4146 * the domain values for our changes.
4147 */
05394f39 4148 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9
EA
4149
4150 /* If we're writing through the CPU, then the GPU read domains will
4151 * need to be invalidated at next use.
4152 */
4153 if (write) {
05394f39
CW
4154 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4155 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e47c68e9 4156 }
2ef7eeaa 4157
1c5d22f7
CW
4158 trace_i915_gem_object_change_domain(obj,
4159 old_read_domains,
4160 old_write_domain);
4161
2ef7eeaa
EA
4162 return 0;
4163}
4164
673a394b
EA
4165/* Throttle our rendering by waiting until the ring has completed our requests
4166 * emitted over 20 msec ago.
4167 *
b962442e
EA
4168 * Note that if we were to use the current jiffies each time around the loop,
4169 * we wouldn't escape the function with any frames outstanding if the time to
4170 * render a frame was over 20ms.
4171 *
673a394b
EA
4172 * This should get us reasonable parallelism between CPU and GPU but also
4173 * relatively low latency when blocking on a particular request to finish.
4174 */
40a5f0de 4175static int
f787a5f5 4176i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
40a5f0de 4177{
f787a5f5
CW
4178 struct drm_i915_private *dev_priv = dev->dev_private;
4179 struct drm_i915_file_private *file_priv = file->driver_priv;
d0bc54f2 4180 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
54fb2411 4181 struct drm_i915_gem_request *request, *target = NULL;
f69061be 4182 unsigned reset_counter;
f787a5f5 4183 int ret;
93533c29 4184
308887aa
DV
4185 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4186 if (ret)
4187 return ret;
4188
4189 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4190 if (ret)
4191 return ret;
e110e8d6 4192
1c25595f 4193 spin_lock(&file_priv->mm.lock);
f787a5f5 4194 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
b962442e
EA
4195 if (time_after_eq(request->emitted_jiffies, recent_enough))
4196 break;
40a5f0de 4197
fcfa423c
JH
4198 /*
4199 * Note that the request might not have been submitted yet,
4200 * in which case emitted_jiffies will be zero.
4201 */
4202 if (!request->emitted_jiffies)
4203 continue;
4204
54fb2411 4205 target = request;
b962442e 4206 }
f69061be 4207 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
ff865885
JH
4208 if (target)
4209 i915_gem_request_reference(target);
1c25595f 4210 spin_unlock(&file_priv->mm.lock);
40a5f0de 4211
54fb2411 4212 if (target == NULL)
f787a5f5 4213 return 0;
2bc43b5c 4214
9c654818 4215 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
f787a5f5
CW
4216 if (ret == 0)
4217 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
40a5f0de 4218
41037f9f 4219 i915_gem_request_unreference__unlocked(target);
ff865885 4220
40a5f0de
EA
4221 return ret;
4222}
4223
d23db88c
CW
4224static bool
4225i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4226{
4227 struct drm_i915_gem_object *obj = vma->obj;
4228
4229 if (alignment &&
4230 vma->node.start & (alignment - 1))
4231 return true;
4232
4233 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4234 return true;
4235
4236 if (flags & PIN_OFFSET_BIAS &&
4237 vma->node.start < (flags & PIN_OFFSET_MASK))
4238 return true;
4239
506a8e87
CW
4240 if (flags & PIN_OFFSET_FIXED &&
4241 vma->node.start != (flags & PIN_OFFSET_MASK))
4242 return true;
4243
d23db88c
CW
4244 return false;
4245}
4246
d0710abb
CW
4247void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
4248{
4249 struct drm_i915_gem_object *obj = vma->obj;
4250 bool mappable, fenceable;
4251 u32 fence_size, fence_alignment;
4252
4253 fence_size = i915_gem_get_gtt_size(obj->base.dev,
4254 obj->base.size,
4255 obj->tiling_mode);
4256 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
4257 obj->base.size,
4258 obj->tiling_mode,
4259 true);
4260
4261 fenceable = (vma->node.size == fence_size &&
4262 (vma->node.start & (fence_alignment - 1)) == 0);
4263
4264 mappable = (vma->node.start + fence_size <=
62106b4f 4265 to_i915(obj->base.dev)->ggtt.mappable_end);
d0710abb
CW
4266
4267 obj->map_and_fenceable = mappable && fenceable;
4268}
4269
ec7adb6e
JL
4270static int
4271i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
4272 struct i915_address_space *vm,
4273 const struct i915_ggtt_view *ggtt_view,
4274 uint32_t alignment,
4275 uint64_t flags)
673a394b 4276{
6e7186af 4277 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
07fe0b12 4278 struct i915_vma *vma;
ef79e17c 4279 unsigned bound;
673a394b
EA
4280 int ret;
4281
6e7186af
BW
4282 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
4283 return -ENODEV;
4284
bf3d149b 4285 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
1ec9e26d 4286 return -EINVAL;
07fe0b12 4287
c826c449
CW
4288 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
4289 return -EINVAL;
4290
ec7adb6e
JL
4291 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
4292 return -EINVAL;
4293
4294 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
4295 i915_gem_obj_to_vma(obj, vm);
4296
07fe0b12 4297 if (vma) {
d7f46fc4
BW
4298 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4299 return -EBUSY;
4300
d23db88c 4301 if (i915_vma_misplaced(vma, alignment, flags)) {
d7f46fc4 4302 WARN(vma->pin_count,
ec7adb6e 4303 "bo is already pinned in %s with incorrect alignment:"
088e0df4 4304 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
75e9e915 4305 " obj->map_and_fenceable=%d\n",
ec7adb6e 4306 ggtt_view ? "ggtt" : "ppgtt",
088e0df4
MT
4307 upper_32_bits(vma->node.start),
4308 lower_32_bits(vma->node.start),
fe14d5f4 4309 alignment,
d23db88c 4310 !!(flags & PIN_MAPPABLE),
05394f39 4311 obj->map_and_fenceable);
07fe0b12 4312 ret = i915_vma_unbind(vma);
ac0c6b5a
CW
4313 if (ret)
4314 return ret;
8ea99c92
DV
4315
4316 vma = NULL;
ac0c6b5a
CW
4317 }
4318 }
4319
ef79e17c 4320 bound = vma ? vma->bound : 0;
8ea99c92 4321 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
ec7adb6e
JL
4322 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
4323 flags);
262de145
DV
4324 if (IS_ERR(vma))
4325 return PTR_ERR(vma);
0875546c
DV
4326 } else {
4327 ret = i915_vma_bind(vma, obj->cache_level, flags);
fe14d5f4
TU
4328 if (ret)
4329 return ret;
4330 }
74898d7e 4331
91e6711e
JL
4332 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
4333 (bound ^ vma->bound) & GLOBAL_BIND) {
d0710abb 4334 __i915_vma_set_map_and_fenceable(vma);
91e6711e
JL
4335 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4336 }
ef79e17c 4337
8ea99c92 4338 vma->pin_count++;
673a394b
EA
4339 return 0;
4340}
4341
ec7adb6e
JL
4342int
4343i915_gem_object_pin(struct drm_i915_gem_object *obj,
4344 struct i915_address_space *vm,
4345 uint32_t alignment,
4346 uint64_t flags)
4347{
4348 return i915_gem_object_do_pin(obj, vm,
4349 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
4350 alignment, flags);
4351}
4352
4353int
4354i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
4355 const struct i915_ggtt_view *view,
4356 uint32_t alignment,
4357 uint64_t flags)
4358{
72e96d64
JL
4359 struct drm_device *dev = obj->base.dev;
4360 struct drm_i915_private *dev_priv = to_i915(dev);
4361 struct i915_ggtt *ggtt = &dev_priv->ggtt;
4362
ade7daa1 4363 BUG_ON(!view);
ec7adb6e 4364
72e96d64 4365 return i915_gem_object_do_pin(obj, &ggtt->base, view,
6fafab76 4366 alignment, flags | PIN_GLOBAL);
ec7adb6e
JL
4367}
4368
673a394b 4369void
e6617330
TU
4370i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
4371 const struct i915_ggtt_view *view)
673a394b 4372{
e6617330 4373 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
673a394b 4374
d7f46fc4 4375 BUG_ON(!vma);
e6617330 4376 WARN_ON(vma->pin_count == 0);
9abc4648 4377 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
d7f46fc4 4378
30154650 4379 --vma->pin_count;
673a394b
EA
4380}
4381
673a394b
EA
4382int
4383i915_gem_busy_ioctl(struct drm_device *dev, void *data,
05394f39 4384 struct drm_file *file)
673a394b
EA
4385{
4386 struct drm_i915_gem_busy *args = data;
05394f39 4387 struct drm_i915_gem_object *obj;
30dbf0c0
CW
4388 int ret;
4389
76c1dec1 4390 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 4391 if (ret)
76c1dec1 4392 return ret;
673a394b 4393
05394f39 4394 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 4395 if (&obj->base == NULL) {
1d7cfea1
CW
4396 ret = -ENOENT;
4397 goto unlock;
673a394b 4398 }
d1b851fc 4399
0be555b6
CW
4400 /* Count all active objects as busy, even if they are currently not used
4401 * by the gpu. Users of this interface expect objects to eventually
4402 * become non-busy without any further actions, therefore emit any
4403 * necessary flushes here.
c4de0a5d 4404 */
30dfebf3 4405 ret = i915_gem_object_flush_active(obj);
b4716185
CW
4406 if (ret)
4407 goto unref;
0be555b6 4408
426960be
CW
4409 args->busy = 0;
4410 if (obj->active) {
4411 int i;
4412
666796da 4413 for (i = 0; i < I915_NUM_ENGINES; i++) {
426960be
CW
4414 struct drm_i915_gem_request *req;
4415
4416 req = obj->last_read_req[i];
4417 if (req)
4a570db5 4418 args->busy |= 1 << (16 + req->engine->exec_id);
426960be
CW
4419 }
4420 if (obj->last_write_req)
4a570db5 4421 args->busy |= obj->last_write_req->engine->exec_id;
426960be 4422 }
673a394b 4423
b4716185 4424unref:
05394f39 4425 drm_gem_object_unreference(&obj->base);
1d7cfea1 4426unlock:
673a394b 4427 mutex_unlock(&dev->struct_mutex);
1d7cfea1 4428 return ret;
673a394b
EA
4429}
4430
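/*
 * Editor's illustrative sketch (userspace side, not part of this file):
 * polling the busy ioctl handled above, assuming the same headers as the
 * earlier userspace sketches (<stdint.h>, <string.h>, <sys/ioctl.h>,
 * <drm/i915_drm.h>) and a valid DRM fd and GEM handle; the helper name is
 * hypothetical. A non-zero 'busy' means the object is still in use: the low
 * bits carry the id of the engine last writing it, while bits 16 and up
 * form a mask of engines still reading it, matching the handler above.
 */
static int example_bo_is_busy(int fd, uint32_t handle)
{
	struct drm_i915_gem_busy arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &arg))
		return -1;	/* errno holds the reason */

	return arg.busy != 0;
}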
4431int
4432i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4433 struct drm_file *file_priv)
4434{
0206e353 4435 return i915_gem_ring_throttle(dev, file_priv);
673a394b
EA
4436}
4437
3ef94daa
CW
4438int
4439i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4440 struct drm_file *file_priv)
4441{
656bfa3a 4442 struct drm_i915_private *dev_priv = dev->dev_private;
3ef94daa 4443 struct drm_i915_gem_madvise *args = data;
05394f39 4444 struct drm_i915_gem_object *obj;
76c1dec1 4445 int ret;
3ef94daa
CW
4446
4447 switch (args->madv) {
4448 case I915_MADV_DONTNEED:
4449 case I915_MADV_WILLNEED:
4450 break;
4451 default:
4452 return -EINVAL;
4453 }
4454
1d7cfea1
CW
4455 ret = i915_mutex_lock_interruptible(dev);
4456 if (ret)
4457 return ret;
4458
05394f39 4459 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
c8725226 4460 if (&obj->base == NULL) {
1d7cfea1
CW
4461 ret = -ENOENT;
4462 goto unlock;
3ef94daa 4463 }
3ef94daa 4464
d7f46fc4 4465 if (i915_gem_obj_is_pinned(obj)) {
1d7cfea1
CW
4466 ret = -EINVAL;
4467 goto out;
3ef94daa
CW
4468 }
4469
656bfa3a
DV
4470 if (obj->pages &&
4471 obj->tiling_mode != I915_TILING_NONE &&
4472 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4473 if (obj->madv == I915_MADV_WILLNEED)
4474 i915_gem_object_unpin_pages(obj);
4475 if (args->madv == I915_MADV_WILLNEED)
4476 i915_gem_object_pin_pages(obj);
4477 }
4478
05394f39
CW
4479 if (obj->madv != __I915_MADV_PURGED)
4480 obj->madv = args->madv;
3ef94daa 4481
6c085a72 4482 /* if the object is no longer attached, discard its backing storage */
be6a0376 4483 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
2d7ef395
CW
4484 i915_gem_object_truncate(obj);
4485
05394f39 4486 args->retained = obj->madv != __I915_MADV_PURGED;
bb6baf76 4487
1d7cfea1 4488out:
05394f39 4489 drm_gem_object_unreference(&obj->base);
1d7cfea1 4490unlock:
3ef94daa 4491 mutex_unlock(&dev->struct_mutex);
1d7cfea1 4492 return ret;
3ef94daa
CW
4493}
4494
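/*
 * Editor's illustrative sketch (userspace side, not part of this file):
 * marking a cached buffer purgeable, as a userspace buffer cache typically
 * does, assuming the same headers as the earlier userspace sketches and a
 * valid DRM fd and GEM handle; the helper name is hypothetical.
 */
static int example_madvise(int fd, uint32_t handle, uint32_t madv)
{
	struct drm_i915_gem_madvise arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.madv = madv;	/* I915_MADV_DONTNEED or I915_MADV_WILLNEED */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
		return -1;

	/* retained == 0: the backing pages were already discarded. */
	return arg.retained;
}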
37e680a1
CW
4495void i915_gem_object_init(struct drm_i915_gem_object *obj,
4496 const struct drm_i915_gem_object_ops *ops)
0327d6ba 4497{
b4716185
CW
4498 int i;
4499
35c20a60 4500 INIT_LIST_HEAD(&obj->global_list);
666796da 4501 for (i = 0; i < I915_NUM_ENGINES; i++)
117897f4 4502 INIT_LIST_HEAD(&obj->engine_list[i]);
b25cb2f8 4503 INIT_LIST_HEAD(&obj->obj_exec_link);
2f633156 4504 INIT_LIST_HEAD(&obj->vma_list);
8d9d5744 4505 INIT_LIST_HEAD(&obj->batch_pool_link);
0327d6ba 4506
37e680a1
CW
4507 obj->ops = ops;
4508
0327d6ba
CW
4509 obj->fence_reg = I915_FENCE_REG_NONE;
4510 obj->madv = I915_MADV_WILLNEED;
0327d6ba
CW
4511
4512 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4513}
4514
37e680a1 4515static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
de472664 4516 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
37e680a1
CW
4517 .get_pages = i915_gem_object_get_pages_gtt,
4518 .put_pages = i915_gem_object_put_pages_gtt,
4519};
4520
05394f39
CW
4521struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4522 size_t size)
ac52bc56 4523{
c397b908 4524 struct drm_i915_gem_object *obj;
5949eac4 4525 struct address_space *mapping;
1a240d4d 4526 gfp_t mask;
ac52bc56 4527
42dcedd4 4528 obj = i915_gem_object_alloc(dev);
c397b908
DV
4529 if (obj == NULL)
4530 return NULL;
673a394b 4531
c397b908 4532 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
42dcedd4 4533 i915_gem_object_free(obj);
c397b908
DV
4534 return NULL;
4535 }
673a394b 4536
bed1ea95
CW
4537 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4538 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4539 /* 965gm cannot relocate objects above 4GiB. */
4540 mask &= ~__GFP_HIGHMEM;
4541 mask |= __GFP_DMA32;
4542 }
4543
496ad9aa 4544 mapping = file_inode(obj->base.filp)->i_mapping;
bed1ea95 4545 mapping_set_gfp_mask(mapping, mask);
5949eac4 4546
37e680a1 4547 i915_gem_object_init(obj, &i915_gem_object_ops);
73aa808f 4548
c397b908
DV
4549 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4550 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 4551
3d29b842
ED
4552 if (HAS_LLC(dev)) {
4553 /* On some devices, we can have the GPU use the LLC (the CPU
a1871112
EA
4554 * cache) for about a 10% performance improvement
4555 * compared to uncached. Graphics requests other than
4556 * display scanout are coherent with the CPU in
4557 * accessing this cache. This means in this mode we
4558 * don't need to clflush on the CPU side, and on the
4559 * GPU side we only need to flush internal caches to
4560 * get data visible to the CPU.
4561 *
4562 * However, we maintain the display planes as UC, and so
4563 * need to rebind when first used as such.
4564 */
4565 obj->cache_level = I915_CACHE_LLC;
4566 } else
4567 obj->cache_level = I915_CACHE_NONE;
4568
d861e338
DV
4569 trace_i915_gem_object_create(obj);
4570
05394f39 4571 return obj;
c397b908
DV
4572}
4573
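/*
 * Editor's illustrative sketch (not part of the original file): the common
 * pattern for driver-internal allocations built on the helper above --
 * allocate a shmemfs-backed object and pin it into the global GTT. Assumes
 * struct_mutex is held, that i915_gem_obj_ggtt_pin() is available as the
 * GGTT convenience wrapper around i915_gem_object_ggtt_pin(), and that
 * page alignment is sufficient; the helper name is a placeholder.
 */
static struct drm_i915_gem_object *
example_alloc_pinned_object(struct drm_device *dev, size_t size)
{
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return ERR_PTR(-ENOMEM);

	ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
	if (ret) {
		drm_gem_object_unreference(&obj->base);
		return ERR_PTR(ret);
	}

	return obj;
}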
340fbd8c
CW
4574static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4575{
4576 /* If we are the last user of the backing storage (be it shmemfs
4577 * pages or stolen etc), we know that the pages are going to be
4578 * immediately released. In this case, we can then skip copying
4579 * back the contents from the GPU.
4580 */
4581
4582 if (obj->madv != I915_MADV_WILLNEED)
4583 return false;
4584
4585 if (obj->base.filp == NULL)
4586 return true;
4587
4588 /* At first glance, this looks racy, but then again so would be
4589 * userspace racing mmap against close. However, the first external
4590 * reference to the filp can only be obtained through the
4591 * i915_gem_mmap_ioctl() which safeguards us against the user
4592 * acquiring such a reference whilst we are in the middle of
4593 * freeing the object.
4594 */
4595 return atomic_long_read(&obj->base.filp->f_count) == 1;
4596}
4597
1488fc08 4598void i915_gem_free_object(struct drm_gem_object *gem_obj)
673a394b 4599{
1488fc08 4600 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
05394f39 4601 struct drm_device *dev = obj->base.dev;
3e31c6c0 4602 struct drm_i915_private *dev_priv = dev->dev_private;
07fe0b12 4603 struct i915_vma *vma, *next;
673a394b 4604
f65c9168
PZ
4605 intel_runtime_pm_get(dev_priv);
4606
26e12f89
CW
4607 trace_i915_gem_object_destroy(obj);
4608
1c7f4bca 4609 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
d7f46fc4
BW
4610 int ret;
4611
4612 vma->pin_count = 0;
4613 ret = i915_vma_unbind(vma);
07fe0b12
BW
4614 if (WARN_ON(ret == -ERESTARTSYS)) {
4615 bool was_interruptible;
1488fc08 4616
07fe0b12
BW
4617 was_interruptible = dev_priv->mm.interruptible;
4618 dev_priv->mm.interruptible = false;
1488fc08 4619
07fe0b12 4620 WARN_ON(i915_vma_unbind(vma));
1488fc08 4621
07fe0b12
BW
4622 dev_priv->mm.interruptible = was_interruptible;
4623 }
1488fc08
CW
4624 }
4625
1d64ae71
BW
4626 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4627 * before progressing. */
4628 if (obj->stolen)
4629 i915_gem_object_unpin_pages(obj);
4630
a071fa00
DV
4631 WARN_ON(obj->frontbuffer_bits);
4632
656bfa3a
DV
4633 if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4634 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4635 obj->tiling_mode != I915_TILING_NONE)
4636 i915_gem_object_unpin_pages(obj);
4637
401c29f6
BW
4638 if (WARN_ON(obj->pages_pin_count))
4639 obj->pages_pin_count = 0;
340fbd8c 4640 if (discard_backing_storage(obj))
5537252b 4641 obj->madv = I915_MADV_DONTNEED;
37e680a1 4642 i915_gem_object_put_pages(obj);
d8cb5086 4643 i915_gem_object_free_mmap_offset(obj);
de151cf6 4644
9da3da66
CW
4645 BUG_ON(obj->pages);
4646
2f745ad3
CW
4647 if (obj->base.import_attach)
4648 drm_prime_gem_destroy(&obj->base, NULL);
de151cf6 4649
5cc9ed4b
CW
4650 if (obj->ops->release)
4651 obj->ops->release(obj);
4652
05394f39
CW
4653 drm_gem_object_release(&obj->base);
4654 i915_gem_info_remove_obj(dev_priv, obj->base.size);
c397b908 4655
05394f39 4656 kfree(obj->bit_17);
42dcedd4 4657 i915_gem_object_free(obj);
f65c9168
PZ
4658
4659 intel_runtime_pm_put(dev_priv);
673a394b
EA
4660}
4661
ec7adb6e
JL
4662struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4663 struct i915_address_space *vm)
e656a6cb
DV
4664{
4665 struct i915_vma *vma;
1c7f4bca 4666 list_for_each_entry(vma, &obj->vma_list, obj_link) {
1b683729
TU
4667 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
4668 vma->vm == vm)
e656a6cb 4669 return vma;
ec7adb6e
JL
4670 }
4671 return NULL;
4672}
4673
4674struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4675 const struct i915_ggtt_view *view)
4676{
72e96d64
JL
4677 struct drm_device *dev = obj->base.dev;
4678 struct drm_i915_private *dev_priv = to_i915(dev);
4679 struct i915_ggtt *ggtt = &dev_priv->ggtt;
ec7adb6e 4680 struct i915_vma *vma;
e656a6cb 4681
ade7daa1 4682 BUG_ON(!view);
ec7adb6e 4683
1c7f4bca 4684 list_for_each_entry(vma, &obj->vma_list, obj_link)
72e96d64 4685 if (vma->vm == &ggtt->base &&
9abc4648 4686 i915_ggtt_view_equal(&vma->ggtt_view, view))
ec7adb6e 4687 return vma;
e656a6cb
DV
4688 return NULL;
4689}
4690
2f633156
BW
4691void i915_gem_vma_destroy(struct i915_vma *vma)
4692{
4693 WARN_ON(vma->node.allocated);
aaa05667
CW
4694
4695 /* Keep the vma as a placeholder in the execbuffer reservation lists */
4696 if (!list_empty(&vma->exec_list))
4697 return;
4698
596c5923
CW
4699 if (!vma->is_ggtt)
4700 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
b9d06dd9 4701
1c7f4bca 4702 list_del(&vma->obj_link);
b93dab6e 4703
e20d2ab7 4704 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
2f633156
BW
4705}
4706
e3efda49 4707static void
117897f4 4708i915_gem_stop_engines(struct drm_device *dev)
e3efda49
CW
4709{
4710 struct drm_i915_private *dev_priv = dev->dev_private;
e2f80391 4711 struct intel_engine_cs *engine;
e3efda49 4712
b4ac5afc 4713 for_each_engine(engine, dev_priv)
117897f4 4714 dev_priv->gt.stop_engine(engine);
e3efda49
CW
4715}
4716
29105ccc 4717int
45c5f202 4718i915_gem_suspend(struct drm_device *dev)
29105ccc 4719{
3e31c6c0 4720 struct drm_i915_private *dev_priv = dev->dev_private;
45c5f202 4721 int ret = 0;
28dfe52a 4722
45c5f202 4723 mutex_lock(&dev->struct_mutex);
b2da9fe5 4724 ret = i915_gpu_idle(dev);
f7403347 4725 if (ret)
45c5f202 4726 goto err;
f7403347 4727
b2da9fe5 4728 i915_gem_retire_requests(dev);
673a394b 4729
117897f4 4730 i915_gem_stop_engines(dev);
45c5f202
CW
4731 mutex_unlock(&dev->struct_mutex);
4732
737b1506 4733 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
29105ccc 4734 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
274fa1c1 4735 flush_delayed_work(&dev_priv->mm.idle_work);
29105ccc 4736
bdcf120b
CW
4737 /* Assert that we successfully flushed all the work and
4738 * reset the GPU back to its idle, low power state.
4739 */
4740 WARN_ON(dev_priv->mm.busy);
4741
673a394b 4742 return 0;
45c5f202
CW
4743
4744err:
4745 mutex_unlock(&dev->struct_mutex);
4746 return ret;
673a394b
EA
4747}
4748
6909a666 4749int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
b9524a1e 4750{
4a570db5 4751 struct intel_engine_cs *engine = req->engine;
e2f80391 4752 struct drm_device *dev = engine->dev;
3e31c6c0 4753 struct drm_i915_private *dev_priv = dev->dev_private;
35a85ac6 4754 u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
c3787e2e 4755 int i, ret;
b9524a1e 4756
040d2baa 4757 if (!HAS_L3_DPF(dev) || !remap_info)
c3787e2e 4758 return 0;
b9524a1e 4759
5fb9de1a 4760 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
c3787e2e
BW
4761 if (ret)
4762 return ret;
b9524a1e 4763
c3787e2e
BW
4764 /*
4765 * Note: We do not worry about the concurrent register cacheline hang
4766 * here because no other code should access these registers other than
4767 * at initialization time.
4768 */
6fa1c5f1 4769 for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
e2f80391
TU
4770 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
4771 intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
4772 intel_ring_emit(engine, remap_info[i]);
b9524a1e
BW
4773 }
4774
e2f80391 4775 intel_ring_advance(engine);
b9524a1e 4776
c3787e2e 4777 return ret;
b9524a1e
BW
4778}
4779
f691e2f4
DV
4780void i915_gem_init_swizzling(struct drm_device *dev)
4781{
3e31c6c0 4782 struct drm_i915_private *dev_priv = dev->dev_private;
f691e2f4 4783
11782b02 4784 if (INTEL_INFO(dev)->gen < 5 ||
f691e2f4
DV
4785 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4786 return;
4787
4788 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4789 DISP_TILE_SURFACE_SWIZZLING);
4790
11782b02
DV
4791 if (IS_GEN5(dev))
4792 return;
4793
f691e2f4
DV
4794 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4795 if (IS_GEN6(dev))
6b26c86d 4796 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
8782e26c 4797 else if (IS_GEN7(dev))
6b26c86d 4798 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
31a5336e
BW
4799 else if (IS_GEN8(dev))
4800 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
8782e26c
BW
4801 else
4802 BUG();
f691e2f4 4803}
e21af88d 4804
81e7f200
VS
4805static void init_unused_ring(struct drm_device *dev, u32 base)
4806{
4807 struct drm_i915_private *dev_priv = dev->dev_private;
4808
4809 I915_WRITE(RING_CTL(base), 0);
4810 I915_WRITE(RING_HEAD(base), 0);
4811 I915_WRITE(RING_TAIL(base), 0);
4812 I915_WRITE(RING_START(base), 0);
4813}
4814
4815static void init_unused_rings(struct drm_device *dev)
4816{
4817 if (IS_I830(dev)) {
4818 init_unused_ring(dev, PRB1_BASE);
4819 init_unused_ring(dev, SRB0_BASE);
4820 init_unused_ring(dev, SRB1_BASE);
4821 init_unused_ring(dev, SRB2_BASE);
4822 init_unused_ring(dev, SRB3_BASE);
4823 } else if (IS_GEN2(dev)) {
4824 init_unused_ring(dev, SRB0_BASE);
4825 init_unused_ring(dev, SRB1_BASE);
4826 } else if (IS_GEN3(dev)) {
4827 init_unused_ring(dev, PRB1_BASE);
4828 init_unused_ring(dev, PRB2_BASE);
4829 }
4830}
4831
117897f4 4832int i915_gem_init_engines(struct drm_device *dev)
8187a2b7 4833{
4fc7c971 4834 struct drm_i915_private *dev_priv = dev->dev_private;
8187a2b7 4835 int ret;
68f95ba9 4836
5c1143bb 4837 ret = intel_init_render_ring_buffer(dev);
68f95ba9 4838 if (ret)
b6913e4b 4839 return ret;
68f95ba9
CW
4840
4841 if (HAS_BSD(dev)) {
5c1143bb 4842 ret = intel_init_bsd_ring_buffer(dev);
68f95ba9
CW
4843 if (ret)
4844 goto cleanup_render_ring;
d1b851fc 4845 }
68f95ba9 4846
d39398f5 4847 if (HAS_BLT(dev)) {
549f7365
CW
4848 ret = intel_init_blt_ring_buffer(dev);
4849 if (ret)
4850 goto cleanup_bsd_ring;
4851 }
4852
9a8a2213
BW
4853 if (HAS_VEBOX(dev)) {
4854 ret = intel_init_vebox_ring_buffer(dev);
4855 if (ret)
4856 goto cleanup_blt_ring;
4857 }
4858
845f74a7
ZY
4859 if (HAS_BSD2(dev)) {
4860 ret = intel_init_bsd2_ring_buffer(dev);
4861 if (ret)
4862 goto cleanup_vebox_ring;
4863 }
9a8a2213 4864
4fc7c971
BW
4865 return 0;
4866
9a8a2213 4867cleanup_vebox_ring:
117897f4 4868 intel_cleanup_engine(&dev_priv->engine[VECS]);
4fc7c971 4869cleanup_blt_ring:
117897f4 4870 intel_cleanup_engine(&dev_priv->engine[BCS]);
4fc7c971 4871cleanup_bsd_ring:
117897f4 4872 intel_cleanup_engine(&dev_priv->engine[VCS]);
4fc7c971 4873cleanup_render_ring:
117897f4 4874 intel_cleanup_engine(&dev_priv->engine[RCS]);
4fc7c971
BW
4875
4876 return ret;
4877}
4878
4879int
4880i915_gem_init_hw(struct drm_device *dev)
4881{
3e31c6c0 4882 struct drm_i915_private *dev_priv = dev->dev_private;
e2f80391 4883 struct intel_engine_cs *engine;
b4ac5afc 4884 int ret, j;
4fc7c971
BW
4885
4886 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
4887 return -EIO;
4888
5e4f5189
CW
4889 /* Double layer security blanket, see i915_gem_init() */
4890 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4891
59124506 4892 if (dev_priv->ellc_size)
05e21cc4 4893 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4fc7c971 4894
0bf21347
VS
4895 if (IS_HASWELL(dev))
4896 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4897 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
9435373e 4898
88a2b2a3 4899 if (HAS_PCH_NOP(dev)) {
6ba844b0
DV
4900 if (IS_IVYBRIDGE(dev)) {
4901 u32 temp = I915_READ(GEN7_MSG_CTL);
4902 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4903 I915_WRITE(GEN7_MSG_CTL, temp);
4904 } else if (INTEL_INFO(dev)->gen >= 7) {
4905 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4906 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4907 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4908 }
88a2b2a3
BW
4909 }
4910
4fc7c971
BW
4911 i915_gem_init_swizzling(dev);
4912
d5abdfda
DV
4913 /*
4914 * At least 830 can leave some of the unused rings
4915 * "active" (ie. head != tail) after resume which
4916 * will prevent c3 entry. Makes sure all unused rings
4917 * are totally idle.
4918 */
4919 init_unused_rings(dev);
4920
ed54c1a1 4921 BUG_ON(!dev_priv->kernel_context);
90638cc1 4922
4ad2fd88
JH
4923 ret = i915_ppgtt_init_hw(dev);
4924 if (ret) {
4925 DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4926 goto out;
4927 }
4928
4929 /* Need to do basic initialisation of all rings first: */
b4ac5afc 4930 for_each_engine(engine, dev_priv) {
e2f80391 4931 ret = engine->init_hw(engine);
35a57ffb 4932 if (ret)
5e4f5189 4933 goto out;
35a57ffb 4934 }
99433931 4935
33a732f4 4936 /* We can't enable contexts until all firmware is loaded */
87bcdd2e
JB
4937 if (HAS_GUC_UCODE(dev)) {
4938 ret = intel_guc_ucode_load(dev);
4939 if (ret) {
9f9e539f
DV
4940 DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
4941 ret = -EIO;
4942 goto out;
87bcdd2e 4943 }
33a732f4
AD
4944 }
4945
e84fe803
NH
4946 /*
4947 * Increment the next seqno by 0x100 so we have a visible break
4948 * on re-initialisation
4949 */
4950 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
4951 if (ret)
4952 goto out;
4953
4ad2fd88 4954 /* Now it is safe to go back round and do everything else: */
b4ac5afc 4955 for_each_engine(engine, dev_priv) {
dc4be607
JH
4956 struct drm_i915_gem_request *req;
4957
e2f80391 4958 req = i915_gem_request_alloc(engine, NULL);
26827088
DG
4959 if (IS_ERR(req)) {
4960 ret = PTR_ERR(req);
117897f4 4961 i915_gem_cleanup_engines(dev);
dc4be607
JH
4962 goto out;
4963 }
4964
e2f80391 4965 if (engine->id == RCS) {
4ad2fd88 4966 for (j = 0; j < NUM_L3_SLICES(dev); j++)
6909a666 4967 i915_gem_l3_remap(req, j);
4ad2fd88 4968 }
c3787e2e 4969
b3dd6b96 4970 ret = i915_ppgtt_init_ring(req);
4ad2fd88 4971 if (ret && ret != -EIO) {
b4ac5afc
DG
4972 DRM_ERROR("PPGTT enable %s failed %d\n",
4973 engine->name, ret);
dc4be607 4974 i915_gem_request_cancel(req);
117897f4 4975 i915_gem_cleanup_engines(dev);
4ad2fd88
JH
4976 goto out;
4977 }
82460d97 4978
b3dd6b96 4979 ret = i915_gem_context_enable(req);
90638cc1 4980 if (ret && ret != -EIO) {
b4ac5afc
DG
4981 DRM_ERROR("Context enable %s failed %d\n",
4982 engine->name, ret);
dc4be607 4983 i915_gem_request_cancel(req);
117897f4 4984 i915_gem_cleanup_engines(dev);
90638cc1
JH
4985 goto out;
4986 }
dc4be607 4987
75289874 4988 i915_add_request_no_flush(req);
b7c36d25 4989 }
e21af88d 4990
5e4f5189
CW
4991out:
4992 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2fa48d8d 4993 return ret;
8187a2b7
ZN
4994}

int i915_gem_init(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int ret;

        i915.enable_execlists = intel_sanitize_enable_execlists(dev,
                        i915.enable_execlists);

        mutex_lock(&dev->struct_mutex);

        if (!i915.enable_execlists) {
                dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
                dev_priv->gt.init_engines = i915_gem_init_engines;
                dev_priv->gt.cleanup_engine = intel_cleanup_engine;
                dev_priv->gt.stop_engine = intel_stop_engine;
        } else {
                dev_priv->gt.execbuf_submit = intel_execlists_submission;
                dev_priv->gt.init_engines = intel_logical_rings_init;
                dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
                dev_priv->gt.stop_engine = intel_logical_ring_stop;
        }

        /* This is just a security blanket to placate dragons.
         * On some systems, we very sporadically observe that the first TLBs
         * used by the CS may be stale, despite us poking the TLB reset. If
         * we hold the forcewake during initialisation these problems
         * just magically go away.
         */
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

        ret = i915_gem_init_userptr(dev);
        if (ret)
                goto out_unlock;

        i915_gem_init_ggtt(dev);

        ret = i915_gem_context_init(dev);
        if (ret)
                goto out_unlock;

        ret = dev_priv->gt.init_engines(dev);
        if (ret)
                goto out_unlock;

        ret = i915_gem_init_hw(dev);
        if (ret == -EIO) {
                /* Allow ring initialisation to fail by marking the GPU as
                 * wedged. But we only want to do this where the GPU is angry,
                 * for all other failure, such as an allocation failure, bail.
                 */
                DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
                atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
                ret = 0;
        }

out_unlock:
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
        mutex_unlock(&dev->struct_mutex);

        return ret;
}
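/*
 * Editor's illustrative sketch (not part of i915_gem.c): i915_gem_init()
 * above picks between the legacy ringbuffer backend and the execlists
 * backend exactly once, by filling the dev_priv->gt function-pointer table.
 * The hypothetical helper below shows why that matters to callers: later
 * code can stop every engine through the table without caring which
 * submission mode was chosen.  The helper name and the assumption that
 * gt.stop_engine takes the engine pointer (by analogy with
 * gt.cleanup_engine above) are for illustration only.
 */
static void __example_stop_all_engines(struct drm_i915_private *dev_priv)
{
        struct intel_engine_cs *engine;

        /* Dispatches to intel_stop_engine() or intel_logical_ring_stop(). */
        for_each_engine(engine, dev_priv)
                dev_priv->gt.stop_engine(engine);
}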

void
i915_gem_cleanup_engines(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *engine;

        for_each_engine(engine, dev_priv)
                dev_priv->gt.cleanup_engine(engine);

        if (i915.enable_execlists)
                /*
                 * Neither the BIOS, ourselves, nor any other kernel
                 * expects the system to be in execlists mode on startup,
                 * so we need to reset the GPU back to legacy mode.
                 */
                intel_gpu_reset(dev, ALL_ENGINES);
}

static void
init_engine_lists(struct intel_engine_cs *engine)
{
        INIT_LIST_HEAD(&engine->active_list);
        INIT_LIST_HEAD(&engine->request_list);
}

void
i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
{
        struct drm_device *dev = dev_priv->dev;

        if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
            !IS_CHERRYVIEW(dev_priv))
                dev_priv->num_fence_regs = 32;
        else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
                 IS_I945GM(dev_priv) || IS_G33(dev_priv))
                dev_priv->num_fence_regs = 16;
        else
                dev_priv->num_fence_regs = 8;

        if (intel_vgpu_active(dev))
                dev_priv->num_fence_regs =
                                I915_READ(vgtif_reg(avail_rs.fence_num));

        /* Initialize fence registers to zero */
        i915_gem_restore_fences(dev);

        i915_gem_detect_bit_6_swizzle(dev);
}

void
i915_gem_load_init(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int i;

        dev_priv->objects =
                kmem_cache_create("i915_gem_object",
                                  sizeof(struct drm_i915_gem_object), 0,
                                  SLAB_HWCACHE_ALIGN,
                                  NULL);
        dev_priv->vmas =
                kmem_cache_create("i915_gem_vma",
                                  sizeof(struct i915_vma), 0,
                                  SLAB_HWCACHE_ALIGN,
                                  NULL);
        dev_priv->requests =
                kmem_cache_create("i915_gem_request",
                                  sizeof(struct drm_i915_gem_request), 0,
                                  SLAB_HWCACHE_ALIGN,
                                  NULL);

        INIT_LIST_HEAD(&dev_priv->vm_list);
        INIT_LIST_HEAD(&dev_priv->context_list);
        INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
        INIT_LIST_HEAD(&dev_priv->mm.bound_list);
        INIT_LIST_HEAD(&dev_priv->mm.fence_list);
        for (i = 0; i < I915_NUM_ENGINES; i++)
                init_engine_lists(&dev_priv->engine[i]);
        for (i = 0; i < I915_MAX_NUM_FENCES; i++)
                INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
        INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
                          i915_gem_retire_work_handler);
        INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
                          i915_gem_idle_work_handler);
        init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

        dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

        /*
         * Set initial sequence number for requests.
         * Using this number allows the wraparound to happen early,
         * catching any obvious problems.
         */
        dev_priv->next_seqno = ((u32)~0 - 0x1100);
        dev_priv->last_seqno = ((u32)~0 - 0x1101);

        INIT_LIST_HEAD(&dev_priv->mm.fence_list);

        init_waitqueue_head(&dev_priv->pending_flip_queue);

        dev_priv->mm.interruptible = true;

        mutex_init(&dev_priv->fb_tracking.lock);
}

void i915_gem_load_cleanup(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = to_i915(dev);

        kmem_cache_destroy(dev_priv->requests);
        kmem_cache_destroy(dev_priv->vmas);
        kmem_cache_destroy(dev_priv->objects);
}
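/*
 * Editor's illustrative sketch (not part of i915_gem.c): the three slab
 * caches created in i915_gem_load_init() and destroyed above are dedicated
 * allocation pools for the driver's hottest structures.  The snippet below
 * only shows the generic kmem_cache pattern, using the request cache as an
 * example; the real allocation and free sites live elsewhere in the driver,
 * so treat this as an assumption-level illustration rather than driver code.
 */
static struct drm_i915_gem_request *
__example_alloc_request(struct drm_i915_private *dev_priv)
{
        /* Zeroed allocation from the dedicated "i915_gem_request" cache. */
        return kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
}

static void __example_free_request(struct drm_i915_private *dev_priv,
                                   struct drm_i915_gem_request *req)
{
        kmem_cache_free(dev_priv->requests, req);
}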

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
        struct drm_i915_file_private *file_priv = file->driver_priv;

        /* Clean up our request list when the client is going away, so that
         * later retire_requests won't dereference our soon-to-be-gone
         * file_priv.
         */
        spin_lock(&file_priv->mm.lock);
        while (!list_empty(&file_priv->mm.request_list)) {
                struct drm_i915_gem_request *request;

                request = list_first_entry(&file_priv->mm.request_list,
                                           struct drm_i915_gem_request,
                                           client_list);
                list_del(&request->client_list);
                request->file_priv = NULL;
        }
        spin_unlock(&file_priv->mm.lock);

        if (!list_empty(&file_priv->rps.link)) {
                spin_lock(&to_i915(dev)->rps.client_lock);
                list_del(&file_priv->rps.link);
                spin_unlock(&to_i915(dev)->rps.client_lock);
        }
}

int i915_gem_open(struct drm_device *dev, struct drm_file *file)
{
        struct drm_i915_file_private *file_priv;
        int ret;

        DRM_DEBUG_DRIVER("\n");

        file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
        if (!file_priv)
                return -ENOMEM;

        file->driver_priv = file_priv;
        file_priv->dev_priv = dev->dev_private;
        file_priv->file = file;
        INIT_LIST_HEAD(&file_priv->rps.link);

        spin_lock_init(&file_priv->mm.lock);
        INIT_LIST_HEAD(&file_priv->mm.request_list);

        file_priv->bsd_ring = -1;

        ret = i915_gem_context_open(dev, file);
        if (ret)
                kfree(file_priv);

        return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
                       struct drm_i915_gem_object *new,
                       unsigned frontbuffer_bits)
{
        if (old) {
                WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
                WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
                old->frontbuffer_bits &= ~frontbuffer_bits;
        }

        if (new) {
                WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
                WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
                new->frontbuffer_bits |= frontbuffer_bits;
        }
}
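/*
 * Editor's illustrative sketch (not part of i915_gem.c): per the kerneldoc
 * above, a caller that swaps the object backing a frontbuffer (for example
 * during a plane update) transfers the relevant frontbuffer bits from the
 * old object to the new one while holding struct_mutex, which the WARN_ONs
 * above enforce.  The helper name and the "bits" parameter below are
 * assumptions; real callers compute the bitmask from the CRTC/plane they
 * are updating.
 */
static void __example_swap_frontbuffer(struct drm_device *dev,
                                       struct drm_i915_gem_object *old_obj,
                                       struct drm_i915_gem_object *new_obj,
                                       unsigned int bits)
{
        mutex_lock(&dev->struct_mutex);
        /* Clears "bits" on old_obj and sets them on new_obj; either may be NULL. */
        i915_gem_track_fb(old_obj, new_obj, bits);
        mutex_unlock(&dev->struct_mutex);
}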

/* All the new VM stuff */
u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
                        struct i915_address_space *vm)
{
        struct drm_i915_private *dev_priv = o->base.dev->dev_private;
        struct i915_vma *vma;

        WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);

        list_for_each_entry(vma, &o->vma_list, obj_link) {
                if (vma->is_ggtt &&
                    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
                        continue;
                if (vma->vm == vm)
                        return vma->node.start;
        }

        WARN(1, "%s vma for this object not found.\n",
             i915_is_ggtt(vm) ? "global" : "ppgtt");
        return -1;
}

u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
                                  const struct i915_ggtt_view *view)
{
        struct drm_i915_private *dev_priv = to_i915(o->base.dev);
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
        struct i915_vma *vma;

        list_for_each_entry(vma, &o->vma_list, obj_link)
                if (vma->vm == &ggtt->base &&
                    i915_ggtt_view_equal(&vma->ggtt_view, view))
                        return vma->node.start;

        WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
        return -1;
}
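/*
 * Editor's illustrative sketch (not part of i915_gem.c): the GGTT lookup
 * helpers in this block answer "where is this object mapped for a given
 * view?".  A typical caller first checks that the view is actually bound
 * (see i915_gem_obj_ggtt_bound_view() below) before trusting the returned
 * offset, since an unbound view triggers the WARN above.  The wrapper below
 * and its name are an assumption-level illustration of that pattern.
 */
static bool __example_get_ggtt_offset(struct drm_i915_gem_object *obj,
                                      const struct i915_ggtt_view *view,
                                      u64 *offset)
{
        if (!i915_gem_obj_ggtt_bound_view(obj, view))
                return false;

        *offset = i915_gem_obj_ggtt_offset_view(obj, view);
        return true;
}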

bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
                        struct i915_address_space *vm)
{
        struct i915_vma *vma;

        list_for_each_entry(vma, &o->vma_list, obj_link) {
                if (vma->is_ggtt &&
                    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
                        continue;
                if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
                        return true;
        }

        return false;
}

bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
                                  const struct i915_ggtt_view *view)
{
        struct drm_i915_private *dev_priv = to_i915(o->base.dev);
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
        struct i915_vma *vma;

        list_for_each_entry(vma, &o->vma_list, obj_link)
                if (vma->vm == &ggtt->base &&
                    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
                    drm_mm_node_allocated(&vma->node))
                        return true;

        return false;
}

bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
{
        struct i915_vma *vma;

        list_for_each_entry(vma, &o->vma_list, obj_link)
                if (drm_mm_node_allocated(&vma->node))
                        return true;

        return false;
}

unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
                                struct i915_address_space *vm)
{
        struct drm_i915_private *dev_priv = o->base.dev->dev_private;
        struct i915_vma *vma;

        WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);

        BUG_ON(list_empty(&o->vma_list));

        list_for_each_entry(vma, &o->vma_list, obj_link) {
                if (vma->is_ggtt &&
                    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
                        continue;
                if (vma->vm == vm)
                        return vma->node.size;
        }
        return 0;
}

bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
{
        struct i915_vma *vma;

        list_for_each_entry(vma, &obj->vma_list, obj_link)
                if (vma->pin_count > 0)
                        return true;

        return false;
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
{
        struct page *page;

        /* Only default objects have per-page dirty tracking */
        if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
                return NULL;

        page = i915_gem_object_get_page(obj, n);
        set_page_dirty(page);
        return page;
}
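/*
 * Editor's illustrative sketch (not part of i915_gem.c): because the helper
 * above already calls set_page_dirty(), a caller that wants to poke bytes
 * into an object's backing store only has to map the page and write.  The
 * helper below assumes the object's pages are already pinned by the caller
 * and that "offset" lies within the object; both are assumptions made for
 * this illustration.
 */
static void __example_patch_byte(struct drm_i915_gem_object *obj,
                                 unsigned long offset, u8 value)
{
        struct page *page;
        void *vaddr;

        page = i915_gem_object_get_dirty_page(obj, offset >> PAGE_SHIFT);
        if (WARN_ON(page == NULL))
                return;

        vaddr = kmap(page);
        ((u8 *)vaddr)[offset & ~PAGE_MASK] = value;
        kunmap(page);
}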

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_device *dev,
                                 const void *data, size_t size)
{
        struct drm_i915_gem_object *obj;
        struct sg_table *sg;
        size_t bytes;
        int ret;

        obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
        if (IS_ERR_OR_NULL(obj))
                return obj;

        ret = i915_gem_object_set_to_cpu_domain(obj, true);
        if (ret)
                goto fail;

        ret = i915_gem_object_get_pages(obj);
        if (ret)
                goto fail;

        i915_gem_object_pin_pages(obj);
        sg = obj->pages;
        bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
        obj->dirty = 1;         /* Backing store is now out of date */
        i915_gem_object_unpin_pages(obj);

        if (WARN_ON(bytes != size)) {
                DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
                ret = -EFAULT;
                goto fail;
        }

        return obj;

fail:
        drm_gem_object_unreference(&obj->base);
        return ERR_PTR(ret);
}
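/*
 * Editor's illustrative sketch (not part of i915_gem.c): the function above
 * is a convenience for wrapping a blob of memory (for example firmware data)
 * in a GEM object.  The caller below and its parameter names are assumptions
 * for illustration; note that the return value must be checked with
 * IS_ERR_OR_NULL(), matching the failure modes of the function above, and the
 * object released with drm_gem_object_unreference() when no longer needed.
 */
static struct drm_i915_gem_object *
__example_wrap_blob(struct drm_device *dev, const void *blob, size_t len)
{
        struct drm_i915_gem_object *obj;

        obj = i915_gem_object_create_from_data(dev, blob, len);
        if (IS_ERR_OR_NULL(obj))
                return NULL;

        /* obj now holds a copy of the blob, rounded up to a page multiple. */
        return obj;
}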