drm/i915: Retire oldest completed request before allocating next
deliverable/linux.git: drivers/gpu/drm/i915/i915_gem.c
/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_mocs.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;

	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

static int
insert_mappable_node(struct drm_i915_private *i915,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
						   size, 0, 0, 0,
						   i915->ggtt.mappable_end,
						   DRM_MM_SEARCH_DEFAULT,
						   DRM_MM_CREATE_DEFAULT);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}
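
/*
 * Note (added annotation): insert_mappable_node()/remove_mappable_node()
 * give the GTT pread/pwrite fallback paths further down a temporary,
 * page-sized window in the mappable aperture when the whole object cannot
 * be pinned there; pages are then cycled through that window one at a time
 * via ggtt->base.insert_page().
 */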

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	if (!i915_reset_in_progress(error))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_in_progress(error),
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (WARN_ON(ret)) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static int
drop_pages(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma, *next;
	int ret;

	drm_gem_object_reference(&obj->base);
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
		if (i915_vma_unbind(vma))
			break;

	ret = i915_gem_object_put_pages(obj);
	drm_gem_object_unreference(&obj->base);

	return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

	if (obj->base.filp == NULL)
		return -EINVAL;

	ret = drop_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
	obj->ops = &i915_gem_phys_ops;

	return i915_gem_object_get_pages(obj);
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(dev));

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create(dev, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}
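
/*
 * Worked example of the dumb-buffer size calculation above (illustration
 * only, not part of the original source): for a 1920x1080 buffer at 32 bpp,
 * pitch = ALIGN(1920 * 4, 64) = 7680 bytes and size = 7680 * 1080 =
 * 8294400 bytes, which i915_gem_create() then rounds up to a whole number
 * of pages.
 */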

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}
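
/*
 * For illustration only (not part of this file): a userspace caller would
 * typically reach i915_gem_create_ioctl() through libdrm along these lines,
 * assuming a correctly opened DRM fd and a hypothetical use_handle() helper:
 *
 *	struct drm_i915_gem_create create = { .size = 4096 };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0)
 *		use_handle(create.handle);
 *
 * The kernel rounds the requested size up to a page multiple, as
 * i915_gem_create() above shows.
 */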

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}
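
/*
 * Added annotation: the swizzled copy helpers above walk the buffer one
 * 64-byte cacheline at a time and XOR the GPU offset with 64, i.e. they swap
 * each even/odd cacheline within a 128-byte pair. For example, bytes 0..63
 * of the CPU view map to GPU bytes 64..127 and vice versa, compensating for
 * the bit-17 based channel swizzling that the callers detect per page with
 * page_to_phys(page) & (1 << 17).
 */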

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
		return -EINVAL;

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? - EFAULT : 0;
}

static inline unsigned long
slow_user_access(struct io_mapping *mapping,
		 uint64_t page_base, int page_offset,
		 char __user *user_data,
		 unsigned long length, bool pwrite)
{
	void __iomem *ioaddr;
	void *vaddr;
	uint64_t unwritten;

	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force *)ioaddr + page_offset;
	if (pwrite)
		unwritten = __copy_from_user(vaddr, user_data, length);
	else
		unwritten = __copy_to_user(user_data, vaddr, length);

	io_mapping_unmap(ioaddr);
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_device *dev,
		   struct drm_i915_gem_object *obj, uint64_t size,
		   uint64_t data_offset, uint64_t data_ptr)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_mm_node node;
	char __user *user_data;
	uint64_t remain;
	uint64_t offset;
	int ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
	if (ret) {
		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
		if (ret)
			goto out;

		ret = i915_gem_object_get_pages(obj);
		if (ret) {
			remove_mappable_node(&node);
			goto out;
		}

		i915_gem_object_pin_pages(obj);
	} else {
		node.start = i915_gem_obj_ggtt_offset(obj);
		node.allocated = false;
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	user_data = u64_to_user_ptr(data_ptr);
	remain = size;
	offset = data_offset;

	mutex_unlock(&dev->struct_mutex);
	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_writeable(user_data, remain);
		if (ret) {
			mutex_lock(&dev->struct_mutex);
			goto out_unpin;
		}
	}

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start,
					       I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* This is a slow read/write as it tries to read from
		 * and write to user memory which may result into page
		 * faults, and so we cannot perform this under struct_mutex.
		 */
		if (slow_user_access(ggtt->mappable, page_base,
				     page_offset, user_data,
				     page_length, false)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&dev->struct_mutex);
	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
		/* The user has modified the object whilst we tried
		 * reading from it, and we now have no idea what domain
		 * the pages should be in. As we have just been touching
		 * them directly, flush everything back to the GTT
		 * domain.
		 */
		ret = i915_gem_object_set_to_gtt_domain(obj, false);
	}

out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size,
				       true);
		i915_gem_object_unpin_pages(obj);
		remove_mappable_node(&node);
	} else {
		i915_gem_object_ggtt_unpin(obj);
	}
out:
	return ret;
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

	/* pread for non shmem backed objects */
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(dev, obj, args->size,
					 args->offset, args->data_ptr);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @i915: i915 device private data
 * @obj: i915 gem object
 * @args: pwrite arguments structure
 * @file: drm file pointer
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_device *dev = obj->base.dev;
	struct drm_mm_node node;
	uint64_t remain, offset;
	char __user *user_data;
	int ret;
	bool hit_slow_path = false;

	if (obj->tiling_mode != I915_TILING_NONE)
		return -EFAULT;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret) {
		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
		if (ret)
			goto out;

		ret = i915_gem_object_get_pages(obj);
		if (ret) {
			remove_mappable_node(&node);
			goto out;
		}

		i915_gem_object_pin_pages(obj);
	} else {
		node.start = i915_gem_obj_ggtt_offset(obj);
		node.allocated = false;
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
	obj->dirty = true;

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (fast_user_write(ggtt->mappable, page_base,
				    page_offset, user_data, page_length)) {
			hit_slow_path = true;
			mutex_unlock(&dev->struct_mutex);
			if (slow_user_access(ggtt->mappable,
					     page_base,
					     page_offset, user_data,
					     page_length, true)) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto out_flush;
			}

			mutex_lock(&dev->struct_mutex);
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	if (hit_slow_path) {
		if (ret == 0 &&
		    (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
			/* The user has modified the object whilst we tried
			 * reading from it, and we now have no idea what domain
			 * the pages should be in. As we have just been touching
			 * them directly, flush everything back to the GTT
			 * domain.
			 */
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
		}
	}

	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size,
				       true);
		i915_gem_object_unpin_pages(obj);
		remove_mappable_node(&node);
	} else {
		i915_gem_object_ggtt_unpin(obj);
	}
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	struct sg_page_iter sg_iter;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);
		int partial_cacheline_write;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (boot_cpu_data.x86_clflush_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				needs_clflush_after = true;
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(to_i915(dev));
	else
		obj->cache_dirty = true;

	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto put_rpm;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else if (i915_gem_object_has_struct_page(obj))
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
		else
			ret = -ENODEV;
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}
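
/*
 * For illustration only (not part of this file): userspace normally drives
 * the pread/pwrite ioctls above roughly as follows, assuming `fd` is an open
 * DRM fd and `handle` a valid GEM handle:
 *
 *	char buf[4096];
 *	struct drm_i915_gem_pwrite pw = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = sizeof(buf),
 *		.data_ptr = (uintptr_t)buf,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pw);
 *
 * The kernel then picks the GTT, shmem or phys backend as seen in
 * i915_gem_pwrite_ioctl() above.
 */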

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 * @obj: i915 gem object
 * @readonly: waiting for read access or write
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	int ret, i;

	if (!obj->active)
		return 0;

	if (readonly) {
		if (obj->last_write_req != NULL) {
			ret = i915_wait_request(obj->last_write_req);
			if (ret)
				return ret;

			i = obj->last_write_req->engine->id;
			if (obj->last_read_req[i] == obj->last_write_req)
				i915_gem_object_retire__read(obj, i);
			else
				i915_gem_object_retire__write(obj);
		}
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			if (obj->last_read_req[i] == NULL)
				continue;

			ret = i915_wait_request(obj->last_read_req[i]);
			if (ret)
				return ret;

			i915_gem_object_retire__read(obj, i);
		}
		GEM_BUG_ON(obj->active);
	}

	return 0;
}

static void
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
			       struct drm_i915_gem_request *req)
{
	int ring = req->engine->id;

	if (obj->last_read_req[ring] == req)
		i915_gem_object_retire__read(obj, ring);
	else if (obj->last_write_req == req)
		i915_gem_object_retire__write(obj);

	if (!i915_reset_in_progress(&req->i915->gpu_error))
		i915_gem_request_retire_upto(req);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct intel_rps_client *rps,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
	int ret, i, n = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	if (!obj->active)
		return 0;

	if (readonly) {
		struct drm_i915_gem_request *req;

		req = obj->last_write_req;
		if (req == NULL)
			return 0;

		requests[n++] = i915_gem_request_reference(req);
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			struct drm_i915_gem_request *req;

			req = obj->last_read_req[i];
			if (req == NULL)
				continue;

			requests[n++] = i915_gem_request_reference(req);
		}
	}

	mutex_unlock(&dev->struct_mutex);
	ret = 0;
	for (i = 0; ret == 0 && i < n; i++)
		ret = __i915_wait_request(requests[i], true, NULL, rps);
	mutex_lock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		if (ret == 0)
			i915_gem_object_retire_request(obj, requests[i]);
		i915_gem_request_unreference(requests[i]);
	}

	return ret;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;
	return &fpriv->rps;
}

static enum fb_op_origin
write_origin(struct drm_i915_gem_object *obj, unsigned domain)
{
	return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
	       ORIGIN_GTT : ORIGIN_CPU;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  to_rps_client(file),
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}
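
/*
 * For illustration only (not part of this file): before reading an object
 * through a CPU mmap, userspace would usually move it into the CPU read
 * domain roughly as follows (assuming a valid fd/handle):
 *
 *	struct drm_i915_gem_set_domain sd = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = 0,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
 *
 * which ends up in i915_gem_set_domain_ioctl() above and waits for any
 * outstanding rendering before flushing caches as needed.
 */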

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 *			 it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look a this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = drm_gem_object_lookup(file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->filp) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	addr = vm_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			drm_gem_object_unreference_unlocked(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true);
	}
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}
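
/*
 * For illustration only (not part of this file): the CPU mmap path above is
 * reached from userspace roughly like this (error handling omitted, field
 * names per the uapi struct drm_i915_gem_mmap):
 *
 *	struct drm_i915_gem_mmap mmap_arg = {
 *		.handle = handle,
 *		.size = obj_size,
 *		.flags = 0,	// or I915_MMAP_WC for a write-combining map
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
 *	void *ptr = (void *)(uintptr_t)mmap_arg.addr_ptr;
 *
 * As the comment above stresses, new drivers should expose an mmap offset
 * and use mmap(2) on the DRM fd rather than copy this interface.
 */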

/**
 * i915_gem_fault - fault a page into the GTT
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct i915_ggtt_view view = i915_ggtt_view_normal;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/* Use a partial view if the object is bigger than the aperture. */
	if (obj->base.size >= ggtt->mappable_end &&
	    obj->tiling_mode == I915_TILING_NONE) {
		static const unsigned int chunk_size = 256; // 1 MiB

		memset(&view, 0, sizeof(view));
		view.type = I915_GGTT_VIEW_PARTIAL;
		view.params.partial.offset = rounddown(page_offset, chunk_size);
		view.params.partial.size =
			min_t(unsigned int,
			      chunk_size,
			      (vma->vm_end - vma->vm_start)/PAGE_SIZE -
			      view.params.partial.offset);
	}

	/* Now pin it into the GTT if needed */
	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	/* Finally, remap it using the new GTT offset */
	pfn = ggtt->mappable_base +
		i915_gem_obj_ggtt_offset_view(obj, &view);
	pfn >>= PAGE_SHIFT;

	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
		/* Overriding existing pages in partial view does not cause
		 * us any trouble as TLBs are still valid because the fault
		 * is due to userspace losing part of the mapping or never
		 * having accessed it before (at this partials' range).
		 */
		unsigned long base = vma->vm_start +
				     (view.params.partial.offset << PAGE_SHIFT);
		unsigned int i;

		for (i = 0; i < view.params.partial.size; i++) {
			ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
			if (ret)
				break;
		}

		obj->fault_mappable = true;
	} else {
		if (!obj->fault_mappable) {
			unsigned long size = min_t(unsigned long,
						   vma->vm_end - vma->vm_start,
						   obj->base.size);
			int i;

			for (i = 0; i < size >> PAGE_SHIFT; i++) {
				ret = vm_insert_pfn(vma,
						    (unsigned long)vma->vm_start + i * PAGE_SIZE,
						    pfn + i);
				if (ret)
					break;
			}

			obj->fault_mappable = true;
		} else
			ret = vm_insert_pfn(vma,
					    (unsigned long)vmf->virtual_address,
					    pfn + page_offset);
	}
unpin:
	i915_gem_object_ggtt_unpin_view(obj, &view);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
			ret = VM_FAULT_SIGBUS;
			break;
		}
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		ret = VM_FAULT_NOPAGE;
		break;
	case -ENOMEM:
		ret = VM_FAULT_OOM;
		break;
	case -ENOSPC:
	case -EFAULT:
		ret = VM_FAULT_SIGBUS;
		break;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_FAULT_SIGBUS;
		break;
	}

	intel_runtime_pm_put(dev_priv);
	return ret;
}
1805
901782b2
CW
1806/**
1807 * i915_gem_release_mmap - remove physical page mappings
1808 * @obj: obj in question
1809 *
af901ca1 1810 * Preserve the reservation of the mmapping with the DRM core code, but
901782b2
CW
1811 * relinquish ownership of the pages back to the system.
1812 *
1813 * It is vital that we remove the page mapping if we have mapped a tiled
1814 * object through the GTT and then lose the fence register due to
1815 * resource pressure. Similarly if the object has been moved out of the
 1816 * aperture, then pages mapped into userspace must be revoked. Removing the
1817 * mapping will then trigger a page fault on the next user access, allowing
1818 * fixup by i915_gem_fault().
1819 */
d05ca301 1820void
05394f39 1821i915_gem_release_mmap(struct drm_i915_gem_object *obj)
901782b2 1822{
349f2ccf
CW
1823 /* Serialisation between user GTT access and our code depends upon
1824 * revoking the CPU's PTE whilst the mutex is held. The next user
1825 * pagefault then has to wait until we release the mutex.
1826 */
1827 lockdep_assert_held(&obj->base.dev->struct_mutex);
1828
6299f992
CW
1829 if (!obj->fault_mappable)
1830 return;
901782b2 1831
6796cb16
DH
1832 drm_vma_node_unmap(&obj->base.vma_node,
1833 obj->base.dev->anon_inode->i_mapping);
349f2ccf
CW
1834
1835 /* Ensure that the CPU's PTE are revoked and there are not outstanding
1836 * memory transactions from userspace before we return. The TLB
1837 * flushing implied above by changing the PTE above *should* be
1838 * sufficient, an extra barrier here just provides us with a bit
1839 * of paranoid documentation about our requirement to serialise
1840 * memory writes before touching registers / GSM.
1841 */
1842 wmb();
1843
6299f992 1844 obj->fault_mappable = false;
901782b2
CW
1845}
1846
eedd10f4
CW
1847void
1848i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1849{
1850 struct drm_i915_gem_object *obj;
1851
1852 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1853 i915_gem_release_mmap(obj);
1854}
1855
0fa87796 1856uint32_t
e28f8711 1857i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
92b88aeb 1858{
e28f8711 1859 uint32_t gtt_size;
92b88aeb
CW
1860
1861 if (INTEL_INFO(dev)->gen >= 4 ||
e28f8711
CW
1862 tiling_mode == I915_TILING_NONE)
1863 return size;
92b88aeb
CW
1864
1865 /* Previous chips need a power-of-two fence region when tiling */
7e22dbbb 1866 if (IS_GEN3(dev))
e28f8711 1867 gtt_size = 1024*1024;
92b88aeb 1868 else
e28f8711 1869 gtt_size = 512*1024;
92b88aeb 1870
e28f8711
CW
1871 while (gtt_size < size)
1872 gtt_size <<= 1;
92b88aeb 1873
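 /* e.g. a 1.5 MiB tiled object on gen3 rounds up to a 2 MiB fence region */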
e28f8711 1874 return gtt_size;
92b88aeb
CW
1875}
1876
de151cf6
JB
1877/**
1878 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
14bb2c11
TU
1879 * @dev: drm device
1880 * @size: object size
1881 * @tiling_mode: tiling mode
 1882 * @fenced: is fenced alignment required or not
de151cf6
JB
1883 *
1884 * Return the required GTT alignment for an object, taking into account
5e783301 1885 * potential fence register mapping.
de151cf6 1886 */
d865110c
ID
1887uint32_t
1888i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1889 int tiling_mode, bool fenced)
de151cf6 1890{
de151cf6
JB
1891 /*
1892 * Minimum alignment is 4k (GTT page size), but might be greater
1893 * if a fence register is needed for the object.
1894 */
d865110c 1895 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
e28f8711 1896 tiling_mode == I915_TILING_NONE)
de151cf6
JB
1897 return 4096;
1898
a00b10c3
CW
1899 /*
1900 * Previous chips need to be aligned to the size of the smallest
1901 * fence register that can contain the object.
1902 */
e28f8711 1903 return i915_gem_get_gtt_size(dev, size, tiling_mode);
a00b10c3
CW
1904}
1905
d8cb5086
CW
1906static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1907{
fac5e23e 1908 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
d8cb5086
CW
1909 int ret;
1910
da494d7c
DV
1911 dev_priv->mm.shrinker_no_lock_stealing = true;
1912
d8cb5086
CW
1913 ret = drm_gem_create_mmap_offset(&obj->base);
1914 if (ret != -ENOSPC)
da494d7c 1915 goto out;
d8cb5086
CW
1916
1917 /* Badly fragmented mmap space? The only way we can recover
1918 * space is by destroying unwanted objects. We can't randomly release
1919 * mmap_offsets as userspace expects them to be persistent for the
 1920 * lifetime of the objects. The closest we can do is to release the
 1921 * offsets on purgeable objects by truncating the object and marking it purged,
1922 * which prevents userspace from ever using that object again.
1923 */
21ab4e74
CW
1924 i915_gem_shrink(dev_priv,
1925 obj->base.size >> PAGE_SHIFT,
1926 I915_SHRINK_BOUND |
1927 I915_SHRINK_UNBOUND |
1928 I915_SHRINK_PURGEABLE);
d8cb5086
CW
1929 ret = drm_gem_create_mmap_offset(&obj->base);
1930 if (ret != -ENOSPC)
da494d7c 1931 goto out;
d8cb5086
CW
1932
1933 i915_gem_shrink_all(dev_priv);
da494d7c
DV
1934 ret = drm_gem_create_mmap_offset(&obj->base);
1935out:
1936 dev_priv->mm.shrinker_no_lock_stealing = false;
1937
1938 return ret;
d8cb5086
CW
1939}
1940
1941static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1942{
d8cb5086
CW
1943 drm_gem_free_mmap_offset(&obj->base);
1944}
1945
da6b51d0 1946int
ff72145b
DA
1947i915_gem_mmap_gtt(struct drm_file *file,
1948 struct drm_device *dev,
da6b51d0 1949 uint32_t handle,
ff72145b 1950 uint64_t *offset)
de151cf6 1951{
05394f39 1952 struct drm_i915_gem_object *obj;
de151cf6
JB
1953 int ret;
1954
76c1dec1 1955 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1956 if (ret)
76c1dec1 1957 return ret;
de151cf6 1958
a8ad0bd8 1959 obj = to_intel_bo(drm_gem_object_lookup(file, handle));
c8725226 1960 if (&obj->base == NULL) {
1d7cfea1
CW
1961 ret = -ENOENT;
1962 goto unlock;
1963 }
de151cf6 1964
05394f39 1965 if (obj->madv != I915_MADV_WILLNEED) {
bd9b6a4e 1966 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
8c99e57d 1967 ret = -EFAULT;
1d7cfea1 1968 goto out;
ab18282d
CW
1969 }
1970
d8cb5086
CW
1971 ret = i915_gem_object_create_mmap_offset(obj);
1972 if (ret)
1973 goto out;
de151cf6 1974
0de23977 1975 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
de151cf6 1976
1d7cfea1 1977out:
05394f39 1978 drm_gem_object_unreference(&obj->base);
1d7cfea1 1979unlock:
de151cf6 1980 mutex_unlock(&dev->struct_mutex);
1d7cfea1 1981 return ret;
de151cf6
JB
1982}
1983
ff72145b
DA
1984/**
1985 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1986 * @dev: DRM device
1987 * @data: GTT mapping ioctl data
 1988 * @file: drm file pointer
1989 *
1990 * Simply returns the fake offset to userspace so it can mmap it.
1991 * The mmap call will end up in drm_gem_mmap(), which will set things
1992 * up so we can get faults in the handler above.
1993 *
1994 * The fault handler will take care of binding the object into the GTT
1995 * (since it may have been evicted to make room for something), allocating
1996 * a fence register, and mapping the appropriate aperture address into
1997 * userspace.
1998 */
1999int
2000i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2001 struct drm_file *file)
2002{
2003 struct drm_i915_gem_mmap_gtt *args = data;
2004
da6b51d0 2005 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
ff72145b
DA
2006}
2007
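/*
 * Illustrative userspace sketch (not part of this file) of how the fake
 * offset returned by the ioctl above is consumed. "fd", "bo_handle" and
 * "bo_size" are assumed to come from the caller, error handling is
 * omitted, and drmIoctl() is the libdrm wrapper around ioctl().
 */
#if 0
	struct drm_i915_gem_mmap_gtt arg = { .handle = bo_handle };
	void *ptr;

	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
	ptr = mmap(NULL, bo_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, arg.offset);
	/* Touching ptr now faults into i915_gem_fault() above. */
#endif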
225067ee
DV
2008/* Immediately discard the backing storage */
2009static void
2010i915_gem_object_truncate(struct drm_i915_gem_object *obj)
e5281ccd 2011{
4d6294bf 2012 i915_gem_object_free_mmap_offset(obj);
1286ff73 2013
4d6294bf
CW
2014 if (obj->base.filp == NULL)
2015 return;
e5281ccd 2016
225067ee
DV
2017 /* Our goal here is to return as much of the memory as
2018 * is possible back to the system as we are called from OOM.
2019 * To do this we must instruct the shmfs to drop all of its
2020 * backing pages, *now*.
2021 */
5537252b 2022 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
225067ee
DV
2023 obj->madv = __I915_MADV_PURGED;
2024}
e5281ccd 2025
5537252b
CW
2026/* Try to discard unwanted pages */
2027static void
2028i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
225067ee 2029{
5537252b
CW
2030 struct address_space *mapping;
2031
2032 switch (obj->madv) {
2033 case I915_MADV_DONTNEED:
2034 i915_gem_object_truncate(obj);
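 /* fall through */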
2035 case __I915_MADV_PURGED:
2036 return;
2037 }
2038
2039 if (obj->base.filp == NULL)
2040 return;
2041
 2042 mapping = file_inode(obj->base.filp)->i_mapping;
2043 invalidate_mapping_pages(mapping, 0, (loff_t)-1);
e5281ccd
CW
2044}
2045
5cdf5881 2046static void
05394f39 2047i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
673a394b 2048{
85d1225e
DG
2049 struct sgt_iter sgt_iter;
2050 struct page *page;
90797e6d 2051 int ret;
1286ff73 2052
05394f39 2053 BUG_ON(obj->madv == __I915_MADV_PURGED);
673a394b 2054
6c085a72 2055 ret = i915_gem_object_set_to_cpu_domain(obj, true);
f4457ae7 2056 if (WARN_ON(ret)) {
6c085a72
CW
2057 /* In the event of a disaster, abandon all caches and
2058 * hope for the best.
2059 */
2c22569b 2060 i915_gem_clflush_object(obj, true);
6c085a72
CW
2061 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2062 }
2063
e2273302
ID
2064 i915_gem_gtt_finish_object(obj);
2065
6dacfd2f 2066 if (i915_gem_object_needs_bit17_swizzle(obj))
280b713b
EA
2067 i915_gem_object_save_bit_17_swizzle(obj);
2068
05394f39
CW
2069 if (obj->madv == I915_MADV_DONTNEED)
2070 obj->dirty = 0;
3ef94daa 2071
85d1225e 2072 for_each_sgt_page(page, sgt_iter, obj->pages) {
05394f39 2073 if (obj->dirty)
9da3da66 2074 set_page_dirty(page);
3ef94daa 2075
05394f39 2076 if (obj->madv == I915_MADV_WILLNEED)
9da3da66 2077 mark_page_accessed(page);
3ef94daa 2078
09cbfeaf 2079 put_page(page);
3ef94daa 2080 }
05394f39 2081 obj->dirty = 0;
673a394b 2082
9da3da66
CW
2083 sg_free_table(obj->pages);
2084 kfree(obj->pages);
37e680a1 2085}
6c085a72 2086
dd624afd 2087int
37e680a1
CW
2088i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2089{
2090 const struct drm_i915_gem_object_ops *ops = obj->ops;
2091
2f745ad3 2092 if (obj->pages == NULL)
37e680a1
CW
2093 return 0;
2094
a5570178
CW
2095 if (obj->pages_pin_count)
2096 return -EBUSY;
2097
9843877d 2098 BUG_ON(i915_gem_obj_bound_any(obj));
3e123027 2099
a2165e31
CW
2100 /* ->put_pages might need to allocate memory for the bit17 swizzle
2101 * array, hence protect them from being reaped by removing them from gtt
2102 * lists early. */
35c20a60 2103 list_del(&obj->global_list);
a2165e31 2104
0a798eb9 2105 if (obj->mapping) {
fb8621d3
CW
2106 if (is_vmalloc_addr(obj->mapping))
2107 vunmap(obj->mapping);
2108 else
2109 kunmap(kmap_to_page(obj->mapping));
0a798eb9
CW
2110 obj->mapping = NULL;
2111 }
2112
37e680a1 2113 ops->put_pages(obj);
05394f39 2114 obj->pages = NULL;
37e680a1 2115
5537252b 2116 i915_gem_object_invalidate(obj);
6c085a72
CW
2117
2118 return 0;
2119}
2120
37e680a1 2121static int
6c085a72 2122i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
e5281ccd 2123{
fac5e23e 2124 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
e5281ccd
CW
2125 int page_count, i;
2126 struct address_space *mapping;
9da3da66
CW
2127 struct sg_table *st;
2128 struct scatterlist *sg;
85d1225e 2129 struct sgt_iter sgt_iter;
e5281ccd 2130 struct page *page;
90797e6d 2131 unsigned long last_pfn = 0; /* suppress gcc warning */
e2273302 2132 int ret;
6c085a72 2133 gfp_t gfp;
e5281ccd 2134
6c085a72
CW
2135 /* Assert that the object is not currently in any GPU domain. As it
2136 * wasn't in the GTT, there shouldn't be any way it could have been in
2137 * a GPU cache
2138 */
2139 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2140 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2141
9da3da66
CW
2142 st = kmalloc(sizeof(*st), GFP_KERNEL);
2143 if (st == NULL)
2144 return -ENOMEM;
2145
05394f39 2146 page_count = obj->base.size / PAGE_SIZE;
9da3da66 2147 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
9da3da66 2148 kfree(st);
e5281ccd 2149 return -ENOMEM;
9da3da66 2150 }
e5281ccd 2151
9da3da66
CW
2152 /* Get the list of pages out of our struct file. They'll be pinned
2153 * at this point until we release them.
2154 *
2155 * Fail silently without starting the shrinker
2156 */
496ad9aa 2157 mapping = file_inode(obj->base.filp)->i_mapping;
c62d2555 2158 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
d0164adc 2159 gfp |= __GFP_NORETRY | __GFP_NOWARN;
90797e6d
ID
2160 sg = st->sgl;
2161 st->nents = 0;
2162 for (i = 0; i < page_count; i++) {
6c085a72
CW
2163 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2164 if (IS_ERR(page)) {
21ab4e74
CW
2165 i915_gem_shrink(dev_priv,
2166 page_count,
2167 I915_SHRINK_BOUND |
2168 I915_SHRINK_UNBOUND |
2169 I915_SHRINK_PURGEABLE);
6c085a72
CW
2170 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2171 }
2172 if (IS_ERR(page)) {
2173 /* We've tried hard to allocate the memory by reaping
2174 * our own buffer, now let the real VM do its job and
2175 * go down in flames if truly OOM.
2176 */
6c085a72 2177 i915_gem_shrink_all(dev_priv);
f461d1be 2178 page = shmem_read_mapping_page(mapping, i);
e2273302
ID
2179 if (IS_ERR(page)) {
2180 ret = PTR_ERR(page);
6c085a72 2181 goto err_pages;
e2273302 2182 }
6c085a72 2183 }
426729dc
KRW
2184#ifdef CONFIG_SWIOTLB
2185 if (swiotlb_nr_tbl()) {
2186 st->nents++;
2187 sg_set_page(sg, page, PAGE_SIZE, 0);
2188 sg = sg_next(sg);
2189 continue;
2190 }
2191#endif
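 /* Coalesce physically contiguous pages into a single sg entry; start
 * a new entry whenever the run of contiguous pfns breaks.
 */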
90797e6d
ID
2192 if (!i || page_to_pfn(page) != last_pfn + 1) {
2193 if (i)
2194 sg = sg_next(sg);
2195 st->nents++;
2196 sg_set_page(sg, page, PAGE_SIZE, 0);
2197 } else {
2198 sg->length += PAGE_SIZE;
2199 }
2200 last_pfn = page_to_pfn(page);
3bbbe706
DV
2201
2202 /* Check that the i965g/gm workaround works. */
2203 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
e5281ccd 2204 }
426729dc
KRW
2205#ifdef CONFIG_SWIOTLB
2206 if (!swiotlb_nr_tbl())
2207#endif
2208 sg_mark_end(sg);
74ce6b6c
CW
2209 obj->pages = st;
2210
e2273302
ID
2211 ret = i915_gem_gtt_prepare_object(obj);
2212 if (ret)
2213 goto err_pages;
2214
6dacfd2f 2215 if (i915_gem_object_needs_bit17_swizzle(obj))
e5281ccd
CW
2216 i915_gem_object_do_bit_17_swizzle(obj);
2217
656bfa3a
DV
2218 if (obj->tiling_mode != I915_TILING_NONE &&
2219 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2220 i915_gem_object_pin_pages(obj);
2221
e5281ccd
CW
2222 return 0;
2223
2224err_pages:
90797e6d 2225 sg_mark_end(sg);
85d1225e
DG
2226 for_each_sgt_page(page, sgt_iter, st)
2227 put_page(page);
9da3da66
CW
2228 sg_free_table(st);
2229 kfree(st);
0820baf3
CW
2230
2231 /* shmemfs first checks if there is enough memory to allocate the page
2232 * and reports ENOSPC should there be insufficient, along with the usual
2233 * ENOMEM for a genuine allocation failure.
2234 *
2235 * We use ENOSPC in our driver to mean that we have run out of aperture
2236 * space and so want to translate the error from shmemfs back to our
2237 * usual understanding of ENOMEM.
2238 */
e2273302
ID
2239 if (ret == -ENOSPC)
2240 ret = -ENOMEM;
2241
2242 return ret;
673a394b
EA
2243}
2244
37e680a1
CW
2245/* Ensure that the associated pages are gathered from the backing storage
2246 * and pinned into our object. i915_gem_object_get_pages() may be called
2247 * multiple times before they are released by a single call to
2248 * i915_gem_object_put_pages() - once the pages are no longer referenced
2249 * either as a result of memory pressure (reaping pages under the shrinker)
2250 * or as the object is itself released.
2251 */
2252int
2253i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2254{
fac5e23e 2255 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
37e680a1
CW
2256 const struct drm_i915_gem_object_ops *ops = obj->ops;
2257 int ret;
2258
2f745ad3 2259 if (obj->pages)
37e680a1
CW
2260 return 0;
2261
43e28f09 2262 if (obj->madv != I915_MADV_WILLNEED) {
bd9b6a4e 2263 DRM_DEBUG("Attempting to obtain a purgeable object\n");
8c99e57d 2264 return -EFAULT;
43e28f09
CW
2265 }
2266
a5570178
CW
2267 BUG_ON(obj->pages_pin_count);
2268
37e680a1
CW
2269 ret = ops->get_pages(obj);
2270 if (ret)
2271 return ret;
2272
35c20a60 2273 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
ee286370
CW
2274
2275 obj->get_page.sg = obj->pages->sgl;
2276 obj->get_page.last = 0;
2277
37e680a1 2278 return 0;
673a394b
EA
2279}
2280
dd6034c6
DG
2281/* The 'mapping' part of i915_gem_object_pin_map() below */
2282static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
2283{
2284 unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2285 struct sg_table *sgt = obj->pages;
85d1225e
DG
2286 struct sgt_iter sgt_iter;
2287 struct page *page;
b338fa47
DG
2288 struct page *stack_pages[32];
2289 struct page **pages = stack_pages;
dd6034c6
DG
2290 unsigned long i = 0;
2291 void *addr;
2292
2293 /* A single page can always be kmapped */
2294 if (n_pages == 1)
2295 return kmap(sg_page(sgt->sgl));
2296
b338fa47
DG
2297 if (n_pages > ARRAY_SIZE(stack_pages)) {
2298 /* Too big for stack -- allocate temporary array instead */
2299 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2300 if (!pages)
2301 return NULL;
2302 }
dd6034c6 2303
85d1225e
DG
2304 for_each_sgt_page(page, sgt_iter, sgt)
2305 pages[i++] = page;
dd6034c6
DG
2306
2307 /* Check that we have the expected number of pages */
2308 GEM_BUG_ON(i != n_pages);
2309
2310 addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
2311
b338fa47
DG
2312 if (pages != stack_pages)
2313 drm_free_large(pages);
dd6034c6
DG
2314
2315 return addr;
2316}
2317
2318/* get, pin, and map the pages of the object into kernel space */
0a798eb9
CW
2319void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
2320{
2321 int ret;
2322
2323 lockdep_assert_held(&obj->base.dev->struct_mutex);
2324
2325 ret = i915_gem_object_get_pages(obj);
2326 if (ret)
2327 return ERR_PTR(ret);
2328
2329 i915_gem_object_pin_pages(obj);
2330
dd6034c6
DG
2331 if (!obj->mapping) {
2332 obj->mapping = i915_gem_object_map(obj);
2333 if (!obj->mapping) {
0a798eb9
CW
2334 i915_gem_object_unpin_pages(obj);
2335 return ERR_PTR(-ENOMEM);
2336 }
2337 }
2338
2339 return obj->mapping;
2340}
2341
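/*
 * Typical usage of the helper above (an illustrative sketch only; "obj",
 * "data" and "len" are assumptions from a caller):
 *
 *	ptr = i915_gem_object_pin_map(obj);
 *	if (IS_ERR(ptr))
 *		return PTR_ERR(ptr);
 *	memcpy(ptr, data, len);
 *	i915_gem_object_unpin_map(obj);
 */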
b4716185 2342void i915_vma_move_to_active(struct i915_vma *vma,
b2af0376 2343 struct drm_i915_gem_request *req)
673a394b 2344{
b4716185 2345 struct drm_i915_gem_object *obj = vma->obj;
e2f80391 2346 struct intel_engine_cs *engine;
b2af0376 2347
666796da 2348 engine = i915_gem_request_get_engine(req);
673a394b
EA
2349
2350 /* Add a reference if we're newly entering the active list. */
b4716185 2351 if (obj->active == 0)
05394f39 2352 drm_gem_object_reference(&obj->base);
666796da 2353 obj->active |= intel_engine_flag(engine);
e35a41de 2354
117897f4 2355 list_move_tail(&obj->engine_list[engine->id], &engine->active_list);
e2f80391 2356 i915_gem_request_assign(&obj->last_read_req[engine->id], req);
caea7476 2357
1c7f4bca 2358 list_move_tail(&vma->vm_link, &vma->vm->active_list);
caea7476
CW
2359}
2360
b4716185
CW
2361static void
2362i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
e2d05a8b 2363{
d501b1d2
CW
2364 GEM_BUG_ON(obj->last_write_req == NULL);
2365 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine)));
b4716185
CW
2366
2367 i915_gem_request_assign(&obj->last_write_req, NULL);
de152b62 2368 intel_fb_obj_flush(obj, true, ORIGIN_CS);
e2d05a8b
BW
2369}
2370
caea7476 2371static void
b4716185 2372i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
ce44b0ea 2373{
feb822cf 2374 struct i915_vma *vma;
ce44b0ea 2375
d501b1d2
CW
2376 GEM_BUG_ON(obj->last_read_req[ring] == NULL);
2377 GEM_BUG_ON(!(obj->active & (1 << ring)));
b4716185 2378
117897f4 2379 list_del_init(&obj->engine_list[ring]);
b4716185
CW
2380 i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2381
4a570db5 2382 if (obj->last_write_req && obj->last_write_req->engine->id == ring)
b4716185
CW
2383 i915_gem_object_retire__write(obj);
2384
2385 obj->active &= ~(1 << ring);
2386 if (obj->active)
2387 return;
caea7476 2388
6c246959
CW
2389 /* Bump our place on the bound list to keep it roughly in LRU order
2390 * so that we don't steal from recently used but inactive objects
 2391 * (unless we are forced to, of course!)
2392 */
2393 list_move_tail(&obj->global_list,
2394 &to_i915(obj->base.dev)->mm.bound_list);
2395
1c7f4bca
CW
2396 list_for_each_entry(vma, &obj->vma_list, obj_link) {
2397 if (!list_empty(&vma->vm_link))
2398 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
feb822cf 2399 }
caea7476 2400
97b2a6a1 2401 i915_gem_request_assign(&obj->last_fenced_req, NULL);
caea7476 2402 drm_gem_object_unreference(&obj->base);
c8725f3d
CW
2403}
2404
7b4d3a16 2405static bool i915_context_is_banned(const struct i915_gem_context *ctx)
be62acb4 2406{
44e2c070 2407 unsigned long elapsed;
be62acb4 2408
44e2c070 2409 if (ctx->hang_stats.banned)
be62acb4
MK
2410 return true;
2411
7b4d3a16 2412 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
676fa572
CW
2413 if (ctx->hang_stats.ban_period_seconds &&
2414 elapsed <= ctx->hang_stats.ban_period_seconds) {
7b4d3a16
CW
2415 DRM_DEBUG("context hanging too fast, banning!\n");
2416 return true;
be62acb4
MK
2417 }
2418
2419 return false;
2420}
2421
7b4d3a16 2422static void i915_set_reset_status(struct i915_gem_context *ctx,
b6b0fac0 2423 const bool guilty)
aa60c664 2424{
7b4d3a16 2425 struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
44e2c070
MK
2426
2427 if (guilty) {
7b4d3a16 2428 hs->banned = i915_context_is_banned(ctx);
44e2c070
MK
2429 hs->batch_active++;
2430 hs->guilty_ts = get_seconds();
2431 } else {
2432 hs->batch_pending++;
aa60c664
MK
2433 }
2434}
2435
8d9fc7fd 2436struct drm_i915_gem_request *
0bc40be8 2437i915_gem_find_active_request(struct intel_engine_cs *engine)
9375e446 2438{
4db080f9
CW
2439 struct drm_i915_gem_request *request;
2440
f69a02c9
CW
2441 /* We are called by the error capture and reset at a random
2442 * point in time. In particular, note that neither is crucially
2443 * ordered with an interrupt. After a hang, the GPU is dead and we
2444 * assume that no more writes can happen (we waited long enough for
 2445 * all writes that were in flight to be flushed) - adding an
2446 * extra delay for a recent interrupt is pointless. Hence, we do
2447 * not need an engine->irq_seqno_barrier() before the seqno reads.
2448 */
0bc40be8 2449 list_for_each_entry(request, &engine->request_list, list) {
f69a02c9 2450 if (i915_gem_request_completed(request))
4db080f9 2451 continue;
aa60c664 2452
b6b0fac0 2453 return request;
4db080f9 2454 }
b6b0fac0
MK
2455
2456 return NULL;
2457}
2458
7b4d3a16 2459static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
b6b0fac0
MK
2460{
2461 struct drm_i915_gem_request *request;
2462 bool ring_hung;
2463
0bc40be8 2464 request = i915_gem_find_active_request(engine);
b6b0fac0
MK
2465 if (request == NULL)
2466 return;
2467
0bc40be8 2468 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
b6b0fac0 2469
7b4d3a16 2470 i915_set_reset_status(request->ctx, ring_hung);
0bc40be8 2471 list_for_each_entry_continue(request, &engine->request_list, list)
7b4d3a16 2472 i915_set_reset_status(request->ctx, false);
4db080f9 2473}
aa60c664 2474
7b4d3a16 2475static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
4db080f9 2476{
608c1a52
CW
2477 struct intel_ringbuffer *buffer;
2478
0bc40be8 2479 while (!list_empty(&engine->active_list)) {
05394f39 2480 struct drm_i915_gem_object *obj;
9375e446 2481
0bc40be8 2482 obj = list_first_entry(&engine->active_list,
05394f39 2483 struct drm_i915_gem_object,
117897f4 2484 engine_list[engine->id]);
9375e446 2485
0bc40be8 2486 i915_gem_object_retire__read(obj, engine->id);
673a394b 2487 }
1d62beea 2488
dcb4c12a
OM
2489 /*
2490 * Clear the execlists queue up before freeing the requests, as those
2491 * are the ones that keep the context and ringbuffer backing objects
2492 * pinned in place.
2493 */
dcb4c12a 2494
7de1691a 2495 if (i915.enable_execlists) {
27af5eea
TU
2496 /* Ensure irq handler finishes or is cancelled. */
2497 tasklet_kill(&engine->irq_tasklet);
1197b4f2 2498
e39d42fa 2499 intel_execlists_cancel_requests(engine);
dcb4c12a
OM
2500 }
2501
1d62beea
BW
2502 /*
2503 * We must free the requests after all the corresponding objects have
2504 * been moved off active lists. Which is the same order as the normal
2505 * retire_requests function does. This is important if object hold
2506 * implicit references on things like e.g. ppgtt address spaces through
2507 * the request.
2508 */
05235c53 2509 if (!list_empty(&engine->request_list)) {
1d62beea
BW
2510 struct drm_i915_gem_request *request;
2511
05235c53
CW
2512 request = list_last_entry(&engine->request_list,
2513 struct drm_i915_gem_request,
2514 list);
1d62beea 2515
05235c53 2516 i915_gem_request_retire_upto(request);
1d62beea 2517 }
608c1a52
CW
2518
2519 /* Having flushed all requests from all queues, we know that all
2520 * ringbuffers must now be empty. However, since we do not reclaim
2521 * all space when retiring the request (to prevent HEADs colliding
2522 * with rapid ringbuffer wraparound) the amount of available space
2523 * upon reset is less than when we start. Do one more pass over
2524 * all the ringbuffers to reset last_retired_head.
2525 */
0bc40be8 2526 list_for_each_entry(buffer, &engine->buffers, link) {
608c1a52
CW
2527 buffer->last_retired_head = buffer->tail;
2528 intel_ring_update_space(buffer);
2529 }
2ed53a94
CW
2530
2531 intel_ring_init_seqno(engine, engine->last_submitted_seqno);
b913b33c
CW
2532
2533 engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
673a394b
EA
2534}
2535
069efc1d 2536void i915_gem_reset(struct drm_device *dev)
673a394b 2537{
fac5e23e 2538 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 2539 struct intel_engine_cs *engine;
673a394b 2540
4db080f9
CW
2541 /*
2542 * Before we free the objects from the requests, we need to inspect
2543 * them for finding the guilty party. As the requests only borrow
2544 * their reference to the objects, the inspection must be done first.
2545 */
b4ac5afc 2546 for_each_engine(engine, dev_priv)
7b4d3a16 2547 i915_gem_reset_engine_status(engine);
4db080f9 2548
b4ac5afc 2549 for_each_engine(engine, dev_priv)
7b4d3a16 2550 i915_gem_reset_engine_cleanup(engine);
b913b33c 2551 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
dfaae392 2552
acce9ffa
BW
2553 i915_gem_context_reset(dev);
2554
19b2dbde 2555 i915_gem_restore_fences(dev);
b4716185
CW
2556
2557 WARN_ON(i915_verify_lists(dev));
673a394b
EA
2558}
2559
2560/**
2561 * This function clears the request list as sequence numbers are passed.
14bb2c11 2562 * @engine: engine to retire requests on
673a394b 2563 */
1cf0ba14 2564void
0bc40be8 2565i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
673a394b 2566{
0bc40be8 2567 WARN_ON(i915_verify_lists(engine->dev));
673a394b 2568
832a3aad
CW
2569 /* Retire requests first as we use it above for the early return.
2570 * If we retire requests last, we may use a later seqno and so clear
2571 * the requests lists without clearing the active list, leading to
2572 * confusion.
e9103038 2573 */
0bc40be8 2574 while (!list_empty(&engine->request_list)) {
673a394b 2575 struct drm_i915_gem_request *request;
673a394b 2576
0bc40be8 2577 request = list_first_entry(&engine->request_list,
673a394b
EA
2578 struct drm_i915_gem_request,
2579 list);
673a394b 2580
f69a02c9 2581 if (!i915_gem_request_completed(request))
b84d5f0c
CW
2582 break;
2583
05235c53 2584 i915_gem_request_retire_upto(request);
b84d5f0c 2585 }
673a394b 2586
832a3aad
CW
2587 /* Move any buffers on the active list that are no longer referenced
2588 * by the ringbuffer to the flushing/inactive lists as appropriate,
2589 * before we free the context associated with the requests.
2590 */
0bc40be8 2591 while (!list_empty(&engine->active_list)) {
832a3aad
CW
2592 struct drm_i915_gem_object *obj;
2593
0bc40be8
TU
2594 obj = list_first_entry(&engine->active_list,
2595 struct drm_i915_gem_object,
117897f4 2596 engine_list[engine->id]);
832a3aad 2597
0bc40be8 2598 if (!list_empty(&obj->last_read_req[engine->id]->list))
832a3aad
CW
2599 break;
2600
0bc40be8 2601 i915_gem_object_retire__read(obj, engine->id);
832a3aad
CW
2602 }
2603
0bc40be8 2604 WARN_ON(i915_verify_lists(engine->dev));
673a394b
EA
2605}
2606
67d97da3 2607void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
b09a1fec 2608{
e2f80391 2609 struct intel_engine_cs *engine;
67d97da3 2610
91c8a326 2611 lockdep_assert_held(&dev_priv->drm.struct_mutex);
67d97da3
CW
2612
2613 if (dev_priv->gt.active_engines == 0)
2614 return;
2615
2616 GEM_BUG_ON(!dev_priv->gt.awake);
b09a1fec 2617
b4ac5afc 2618 for_each_engine(engine, dev_priv) {
e2f80391 2619 i915_gem_retire_requests_ring(engine);
67d97da3
CW
2620 if (list_empty(&engine->request_list))
2621 dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
b29c19b6
CW
2622 }
2623
67d97da3 2624 if (dev_priv->gt.active_engines == 0)
1b51bce2
CW
2625 queue_delayed_work(dev_priv->wq,
2626 &dev_priv->gt.idle_work,
2627 msecs_to_jiffies(100));
b09a1fec
CW
2628}
2629
75ef9da2 2630static void
673a394b
EA
2631i915_gem_retire_work_handler(struct work_struct *work)
2632{
b29c19b6 2633 struct drm_i915_private *dev_priv =
67d97da3 2634 container_of(work, typeof(*dev_priv), gt.retire_work.work);
91c8a326 2635 struct drm_device *dev = &dev_priv->drm;
673a394b 2636
891b48cf 2637 /* Come back later if the device is busy... */
b29c19b6 2638 if (mutex_trylock(&dev->struct_mutex)) {
67d97da3 2639 i915_gem_retire_requests(dev_priv);
b29c19b6 2640 mutex_unlock(&dev->struct_mutex);
673a394b 2641 }
67d97da3
CW
2642
2643 /* Keep the retire handler running until we are finally idle.
2644 * We do not need to do this test under locking as in the worst-case
2645 * we queue the retire worker once too often.
2646 */
c9615613
CW
2647 if (READ_ONCE(dev_priv->gt.awake)) {
2648 i915_queue_hangcheck(dev_priv);
67d97da3
CW
2649 queue_delayed_work(dev_priv->wq,
2650 &dev_priv->gt.retire_work,
bcb45086 2651 round_jiffies_up_relative(HZ));
c9615613 2652 }
b29c19b6 2653}
0a58705b 2654
b29c19b6
CW
2655static void
2656i915_gem_idle_work_handler(struct work_struct *work)
2657{
2658 struct drm_i915_private *dev_priv =
67d97da3 2659 container_of(work, typeof(*dev_priv), gt.idle_work.work);
91c8a326 2660 struct drm_device *dev = &dev_priv->drm;
b4ac5afc 2661 struct intel_engine_cs *engine;
67d97da3
CW
2662 unsigned int stuck_engines;
2663 bool rearm_hangcheck;
2664
2665 if (!READ_ONCE(dev_priv->gt.awake))
2666 return;
2667
2668 if (READ_ONCE(dev_priv->gt.active_engines))
2669 return;
2670
2671 rearm_hangcheck =
2672 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2673
2674 if (!mutex_trylock(&dev->struct_mutex)) {
2675 /* Currently busy, come back later */
2676 mod_delayed_work(dev_priv->wq,
2677 &dev_priv->gt.idle_work,
2678 msecs_to_jiffies(50));
2679 goto out_rearm;
2680 }
2681
2682 if (dev_priv->gt.active_engines)
2683 goto out_unlock;
b29c19b6 2684
b4ac5afc 2685 for_each_engine(engine, dev_priv)
67d97da3 2686 i915_gem_batch_pool_fini(&engine->batch_pool);
35c94185 2687
67d97da3
CW
2688 GEM_BUG_ON(!dev_priv->gt.awake);
2689 dev_priv->gt.awake = false;
2690 rearm_hangcheck = false;
30ecad77 2691
67d97da3
CW
2692 stuck_engines = intel_kick_waiters(dev_priv);
2693 if (unlikely(stuck_engines)) {
2694 DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n");
2695 dev_priv->gpu_error.missed_irq_rings |= stuck_engines;
2696 }
35c94185 2697
67d97da3
CW
2698 if (INTEL_GEN(dev_priv) >= 6)
2699 gen6_rps_idle(dev_priv);
2700 intel_runtime_pm_put(dev_priv);
2701out_unlock:
2702 mutex_unlock(&dev->struct_mutex);
b29c19b6 2703
67d97da3
CW
2704out_rearm:
2705 if (rearm_hangcheck) {
2706 GEM_BUG_ON(!dev_priv->gt.awake);
2707 i915_queue_hangcheck(dev_priv);
35c94185 2708 }
673a394b
EA
2709}
2710
30dfebf3
DV
2711/**
2712 * Ensures that an object will eventually get non-busy by flushing any required
 2713 * write domains, emitting any outstanding lazy request and retiring any
2714 * completed requests.
14bb2c11 2715 * @obj: object to flush
30dfebf3
DV
2716 */
2717static int
2718i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2719{
a5ac0f90 2720 int i;
b4716185
CW
2721
2722 if (!obj->active)
2723 return 0;
30dfebf3 2724
666796da 2725 for (i = 0; i < I915_NUM_ENGINES; i++) {
b4716185 2726 struct drm_i915_gem_request *req;
41c52415 2727
b4716185
CW
2728 req = obj->last_read_req[i];
2729 if (req == NULL)
2730 continue;
2731
f69a02c9 2732 if (i915_gem_request_completed(req))
b4716185 2733 i915_gem_object_retire__read(obj, i);
30dfebf3
DV
2734 }
2735
2736 return 0;
2737}
2738
23ba4fd0
BW
2739/**
2740 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
14bb2c11
TU
2741 * @dev: drm device pointer
2742 * @data: ioctl data blob
2743 * @file: drm file pointer
23ba4fd0
BW
2744 *
2745 * Returns 0 if successful, else an error is returned with the remaining time in
2746 * the timeout parameter.
2747 * -ETIME: object is still busy after timeout
2748 * -ERESTARTSYS: signal interrupted the wait
 2749 * -ENOENT: object doesn't exist
2750 * Also possible, but rare:
2751 * -EAGAIN: GPU wedged
2752 * -ENOMEM: damn
2753 * -ENODEV: Internal IRQ fail
2754 * -E?: The add request failed
2755 *
2756 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2757 * non-zero timeout parameter the wait ioctl will wait for the given number of
2758 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2759 * without holding struct_mutex the object may become re-busied before this
 2760 * function completes. A similar but shorter race condition exists in the
 2761 * busy ioctl.
2762 */
2763int
2764i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2765{
2766 struct drm_i915_gem_wait *args = data;
2767 struct drm_i915_gem_object *obj;
666796da 2768 struct drm_i915_gem_request *req[I915_NUM_ENGINES];
b4716185
CW
2769 int i, n = 0;
2770 int ret;
23ba4fd0 2771
11b5d511
DV
2772 if (args->flags != 0)
2773 return -EINVAL;
2774
23ba4fd0
BW
2775 ret = i915_mutex_lock_interruptible(dev);
2776 if (ret)
2777 return ret;
2778
a8ad0bd8 2779 obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle));
23ba4fd0
BW
2780 if (&obj->base == NULL) {
2781 mutex_unlock(&dev->struct_mutex);
2782 return -ENOENT;
2783 }
2784
30dfebf3
DV
2785 /* Need to make sure the object gets inactive eventually. */
2786 ret = i915_gem_object_flush_active(obj);
23ba4fd0
BW
2787 if (ret)
2788 goto out;
2789
b4716185 2790 if (!obj->active)
97b2a6a1 2791 goto out;
23ba4fd0 2792
23ba4fd0 2793 /* Do this after OLR check to make sure we make forward progress polling
762e4583 2794 * on this IOCTL with a timeout == 0 (like busy ioctl)
23ba4fd0 2795 */
762e4583 2796 if (args->timeout_ns == 0) {
23ba4fd0
BW
2797 ret = -ETIME;
2798 goto out;
2799 }
2800
2801 drm_gem_object_unreference(&obj->base);
b4716185 2802
666796da 2803 for (i = 0; i < I915_NUM_ENGINES; i++) {
b4716185
CW
2804 if (obj->last_read_req[i] == NULL)
2805 continue;
2806
2807 req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
2808 }
2809
23ba4fd0
BW
2810 mutex_unlock(&dev->struct_mutex);
2811
b4716185
CW
2812 for (i = 0; i < n; i++) {
2813 if (ret == 0)
299259a3 2814 ret = __i915_wait_request(req[i], true,
b4716185 2815 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
b6aa0873 2816 to_rps_client(file));
73db04cf 2817 i915_gem_request_unreference(req[i]);
b4716185 2818 }
ff865885 2819 return ret;
23ba4fd0
BW
2820
2821out:
2822 drm_gem_object_unreference(&obj->base);
2823 mutex_unlock(&dev->struct_mutex);
2824 return ret;
2825}
2826
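/*
 * Illustrative userspace invocation of the wait ioctl documented above
 * (a sketch only; "fd" and "bo_handle" are assumptions, errors ignored):
 */
#if 0
	struct drm_i915_gem_wait wait = {
		.bo_handle = bo_handle,
		.timeout_ns = 1000 * 1000 * 1000,	/* wait up to 1s */
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait) == 0)
		printf("object idle, %lld ns of the timeout left\n",
		       (long long)wait.timeout_ns);
#endif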
b4716185
CW
2827static int
2828__i915_gem_object_sync(struct drm_i915_gem_object *obj,
2829 struct intel_engine_cs *to,
91af127f
JH
2830 struct drm_i915_gem_request *from_req,
2831 struct drm_i915_gem_request **to_req)
b4716185
CW
2832{
2833 struct intel_engine_cs *from;
2834 int ret;
2835
666796da 2836 from = i915_gem_request_get_engine(from_req);
b4716185
CW
2837 if (to == from)
2838 return 0;
2839
f69a02c9 2840 if (i915_gem_request_completed(from_req))
b4716185
CW
2841 return 0;
2842
c033666a 2843 if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
a6f766f3 2844 struct drm_i915_private *i915 = to_i915(obj->base.dev);
91af127f 2845 ret = __i915_wait_request(from_req,
a6f766f3
CW
2846 i915->mm.interruptible,
2847 NULL,
2848 &i915->rps.semaphores);
b4716185
CW
2849 if (ret)
2850 return ret;
2851
91af127f 2852 i915_gem_object_retire_request(obj, from_req);
b4716185
CW
2853 } else {
2854 int idx = intel_ring_sync_index(from, to);
91af127f
JH
2855 u32 seqno = i915_gem_request_get_seqno(from_req);
2856
2857 WARN_ON(!to_req);
b4716185
CW
2858
2859 if (seqno <= from->semaphore.sync_seqno[idx])
2860 return 0;
2861
91af127f 2862 if (*to_req == NULL) {
26827088
DG
2863 struct drm_i915_gem_request *req;
2864
2865 req = i915_gem_request_alloc(to, NULL);
2866 if (IS_ERR(req))
2867 return PTR_ERR(req);
2868
2869 *to_req = req;
91af127f
JH
2870 }
2871
599d924c
JH
2872 trace_i915_gem_ring_sync_to(*to_req, from, from_req);
2873 ret = to->semaphore.sync_to(*to_req, from, seqno);
b4716185
CW
2874 if (ret)
2875 return ret;
2876
2877 /* We use last_read_req because sync_to()
2878 * might have just caused seqno wrap under
2879 * the radar.
2880 */
2881 from->semaphore.sync_seqno[idx] =
2882 i915_gem_request_get_seqno(obj->last_read_req[from->id]);
2883 }
2884
2885 return 0;
2886}
2887
5816d648
BW
2888/**
2889 * i915_gem_object_sync - sync an object to a ring.
2890 *
2891 * @obj: object which may be in use on another ring.
2892 * @to: ring we wish to use the object on. May be NULL.
91af127f
JH
2893 * @to_req: request we wish to use the object for. See below.
2894 * This will be allocated and returned if a request is
2895 * required but not passed in.
5816d648
BW
2896 *
2897 * This code is meant to abstract object synchronization with the GPU.
2898 * Calling with NULL implies synchronizing the object with the CPU
b4716185 2899 * rather than a particular GPU ring. Conceptually we serialise writes
91af127f 2900 * between engines inside the GPU. We only allow one engine to write
b4716185
CW
2901 * into a buffer at any time, but multiple readers. To ensure each has
2902 * a coherent view of memory, we must:
2903 *
2904 * - If there is an outstanding write request to the object, the new
2905 * request must wait for it to complete (either CPU or in hw, requests
2906 * on the same ring will be naturally ordered).
2907 *
2908 * - If we are a write request (pending_write_domain is set), the new
2909 * request must wait for outstanding read requests to complete.
5816d648 2910 *
91af127f
JH
2911 * For CPU synchronisation (NULL to) no request is required. For syncing with
2912 * rings to_req must be non-NULL. However, a request does not have to be
2913 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
2914 * request will be allocated automatically and returned through *to_req. Note
2915 * that it is not guaranteed that commands will be emitted (because the system
2916 * might already be idle). Hence there is no need to create a request that
2917 * might never have any work submitted. Note further that if a request is
2918 * returned in *to_req, it is the responsibility of the caller to submit
2919 * that request (after potentially adding more work to it).
2920 *
5816d648
BW
2921 * Returns 0 if successful, else propagates up the lower layer error.
2922 */
2911a35b
BW
2923int
2924i915_gem_object_sync(struct drm_i915_gem_object *obj,
91af127f
JH
2925 struct intel_engine_cs *to,
2926 struct drm_i915_gem_request **to_req)
2911a35b 2927{
b4716185 2928 const bool readonly = obj->base.pending_write_domain == 0;
666796da 2929 struct drm_i915_gem_request *req[I915_NUM_ENGINES];
b4716185 2930 int ret, i, n;
41c52415 2931
b4716185 2932 if (!obj->active)
2911a35b
BW
2933 return 0;
2934
b4716185
CW
2935 if (to == NULL)
2936 return i915_gem_object_wait_rendering(obj, readonly);
2911a35b 2937
b4716185
CW
2938 n = 0;
2939 if (readonly) {
2940 if (obj->last_write_req)
2941 req[n++] = obj->last_write_req;
2942 } else {
666796da 2943 for (i = 0; i < I915_NUM_ENGINES; i++)
b4716185
CW
2944 if (obj->last_read_req[i])
2945 req[n++] = obj->last_read_req[i];
2946 }
2947 for (i = 0; i < n; i++) {
91af127f 2948 ret = __i915_gem_object_sync(obj, to, req[i], to_req);
b4716185
CW
2949 if (ret)
2950 return ret;
2951 }
2911a35b 2952
b4716185 2953 return 0;
2911a35b
BW
2954}
2955
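/*
 * Typical calling pattern for the helper above (an illustrative sketch
 * mirroring how execbuffer uses it; "obj" and "req" come from the caller):
 *
 *	ret = i915_gem_object_sync(obj, req->engine, &req);
 *	if (ret)
 *		return ret;
 */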
b5ffc9bc
CW
2956static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2957{
2958 u32 old_write_domain, old_read_domains;
2959
b5ffc9bc
CW
2960 /* Force a pagefault for domain tracking on next user access */
2961 i915_gem_release_mmap(obj);
2962
b97c3d9c
KP
2963 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2964 return;
2965
b5ffc9bc
CW
2966 old_read_domains = obj->base.read_domains;
2967 old_write_domain = obj->base.write_domain;
2968
2969 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2970 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2971
2972 trace_i915_gem_object_change_domain(obj,
2973 old_read_domains,
2974 old_write_domain);
2975}
2976
8ef8561f
CW
2977static void __i915_vma_iounmap(struct i915_vma *vma)
2978{
2979 GEM_BUG_ON(vma->pin_count);
2980
2981 if (vma->iomap == NULL)
2982 return;
2983
2984 io_mapping_unmap(vma->iomap);
2985 vma->iomap = NULL;
2986}
2987
e9f24d5f 2988static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
673a394b 2989{
07fe0b12 2990 struct drm_i915_gem_object *obj = vma->obj;
fac5e23e 2991 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
43e28f09 2992 int ret;
673a394b 2993
1c7f4bca 2994 if (list_empty(&vma->obj_link))
673a394b
EA
2995 return 0;
2996
0ff501cb
DV
2997 if (!drm_mm_node_allocated(&vma->node)) {
2998 i915_gem_vma_destroy(vma);
0ff501cb
DV
2999 return 0;
3000 }
433544bd 3001
d7f46fc4 3002 if (vma->pin_count)
31d8d651 3003 return -EBUSY;
673a394b 3004
c4670ad0
CW
3005 BUG_ON(obj->pages == NULL);
3006
e9f24d5f
TU
3007 if (wait) {
3008 ret = i915_gem_object_wait_rendering(obj, false);
3009 if (ret)
3010 return ret;
3011 }
a8198eea 3012
596c5923 3013 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
8b1bc9b4 3014 i915_gem_object_finish_gtt(obj);
5323fd04 3015
8b1bc9b4
DV
3016 /* release the fence reg _after_ flushing */
3017 ret = i915_gem_object_put_fence(obj);
3018 if (ret)
3019 return ret;
8ef8561f
CW
3020
3021 __i915_vma_iounmap(vma);
8b1bc9b4 3022 }
96b47b65 3023
07fe0b12 3024 trace_i915_vma_unbind(vma);
db53a302 3025
777dc5bb 3026 vma->vm->unbind_vma(vma);
5e562f1d 3027 vma->bound = 0;
6f65e29a 3028
1c7f4bca 3029 list_del_init(&vma->vm_link);
596c5923 3030 if (vma->is_ggtt) {
fe14d5f4
TU
3031 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3032 obj->map_and_fenceable = false;
3033 } else if (vma->ggtt_view.pages) {
3034 sg_free_table(vma->ggtt_view.pages);
3035 kfree(vma->ggtt_view.pages);
fe14d5f4 3036 }
016a65a3 3037 vma->ggtt_view.pages = NULL;
fe14d5f4 3038 }
673a394b 3039
2f633156
BW
3040 drm_mm_remove_node(&vma->node);
3041 i915_gem_vma_destroy(vma);
3042
3043 /* Since the unbound list is global, only move to that list if
b93dab6e 3044 * no more VMAs exist. */
e2273302 3045 if (list_empty(&obj->vma_list))
2f633156 3046 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
673a394b 3047
70903c3b
CW
3048 /* And finally now the object is completely decoupled from this vma,
3049 * we can drop its hold on the backing storage and allow it to be
3050 * reaped by the shrinker.
3051 */
3052 i915_gem_object_unpin_pages(obj);
3053
88241785 3054 return 0;
54cf91dc
CW
3055}
3056
e9f24d5f
TU
3057int i915_vma_unbind(struct i915_vma *vma)
3058{
3059 return __i915_vma_unbind(vma, true);
3060}
3061
3062int __i915_vma_unbind_no_wait(struct i915_vma *vma)
3063{
3064 return __i915_vma_unbind(vma, false);
3065}
3066
6e5a5beb 3067int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
4df2faf4 3068{
e2f80391 3069 struct intel_engine_cs *engine;
b4ac5afc 3070 int ret;
4df2faf4 3071
91c8a326 3072 lockdep_assert_held(&dev_priv->drm.struct_mutex);
6e5a5beb 3073
b4ac5afc 3074 for_each_engine(engine, dev_priv) {
62e63007
CW
3075 if (engine->last_context == NULL)
3076 continue;
3077
666796da 3078 ret = intel_engine_idle(engine);
1ec14ad3
CW
3079 if (ret)
3080 return ret;
3081 }
4df2faf4 3082
b4716185 3083 WARN_ON(i915_verify_lists(dev));
8a1a49f9 3084 return 0;
4df2faf4
DV
3085}
3086
4144f9b5 3087static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
42d6ab48
CW
3088 unsigned long cache_level)
3089{
4144f9b5 3090 struct drm_mm_node *gtt_space = &vma->node;
42d6ab48
CW
3091 struct drm_mm_node *other;
3092
4144f9b5
CW
3093 /*
3094 * On some machines we have to be careful when putting differing types
3095 * of snoopable memory together to avoid the prefetcher crossing memory
3096 * domains and dying. During vm initialisation, we decide whether or not
3097 * these constraints apply and set the drm_mm.color_adjust
3098 * appropriately.
42d6ab48 3099 */
4144f9b5 3100 if (vma->vm->mm.color_adjust == NULL)
42d6ab48
CW
3101 return true;
3102
c6cfb325 3103 if (!drm_mm_node_allocated(gtt_space))
42d6ab48
CW
3104 return true;
3105
3106 if (list_empty(&gtt_space->node_list))
3107 return true;
3108
3109 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3110 if (other->allocated && !other->hole_follows && other->color != cache_level)
3111 return false;
3112
3113 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3114 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3115 return false;
3116
3117 return true;
3118}
3119
673a394b 3120/**
91e6711e
JL
3121 * Finds free space in the GTT aperture and binds the object or a view of it
3122 * there.
14bb2c11
TU
3123 * @obj: object to bind
3124 * @vm: address space to bind into
3125 * @ggtt_view: global gtt view if applicable
3126 * @alignment: requested alignment
3127 * @flags: mask of PIN_* flags to use
673a394b 3128 */
262de145 3129static struct i915_vma *
07fe0b12
BW
3130i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3131 struct i915_address_space *vm,
ec7adb6e 3132 const struct i915_ggtt_view *ggtt_view,
07fe0b12 3133 unsigned alignment,
ec7adb6e 3134 uint64_t flags)
673a394b 3135{
05394f39 3136 struct drm_device *dev = obj->base.dev;
72e96d64
JL
3137 struct drm_i915_private *dev_priv = to_i915(dev);
3138 struct i915_ggtt *ggtt = &dev_priv->ggtt;
65bd342f 3139 u32 fence_alignment, unfenced_alignment;
101b506a
MT
3140 u32 search_flag, alloc_flag;
3141 u64 start, end;
65bd342f 3142 u64 size, fence_size;
2f633156 3143 struct i915_vma *vma;
07f73f69 3144 int ret;
673a394b 3145
91e6711e
JL
3146 if (i915_is_ggtt(vm)) {
3147 u32 view_size;
3148
3149 if (WARN_ON(!ggtt_view))
3150 return ERR_PTR(-EINVAL);
ec7adb6e 3151
91e6711e
JL
3152 view_size = i915_ggtt_view_size(obj, ggtt_view);
3153
3154 fence_size = i915_gem_get_gtt_size(dev,
3155 view_size,
3156 obj->tiling_mode);
3157 fence_alignment = i915_gem_get_gtt_alignment(dev,
3158 view_size,
3159 obj->tiling_mode,
3160 true);
3161 unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3162 view_size,
3163 obj->tiling_mode,
3164 false);
3165 size = flags & PIN_MAPPABLE ? fence_size : view_size;
3166 } else {
3167 fence_size = i915_gem_get_gtt_size(dev,
3168 obj->base.size,
3169 obj->tiling_mode);
3170 fence_alignment = i915_gem_get_gtt_alignment(dev,
3171 obj->base.size,
3172 obj->tiling_mode,
3173 true);
3174 unfenced_alignment =
3175 i915_gem_get_gtt_alignment(dev,
3176 obj->base.size,
3177 obj->tiling_mode,
3178 false);
3179 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3180 }
a00b10c3 3181
101b506a
MT
3182 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3183 end = vm->total;
3184 if (flags & PIN_MAPPABLE)
72e96d64 3185 end = min_t(u64, end, ggtt->mappable_end);
101b506a 3186 if (flags & PIN_ZONE_4G)
48ea1e32 3187 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
101b506a 3188
673a394b 3189 if (alignment == 0)
1ec9e26d 3190 alignment = flags & PIN_MAPPABLE ? fence_alignment :
5e783301 3191 unfenced_alignment;
1ec9e26d 3192 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
91e6711e
JL
3193 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3194 ggtt_view ? ggtt_view->type : 0,
3195 alignment);
262de145 3196 return ERR_PTR(-EINVAL);
673a394b
EA
3197 }
3198
91e6711e
JL
3199 /* If binding the object/GGTT view requires more space than the entire
3200 * aperture has, reject it early before evicting everything in a vain
3201 * attempt to find space.
654fc607 3202 */
91e6711e 3203 if (size > end) {
65bd342f 3204 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
91e6711e
JL
3205 ggtt_view ? ggtt_view->type : 0,
3206 size,
1ec9e26d 3207 flags & PIN_MAPPABLE ? "mappable" : "total",
d23db88c 3208 end);
262de145 3209 return ERR_PTR(-E2BIG);
654fc607
CW
3210 }
3211
37e680a1 3212 ret = i915_gem_object_get_pages(obj);
6c085a72 3213 if (ret)
262de145 3214 return ERR_PTR(ret);
6c085a72 3215
fbdda6fb
CW
3216 i915_gem_object_pin_pages(obj);
3217
ec7adb6e
JL
3218 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3219 i915_gem_obj_lookup_or_create_vma(obj, vm);
3220
262de145 3221 if (IS_ERR(vma))
bc6bc15b 3222 goto err_unpin;
2f633156 3223
506a8e87
CW
3224 if (flags & PIN_OFFSET_FIXED) {
3225 uint64_t offset = flags & PIN_OFFSET_MASK;
3226
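 /* Fixed placement: validate the requested range, then try to reserve
 * it, evicting any overlapping VMA if the first attempt fails.
 */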
3227 if (offset & (alignment - 1) || offset + size > end) {
3228 ret = -EINVAL;
3229 goto err_free_vma;
3230 }
3231 vma->node.start = offset;
3232 vma->node.size = size;
3233 vma->node.color = obj->cache_level;
3234 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3235 if (ret) {
3236 ret = i915_gem_evict_for_vma(vma);
3237 if (ret == 0)
3238 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3239 }
3240 if (ret)
3241 goto err_free_vma;
101b506a 3242 } else {
506a8e87
CW
3243 if (flags & PIN_HIGH) {
3244 search_flag = DRM_MM_SEARCH_BELOW;
3245 alloc_flag = DRM_MM_CREATE_TOP;
3246 } else {
3247 search_flag = DRM_MM_SEARCH_DEFAULT;
3248 alloc_flag = DRM_MM_CREATE_DEFAULT;
3249 }
101b506a 3250
0a9ae0d7 3251search_free:
506a8e87
CW
3252 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3253 size, alignment,
3254 obj->cache_level,
3255 start, end,
3256 search_flag,
3257 alloc_flag);
3258 if (ret) {
3259 ret = i915_gem_evict_something(dev, vm, size, alignment,
3260 obj->cache_level,
3261 start, end,
3262 flags);
3263 if (ret == 0)
3264 goto search_free;
9731129c 3265
506a8e87
CW
3266 goto err_free_vma;
3267 }
673a394b 3268 }
4144f9b5 3269 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
2f633156 3270 ret = -EINVAL;
bc6bc15b 3271 goto err_remove_node;
673a394b
EA
3272 }
3273
fe14d5f4 3274 trace_i915_vma_bind(vma, flags);
0875546c 3275 ret = i915_vma_bind(vma, obj->cache_level, flags);
fe14d5f4 3276 if (ret)
e2273302 3277 goto err_remove_node;
fe14d5f4 3278
35c20a60 3279 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
1c7f4bca 3280 list_add_tail(&vma->vm_link, &vm->inactive_list);
bf1a1092 3281
262de145 3282 return vma;
2f633156 3283
bc6bc15b 3284err_remove_node:
6286ef9b 3285 drm_mm_remove_node(&vma->node);
bc6bc15b 3286err_free_vma:
2f633156 3287 i915_gem_vma_destroy(vma);
262de145 3288 vma = ERR_PTR(ret);
bc6bc15b 3289err_unpin:
2f633156 3290 i915_gem_object_unpin_pages(obj);
262de145 3291 return vma;
673a394b
EA
3292}
3293
000433b6 3294bool
2c22569b
CW
3295i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3296 bool force)
673a394b 3297{
673a394b
EA
3298 /* If we don't have a page list set up, then we're not pinned
3299 * to GPU, and we can ignore the cache flush because it'll happen
3300 * again at bind time.
3301 */
05394f39 3302 if (obj->pages == NULL)
000433b6 3303 return false;
673a394b 3304
769ce464
ID
3305 /*
3306 * Stolen memory is always coherent with the GPU as it is explicitly
3307 * marked as wc by the system, or the system is cache-coherent.
3308 */
6a2c4232 3309 if (obj->stolen || obj->phys_handle)
000433b6 3310 return false;
769ce464 3311
9c23f7fc
CW
3312 /* If the GPU is snooping the contents of the CPU cache,
3313 * we do not need to manually clear the CPU cache lines. However,
3314 * the caches are only snooped when the render cache is
3315 * flushed/invalidated. As we always have to emit invalidations
3316 * and flushes when moving into and out of the RENDER domain, correct
3317 * snooping behaviour occurs naturally as the result of our domain
3318 * tracking.
3319 */
0f71979a
CW
3320 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3321 obj->cache_dirty = true;
000433b6 3322 return false;
0f71979a 3323 }
9c23f7fc 3324
1c5d22f7 3325 trace_i915_gem_object_clflush(obj);
9da3da66 3326 drm_clflush_sg(obj->pages);
0f71979a 3327 obj->cache_dirty = false;
000433b6
CW
3328
3329 return true;
e47c68e9
EA
3330}
3331
3332/** Flushes the GTT write domain for the object if it's dirty. */
3333static void
05394f39 3334i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3335{
1c5d22f7
CW
3336 uint32_t old_write_domain;
3337
05394f39 3338 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
e47c68e9
EA
3339 return;
3340
63256ec5 3341 /* No actual flushing is required for the GTT write domain. Writes
e47c68e9
EA
3342 * to it immediately go to main memory as far as we know, so there's
3343 * no chipset flush. It also doesn't land in render cache.
63256ec5
CW
3344 *
3345 * However, we do have to enforce the order so that all writes through
3346 * the GTT land before any writes to the device, such as updates to
3347 * the GATT itself.
e47c68e9 3348 */
63256ec5
CW
3349 wmb();
3350
05394f39
CW
3351 old_write_domain = obj->base.write_domain;
3352 obj->base.write_domain = 0;
1c5d22f7 3353
de152b62 3354 intel_fb_obj_flush(obj, false, ORIGIN_GTT);
f99d7069 3355
1c5d22f7 3356 trace_i915_gem_object_change_domain(obj,
05394f39 3357 obj->base.read_domains,
1c5d22f7 3358 old_write_domain);
e47c68e9
EA
3359}
3360
3361/** Flushes the CPU write domain for the object if it's dirty. */
3362static void
e62b59e4 3363i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3364{
1c5d22f7 3365 uint32_t old_write_domain;
e47c68e9 3366
05394f39 3367 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
e47c68e9
EA
3368 return;
3369
e62b59e4 3370 if (i915_gem_clflush_object(obj, obj->pin_display))
c033666a 3371 i915_gem_chipset_flush(to_i915(obj->base.dev));
000433b6 3372
05394f39
CW
3373 old_write_domain = obj->base.write_domain;
3374 obj->base.write_domain = 0;
1c5d22f7 3375
de152b62 3376 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
f99d7069 3377
1c5d22f7 3378 trace_i915_gem_object_change_domain(obj,
05394f39 3379 obj->base.read_domains,
1c5d22f7 3380 old_write_domain);
e47c68e9
EA
3381}
3382
2ef7eeaa
EA
3383/**
3384 * Moves a single object to the GTT read, and possibly write domain.
14bb2c11
TU
3385 * @obj: object to act on
3386 * @write: ask for write access or read only
2ef7eeaa
EA
3387 *
3388 * This function returns when the move is complete, including waiting on
3389 * flushes to occur.
3390 */
79e53945 3391int
2021746e 3392i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2ef7eeaa 3393{
72e96d64
JL
3394 struct drm_device *dev = obj->base.dev;
3395 struct drm_i915_private *dev_priv = to_i915(dev);
3396 struct i915_ggtt *ggtt = &dev_priv->ggtt;
1c5d22f7 3397 uint32_t old_write_domain, old_read_domains;
43566ded 3398 struct i915_vma *vma;
e47c68e9 3399 int ret;
2ef7eeaa 3400
8d7e3de1
CW
3401 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3402 return 0;
3403
0201f1ec 3404 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3405 if (ret)
3406 return ret;
3407
43566ded
CW
3408 /* Flush and acquire obj->pages so that we are coherent through
3409 * direct access in memory with previous cached writes through
3410 * shmemfs and that our cache domain tracking remains valid.
3411 * For example, if the obj->filp was moved to swap without us
3412 * being notified and releasing the pages, we would mistakenly
3413 * continue to assume that the obj remained out of the CPU cached
3414 * domain.
3415 */
3416 ret = i915_gem_object_get_pages(obj);
3417 if (ret)
3418 return ret;
3419
e62b59e4 3420 i915_gem_object_flush_cpu_write_domain(obj);
1c5d22f7 3421
d0a57789
CW
3422 /* Serialise direct access to this object with the barriers for
3423 * coherent writes from the GPU, by effectively invalidating the
3424 * GTT domain upon first access.
3425 */
3426 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3427 mb();
3428
05394f39
CW
3429 old_write_domain = obj->base.write_domain;
3430 old_read_domains = obj->base.read_domains;
1c5d22f7 3431
e47c68e9
EA
3432 /* It should now be out of any other write domains, and we can update
3433 * the domain values for our changes.
3434 */
05394f39
CW
3435 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3436 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
e47c68e9 3437 if (write) {
05394f39
CW
3438 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3439 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3440 obj->dirty = 1;
2ef7eeaa
EA
3441 }
3442
1c5d22f7
CW
3443 trace_i915_gem_object_change_domain(obj,
3444 old_read_domains,
3445 old_write_domain);
3446
8325a09d 3447 /* And bump the LRU for this access */
43566ded
CW
3448 vma = i915_gem_obj_to_ggtt(obj);
3449 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
1c7f4bca 3450 list_move_tail(&vma->vm_link,
72e96d64 3451 &ggtt->base.inactive_list);
8325a09d 3452
e47c68e9
EA
3453 return 0;
3454}
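/*
 * Illustrative sketch (not part of the driver): a caller that wants to write
 * through a GTT mmap would typically, with dev->struct_mutex held, do:
 *
 *	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *	if (ret == 0) {
 *		... write through the GTT mapping ...
 *	}
 *
 * The locking assumption is inferred from the ioctl paths elsewhere in this
 * file rather than stated by this function itself.
 */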
3455
ef55f92a
CW
3456/**
3457 * Changes the cache-level of an object across all VMA.
14bb2c11
TU
3458 * @obj: object to act on
3459 * @cache_level: new cache level to set for the object
ef55f92a
CW
3460 *
3461 * After this function returns, the object will be in the new cache-level
3462 * across all GTT and the contents of the backing storage will be coherent,
3463 * with respect to the new cache-level. In order to keep the backing storage
3464 * coherent for all users, we only allow a single cache level to be set
3465 * globally on the object and prevent it from being changed whilst the
 3466  * hardware is reading from the object. That is, if the object is currently
 3467  * on the scanout, it will be set to uncached (or equivalent display
3468 * cache coherency) and all non-MOCS GPU access will also be uncached so
3469 * that all direct access to the scanout remains coherent.
3470 */
e4ffd173
CW
3471int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3472 enum i915_cache_level cache_level)
3473{
7bddb01f 3474 struct drm_device *dev = obj->base.dev;
df6f783a 3475 struct i915_vma *vma, *next;
ef55f92a 3476 bool bound = false;
ed75a55b 3477 int ret = 0;
e4ffd173
CW
3478
3479 if (obj->cache_level == cache_level)
ed75a55b 3480 goto out;
e4ffd173 3481
ef55f92a
CW
3482 /* Inspect the list of currently bound VMA and unbind any that would
3483 * be invalid given the new cache-level. This is principally to
3484 * catch the issue of the CS prefetch crossing page boundaries and
3485 * reading an invalid PTE on older architectures.
3486 */
1c7f4bca 3487 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
ef55f92a
CW
3488 if (!drm_mm_node_allocated(&vma->node))
3489 continue;
3490
3491 if (vma->pin_count) {
3492 DRM_DEBUG("can not change the cache level of pinned objects\n");
3493 return -EBUSY;
3494 }
3495
4144f9b5 3496 if (!i915_gem_valid_gtt_space(vma, cache_level)) {
07fe0b12 3497 ret = i915_vma_unbind(vma);
3089c6f2
BW
3498 if (ret)
3499 return ret;
ef55f92a
CW
3500 } else
3501 bound = true;
42d6ab48
CW
3502 }
3503
ef55f92a
CW
3504 /* We can reuse the existing drm_mm nodes but need to change the
3505 * cache-level on the PTE. We could simply unbind them all and
 3506  * rebind with the correct cache-level on next use. However, since
 3507  * we already have a valid slot, dma mapping, pages etc., we may as well
3508 * rewrite the PTE in the belief that doing so tramples upon less
3509 * state and so involves less work.
3510 */
3511 if (bound) {
3512 /* Before we change the PTE, the GPU must not be accessing it.
3513 * If we wait upon the object, we know that all the bound
3514 * VMA are no longer active.
3515 */
2e2f351d 3516 ret = i915_gem_object_wait_rendering(obj, false);
e4ffd173
CW
3517 if (ret)
3518 return ret;
3519
ef55f92a
CW
3520 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
3521 /* Access to snoopable pages through the GTT is
3522 * incoherent and on some machines causes a hard
 3523  * lockup. Relinquish the CPU mmapping to force
3524 * userspace to refault in the pages and we can
3525 * then double check if the GTT mapping is still
3526 * valid for that pointer access.
3527 */
3528 i915_gem_release_mmap(obj);
3529
3530 /* As we no longer need a fence for GTT access,
3531 * we can relinquish it now (and so prevent having
3532 * to steal a fence from someone else on the next
3533 * fence request). Note GPU activity would have
3534 * dropped the fence as all snoopable access is
3535 * supposed to be linear.
3536 */
e4ffd173
CW
3537 ret = i915_gem_object_put_fence(obj);
3538 if (ret)
3539 return ret;
ef55f92a
CW
3540 } else {
3541 /* We either have incoherent backing store and
3542 * so no GTT access or the architecture is fully
3543 * coherent. In such cases, existing GTT mmaps
3544 * ignore the cache bit in the PTE and we can
3545 * rewrite it without confusing the GPU or having
3546 * to force userspace to fault back in its mmaps.
3547 */
e4ffd173
CW
3548 }
3549
1c7f4bca 3550 list_for_each_entry(vma, &obj->vma_list, obj_link) {
ef55f92a
CW
3551 if (!drm_mm_node_allocated(&vma->node))
3552 continue;
3553
3554 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3555 if (ret)
3556 return ret;
3557 }
e4ffd173
CW
3558 }
3559
1c7f4bca 3560 list_for_each_entry(vma, &obj->vma_list, obj_link)
2c22569b
CW
3561 vma->node.color = cache_level;
3562 obj->cache_level = cache_level;
3563
ed75a55b 3564out:
ef55f92a
CW
3565 /* Flush the dirty CPU caches to the backing storage so that the
3566 * object is now coherent at its new cache level (with respect
3567 * to the access domain).
3568 */
b50a5371 3569 if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
0f71979a 3570 if (i915_gem_clflush_object(obj, true))
c033666a 3571 i915_gem_chipset_flush(to_i915(obj->base.dev));
e4ffd173
CW
3572 }
3573
e4ffd173
CW
3574 return 0;
3575}
3576
199adf40
BW
3577int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3578 struct drm_file *file)
e6994aee 3579{
199adf40 3580 struct drm_i915_gem_caching *args = data;
e6994aee 3581 struct drm_i915_gem_object *obj;
e6994aee 3582
a8ad0bd8 3583 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
432be69d
CW
3584 if (&obj->base == NULL)
3585 return -ENOENT;
e6994aee 3586
651d794f
CW
3587 switch (obj->cache_level) {
3588 case I915_CACHE_LLC:
3589 case I915_CACHE_L3_LLC:
3590 args->caching = I915_CACHING_CACHED;
3591 break;
3592
4257d3ba
CW
3593 case I915_CACHE_WT:
3594 args->caching = I915_CACHING_DISPLAY;
3595 break;
3596
651d794f
CW
3597 default:
3598 args->caching = I915_CACHING_NONE;
3599 break;
3600 }
e6994aee 3601
432be69d
CW
3602 drm_gem_object_unreference_unlocked(&obj->base);
3603 return 0;
e6994aee
CW
3604}
3605
199adf40
BW
3606int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3607 struct drm_file *file)
e6994aee 3608{
fac5e23e 3609 struct drm_i915_private *dev_priv = to_i915(dev);
199adf40 3610 struct drm_i915_gem_caching *args = data;
e6994aee
CW
3611 struct drm_i915_gem_object *obj;
3612 enum i915_cache_level level;
3613 int ret;
3614
199adf40
BW
3615 switch (args->caching) {
3616 case I915_CACHING_NONE:
e6994aee
CW
3617 level = I915_CACHE_NONE;
3618 break;
199adf40 3619 case I915_CACHING_CACHED:
e5756c10
ID
3620 /*
3621 * Due to a HW issue on BXT A stepping, GPU stores via a
3622 * snooped mapping may leave stale data in a corresponding CPU
3623 * cacheline, whereas normally such cachelines would get
3624 * invalidated.
3625 */
ca377809 3626 if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
e5756c10
ID
3627 return -ENODEV;
3628
e6994aee
CW
3629 level = I915_CACHE_LLC;
3630 break;
4257d3ba
CW
3631 case I915_CACHING_DISPLAY:
3632 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3633 break;
e6994aee
CW
3634 default:
3635 return -EINVAL;
3636 }
3637
fd0fe6ac
ID
3638 intel_runtime_pm_get(dev_priv);
3639
3bc2913e
BW
3640 ret = i915_mutex_lock_interruptible(dev);
3641 if (ret)
fd0fe6ac 3642 goto rpm_put;
3bc2913e 3643
a8ad0bd8 3644 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
e6994aee
CW
3645 if (&obj->base == NULL) {
3646 ret = -ENOENT;
3647 goto unlock;
3648 }
3649
3650 ret = i915_gem_object_set_cache_level(obj, level);
3651
3652 drm_gem_object_unreference(&obj->base);
3653unlock:
3654 mutex_unlock(&dev->struct_mutex);
fd0fe6ac
ID
3655rpm_put:
3656 intel_runtime_pm_put(dev_priv);
3657
e6994aee
CW
3658 return ret;
3659}
3660
b9241ea3 3661/*
2da3b9b9
CW
3662 * Prepare buffer for display plane (scanout, cursors, etc).
3663 * Can be called from an uninterruptible phase (modesetting) and allows
3664 * any flushes to be pipelined (for pageflips).
b9241ea3
ZW
3665 */
3666int
2da3b9b9
CW
3667i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3668 u32 alignment,
e6617330 3669 const struct i915_ggtt_view *view)
b9241ea3 3670{
2da3b9b9 3671 u32 old_read_domains, old_write_domain;
b9241ea3
ZW
3672 int ret;
3673
cc98b413
CW
3674 /* Mark the pin_display early so that we account for the
3675 * display coherency whilst setting up the cache domains.
3676 */
8a0c39b1 3677 obj->pin_display++;
cc98b413 3678
a7ef0640
EA
3679 /* The display engine is not coherent with the LLC cache on gen6. As
3680 * a result, we make sure that the pinning that is about to occur is
 3681  * done with uncached PTEs. This is the lowest common denominator for all
3682 * chipsets.
3683 *
3684 * However for gen6+, we could do better by using the GFDT bit instead
3685 * of uncaching, which would allow us to flush all the LLC-cached data
3686 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3687 */
651d794f
CW
3688 ret = i915_gem_object_set_cache_level(obj,
3689 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
a7ef0640 3690 if (ret)
cc98b413 3691 goto err_unpin_display;
a7ef0640 3692
2da3b9b9
CW
3693 /* As the user may map the buffer once pinned in the display plane
3694 * (e.g. libkms for the bootup splash), we have to ensure that we
3695 * always use map_and_fenceable for all scanout buffers.
3696 */
50470bb0
TU
3697 ret = i915_gem_object_ggtt_pin(obj, view, alignment,
3698 view->type == I915_GGTT_VIEW_NORMAL ?
3699 PIN_MAPPABLE : 0);
2da3b9b9 3700 if (ret)
cc98b413 3701 goto err_unpin_display;
2da3b9b9 3702
e62b59e4 3703 i915_gem_object_flush_cpu_write_domain(obj);
b118c1e3 3704
2da3b9b9 3705 old_write_domain = obj->base.write_domain;
05394f39 3706 old_read_domains = obj->base.read_domains;
2da3b9b9
CW
3707
3708 /* It should now be out of any other write domains, and we can update
3709 * the domain values for our changes.
3710 */
e5f1d962 3711 obj->base.write_domain = 0;
05394f39 3712 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
3713
3714 trace_i915_gem_object_change_domain(obj,
3715 old_read_domains,
2da3b9b9 3716 old_write_domain);
b9241ea3
ZW
3717
3718 return 0;
cc98b413
CW
3719
3720err_unpin_display:
8a0c39b1 3721 obj->pin_display--;
cc98b413
CW
3722 return ret;
3723}
3724
3725void
e6617330
TU
3726i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
3727 const struct i915_ggtt_view *view)
cc98b413 3728{
8a0c39b1
TU
3729 if (WARN_ON(obj->pin_display == 0))
3730 return;
3731
e6617330
TU
3732 i915_gem_object_ggtt_unpin_view(obj, view);
3733
8a0c39b1 3734 obj->pin_display--;
b9241ea3
ZW
3735}
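/*
 * Illustrative pairing (not from the original source): modeset code pins a
 * framebuffer object before scanout and unpins it with the same view once
 * the plane is finished with it, e.g.:
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
 *	if (ret == 0) {
 *		... scan out from the pinned GGTT address ...
 *		i915_gem_object_unpin_from_display_plane(obj, &view);
 *	}
 *
 * Passing the same view to both calls is assumed; the WARN_ON above fires
 * if the unpin is unbalanced.
 */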
3736
e47c68e9
EA
3737/**
3738 * Moves a single object to the CPU read, and possibly write domain.
14bb2c11
TU
3739 * @obj: object to act on
3740 * @write: requesting write or read-only access
e47c68e9
EA
3741 *
3742 * This function returns when the move is complete, including waiting on
3743 * flushes to occur.
3744 */
dabdfe02 3745int
919926ae 3746i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
e47c68e9 3747{
1c5d22f7 3748 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
3749 int ret;
3750
8d7e3de1
CW
3751 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3752 return 0;
3753
0201f1ec 3754 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3755 if (ret)
3756 return ret;
3757
e47c68e9 3758 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 3759
05394f39
CW
3760 old_write_domain = obj->base.write_domain;
3761 old_read_domains = obj->base.read_domains;
1c5d22f7 3762
e47c68e9 3763 /* Flush the CPU cache if it's still invalid. */
05394f39 3764 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2c22569b 3765 i915_gem_clflush_object(obj, false);
2ef7eeaa 3766
05394f39 3767 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
3768 }
3769
3770 /* It should now be out of any other write domains, and we can update
3771 * the domain values for our changes.
3772 */
05394f39 3773 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9
EA
3774
3775 /* If we're writing through the CPU, then the GPU read domains will
3776 * need to be invalidated at next use.
3777 */
3778 if (write) {
05394f39
CW
3779 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3780 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e47c68e9 3781 }
2ef7eeaa 3782
1c5d22f7
CW
3783 trace_i915_gem_object_change_domain(obj,
3784 old_read_domains,
3785 old_write_domain);
3786
2ef7eeaa
EA
3787 return 0;
3788}
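/*
 * Illustrative sketch (not from the original source): a create-then-fill
 * sequence moves the object to the CPU write domain before touching its
 * pages directly, mirroring i915_gem_object_create_from_data() later in
 * this file:
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, true);
 *	if (ret == 0) {
 *		... copy data into obj->pages ...
 *	}
 */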
3789
673a394b
EA
3790/* Throttle our rendering by waiting until the ring has completed our requests
3791 * emitted over 20 msec ago.
3792 *
b962442e
EA
3793 * Note that if we were to use the current jiffies each time around the loop,
3794 * we wouldn't escape the function with any frames outstanding if the time to
3795 * render a frame was over 20ms.
3796 *
673a394b
EA
3797 * This should get us reasonable parallelism between CPU and GPU but also
3798 * relatively low latency when blocking on a particular request to finish.
3799 */
40a5f0de 3800static int
f787a5f5 3801i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
40a5f0de 3802{
fac5e23e 3803 struct drm_i915_private *dev_priv = to_i915(dev);
f787a5f5 3804 struct drm_i915_file_private *file_priv = file->driver_priv;
d0bc54f2 3805 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
54fb2411 3806 struct drm_i915_gem_request *request, *target = NULL;
f787a5f5 3807 int ret;
93533c29 3808
308887aa
DV
3809 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3810 if (ret)
3811 return ret;
3812
f4457ae7
CW
3813 /* ABI: return -EIO if already wedged */
3814 if (i915_terminally_wedged(&dev_priv->gpu_error))
3815 return -EIO;
e110e8d6 3816
1c25595f 3817 spin_lock(&file_priv->mm.lock);
f787a5f5 3818 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
b962442e
EA
3819 if (time_after_eq(request->emitted_jiffies, recent_enough))
3820 break;
40a5f0de 3821
fcfa423c
JH
3822 /*
3823 * Note that the request might not have been submitted yet.
3824 * In which case emitted_jiffies will be zero.
3825 */
3826 if (!request->emitted_jiffies)
3827 continue;
3828
54fb2411 3829 target = request;
b962442e 3830 }
ff865885
JH
3831 if (target)
3832 i915_gem_request_reference(target);
1c25595f 3833 spin_unlock(&file_priv->mm.lock);
40a5f0de 3834
54fb2411 3835 if (target == NULL)
f787a5f5 3836 return 0;
2bc43b5c 3837
299259a3 3838 ret = __i915_wait_request(target, true, NULL, NULL);
73db04cf 3839 i915_gem_request_unreference(target);
ff865885 3840
40a5f0de
EA
3841 return ret;
3842}
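/*
 * Illustrative note (an assumption, not shown in this file): the 20ms window
 * described above is taken to come from DRM_I915_THROTTLE_JIFFIES expanding
 * to msecs_to_jiffies(20), so:
 *
 *	recent_enough = jiffies - msecs_to_jiffies(20);
 *
 * Requests emitted within that window are left alone; the newest
 * already-submitted request older than the window becomes the wait target,
 * limiting each client to roughly 20ms of outstanding work.
 */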
3843
d23db88c
CW
3844static bool
3845i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
3846{
3847 struct drm_i915_gem_object *obj = vma->obj;
3848
3849 if (alignment &&
3850 vma->node.start & (alignment - 1))
3851 return true;
3852
3853 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3854 return true;
3855
3856 if (flags & PIN_OFFSET_BIAS &&
3857 vma->node.start < (flags & PIN_OFFSET_MASK))
3858 return true;
3859
506a8e87
CW
3860 if (flags & PIN_OFFSET_FIXED &&
3861 vma->node.start != (flags & PIN_OFFSET_MASK))
3862 return true;
3863
d23db88c
CW
3864 return false;
3865}
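/*
 * Illustrative note (not from the original source): as read from the checks
 * above, a vma is "misplaced" when it violates any constraint the caller
 * asked for. For example, PIN_OFFSET_FIXED demands an exact start address,
 * with the offset carried in the masked bits of the flags word:
 *
 *	flags = PIN_OFFSET_FIXED | (desired_offset & PIN_OFFSET_MASK);
 *
 * desired_offset is a hypothetical name; the masking mirrors the comparison
 * against vma->node.start above.
 */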
3866
d0710abb
CW
3867void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3868{
3869 struct drm_i915_gem_object *obj = vma->obj;
3870 bool mappable, fenceable;
3871 u32 fence_size, fence_alignment;
3872
3873 fence_size = i915_gem_get_gtt_size(obj->base.dev,
3874 obj->base.size,
3875 obj->tiling_mode);
3876 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
3877 obj->base.size,
3878 obj->tiling_mode,
3879 true);
3880
3881 fenceable = (vma->node.size == fence_size &&
3882 (vma->node.start & (fence_alignment - 1)) == 0);
3883
3884 mappable = (vma->node.start + fence_size <=
62106b4f 3885 to_i915(obj->base.dev)->ggtt.mappable_end);
d0710abb
CW
3886
3887 obj->map_and_fenceable = mappable && fenceable;
3888}
3889
ec7adb6e
JL
3890static int
3891i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
3892 struct i915_address_space *vm,
3893 const struct i915_ggtt_view *ggtt_view,
3894 uint32_t alignment,
3895 uint64_t flags)
673a394b 3896{
fac5e23e 3897 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
07fe0b12 3898 struct i915_vma *vma;
ef79e17c 3899 unsigned bound;
673a394b
EA
3900 int ret;
3901
6e7186af
BW
3902 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
3903 return -ENODEV;
3904
bf3d149b 3905 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
1ec9e26d 3906 return -EINVAL;
07fe0b12 3907
c826c449
CW
3908 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
3909 return -EINVAL;
3910
ec7adb6e
JL
3911 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3912 return -EINVAL;
3913
3914 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
3915 i915_gem_obj_to_vma(obj, vm);
3916
07fe0b12 3917 if (vma) {
d7f46fc4
BW
3918 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3919 return -EBUSY;
3920
d23db88c 3921 if (i915_vma_misplaced(vma, alignment, flags)) {
d7f46fc4 3922 WARN(vma->pin_count,
ec7adb6e 3923 "bo is already pinned in %s with incorrect alignment:"
088e0df4 3924 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
75e9e915 3925 " obj->map_and_fenceable=%d\n",
ec7adb6e 3926 ggtt_view ? "ggtt" : "ppgtt",
088e0df4
MT
3927 upper_32_bits(vma->node.start),
3928 lower_32_bits(vma->node.start),
fe14d5f4 3929 alignment,
d23db88c 3930 !!(flags & PIN_MAPPABLE),
05394f39 3931 obj->map_and_fenceable);
07fe0b12 3932 ret = i915_vma_unbind(vma);
ac0c6b5a
CW
3933 if (ret)
3934 return ret;
8ea99c92
DV
3935
3936 vma = NULL;
ac0c6b5a
CW
3937 }
3938 }
3939
ef79e17c 3940 bound = vma ? vma->bound : 0;
8ea99c92 3941 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
ec7adb6e
JL
3942 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
3943 flags);
262de145
DV
3944 if (IS_ERR(vma))
3945 return PTR_ERR(vma);
0875546c
DV
3946 } else {
3947 ret = i915_vma_bind(vma, obj->cache_level, flags);
fe14d5f4
TU
3948 if (ret)
3949 return ret;
3950 }
74898d7e 3951
91e6711e
JL
3952 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
3953 (bound ^ vma->bound) & GLOBAL_BIND) {
d0710abb 3954 __i915_vma_set_map_and_fenceable(vma);
91e6711e
JL
3955 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
3956 }
ef79e17c 3957
8ea99c92 3958 vma->pin_count++;
673a394b
EA
3959 return 0;
3960}
3961
ec7adb6e
JL
3962int
3963i915_gem_object_pin(struct drm_i915_gem_object *obj,
3964 struct i915_address_space *vm,
3965 uint32_t alignment,
3966 uint64_t flags)
3967{
3968 return i915_gem_object_do_pin(obj, vm,
3969 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
3970 alignment, flags);
3971}
3972
3973int
3974i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3975 const struct i915_ggtt_view *view,
3976 uint32_t alignment,
3977 uint64_t flags)
3978{
72e96d64
JL
3979 struct drm_device *dev = obj->base.dev;
3980 struct drm_i915_private *dev_priv = to_i915(dev);
3981 struct i915_ggtt *ggtt = &dev_priv->ggtt;
3982
ade7daa1 3983 BUG_ON(!view);
ec7adb6e 3984
72e96d64 3985 return i915_gem_object_do_pin(obj, &ggtt->base, view,
6fafab76 3986 alignment, flags | PIN_GLOBAL);
ec7adb6e
JL
3987}
3988
673a394b 3989void
e6617330
TU
3990i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3991 const struct i915_ggtt_view *view)
673a394b 3992{
e6617330 3993 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
673a394b 3994
e6617330 3995 WARN_ON(vma->pin_count == 0);
9abc4648 3996 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
d7f46fc4 3997
30154650 3998 --vma->pin_count;
673a394b
EA
3999}
4000
673a394b
EA
4001int
4002i915_gem_busy_ioctl(struct drm_device *dev, void *data,
05394f39 4003 struct drm_file *file)
673a394b
EA
4004{
4005 struct drm_i915_gem_busy *args = data;
05394f39 4006 struct drm_i915_gem_object *obj;
30dbf0c0
CW
4007 int ret;
4008
76c1dec1 4009 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 4010 if (ret)
76c1dec1 4011 return ret;
673a394b 4012
a8ad0bd8 4013 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
c8725226 4014 if (&obj->base == NULL) {
1d7cfea1
CW
4015 ret = -ENOENT;
4016 goto unlock;
673a394b 4017 }
d1b851fc 4018
0be555b6
CW
4019 /* Count all active objects as busy, even if they are currently not used
4020 * by the gpu. Users of this interface expect objects to eventually
4021 * become non-busy without any further actions, therefore emit any
4022 * necessary flushes here.
c4de0a5d 4023 */
30dfebf3 4024 ret = i915_gem_object_flush_active(obj);
b4716185
CW
4025 if (ret)
4026 goto unref;
0be555b6 4027
426960be
CW
4028 args->busy = 0;
4029 if (obj->active) {
4030 int i;
4031
666796da 4032 for (i = 0; i < I915_NUM_ENGINES; i++) {
426960be
CW
4033 struct drm_i915_gem_request *req;
4034
4035 req = obj->last_read_req[i];
4036 if (req)
4a570db5 4037 args->busy |= 1 << (16 + req->engine->exec_id);
426960be
CW
4038 }
4039 if (obj->last_write_req)
4a570db5 4040 args->busy |= obj->last_write_req->engine->exec_id;
426960be 4041 }
673a394b 4042
b4716185 4043unref:
05394f39 4044 drm_gem_object_unreference(&obj->base);
1d7cfea1 4045unlock:
673a394b 4046 mutex_unlock(&dev->struct_mutex);
1d7cfea1 4047 return ret;
673a394b
EA
4048}
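/*
 * Illustrative note (not from the original source): given the encoding
 * above, userspace can split args->busy roughly as:
 *
 *	u32 write_engine = args->busy & 0xffff;	// exec_id of the last writer
 *	u32 read_mask    = args->busy >> 16;	// 1 << exec_id per reading engine
 *
 * The variable names are hypothetical; only the low/high 16-bit split is
 * taken from the code above, and a zero value means the object is idle.
 */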
4049
4050int
4051i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4052 struct drm_file *file_priv)
4053{
0206e353 4054 return i915_gem_ring_throttle(dev, file_priv);
673a394b
EA
4055}
4056
3ef94daa
CW
4057int
4058i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4059 struct drm_file *file_priv)
4060{
fac5e23e 4061 struct drm_i915_private *dev_priv = to_i915(dev);
3ef94daa 4062 struct drm_i915_gem_madvise *args = data;
05394f39 4063 struct drm_i915_gem_object *obj;
76c1dec1 4064 int ret;
3ef94daa
CW
4065
4066 switch (args->madv) {
4067 case I915_MADV_DONTNEED:
4068 case I915_MADV_WILLNEED:
4069 break;
4070 default:
4071 return -EINVAL;
4072 }
4073
1d7cfea1
CW
4074 ret = i915_mutex_lock_interruptible(dev);
4075 if (ret)
4076 return ret;
4077
a8ad0bd8 4078 obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle));
c8725226 4079 if (&obj->base == NULL) {
1d7cfea1
CW
4080 ret = -ENOENT;
4081 goto unlock;
3ef94daa 4082 }
3ef94daa 4083
d7f46fc4 4084 if (i915_gem_obj_is_pinned(obj)) {
1d7cfea1
CW
4085 ret = -EINVAL;
4086 goto out;
3ef94daa
CW
4087 }
4088
656bfa3a
DV
4089 if (obj->pages &&
4090 obj->tiling_mode != I915_TILING_NONE &&
4091 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4092 if (obj->madv == I915_MADV_WILLNEED)
4093 i915_gem_object_unpin_pages(obj);
4094 if (args->madv == I915_MADV_WILLNEED)
4095 i915_gem_object_pin_pages(obj);
4096 }
4097
05394f39
CW
4098 if (obj->madv != __I915_MADV_PURGED)
4099 obj->madv = args->madv;
3ef94daa 4100
6c085a72 4101 /* if the object is no longer attached, discard its backing storage */
be6a0376 4102 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
2d7ef395
CW
4103 i915_gem_object_truncate(obj);
4104
05394f39 4105 args->retained = obj->madv != __I915_MADV_PURGED;
bb6baf76 4106
1d7cfea1 4107out:
05394f39 4108 drm_gem_object_unreference(&obj->base);
1d7cfea1 4109unlock:
3ef94daa 4110 mutex_unlock(&dev->struct_mutex);
1d7cfea1 4111 return ret;
3ef94daa
CW
4112}
4113
37e680a1
CW
4114void i915_gem_object_init(struct drm_i915_gem_object *obj,
4115 const struct drm_i915_gem_object_ops *ops)
0327d6ba 4116{
b4716185
CW
4117 int i;
4118
35c20a60 4119 INIT_LIST_HEAD(&obj->global_list);
666796da 4120 for (i = 0; i < I915_NUM_ENGINES; i++)
117897f4 4121 INIT_LIST_HEAD(&obj->engine_list[i]);
b25cb2f8 4122 INIT_LIST_HEAD(&obj->obj_exec_link);
2f633156 4123 INIT_LIST_HEAD(&obj->vma_list);
8d9d5744 4124 INIT_LIST_HEAD(&obj->batch_pool_link);
0327d6ba 4125
37e680a1
CW
4126 obj->ops = ops;
4127
0327d6ba
CW
4128 obj->fence_reg = I915_FENCE_REG_NONE;
4129 obj->madv = I915_MADV_WILLNEED;
0327d6ba 4130
f19ec8cb 4131 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
0327d6ba
CW
4132}
4133
37e680a1 4134static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
de472664 4135 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
37e680a1
CW
4136 .get_pages = i915_gem_object_get_pages_gtt,
4137 .put_pages = i915_gem_object_put_pages_gtt,
4138};
4139
d37cd8a8 4140struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
05394f39 4141 size_t size)
ac52bc56 4142{
c397b908 4143 struct drm_i915_gem_object *obj;
5949eac4 4144 struct address_space *mapping;
1a240d4d 4145 gfp_t mask;
fe3db79b 4146 int ret;
ac52bc56 4147
42dcedd4 4148 obj = i915_gem_object_alloc(dev);
c397b908 4149 if (obj == NULL)
fe3db79b 4150 return ERR_PTR(-ENOMEM);
673a394b 4151
fe3db79b
CW
4152 ret = drm_gem_object_init(dev, &obj->base, size);
4153 if (ret)
4154 goto fail;
673a394b 4155
bed1ea95
CW
4156 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4157 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4158 /* 965gm cannot relocate objects above 4GiB. */
4159 mask &= ~__GFP_HIGHMEM;
4160 mask |= __GFP_DMA32;
4161 }
4162
496ad9aa 4163 mapping = file_inode(obj->base.filp)->i_mapping;
bed1ea95 4164 mapping_set_gfp_mask(mapping, mask);
5949eac4 4165
37e680a1 4166 i915_gem_object_init(obj, &i915_gem_object_ops);
73aa808f 4167
c397b908
DV
4168 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4169 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 4170
3d29b842
ED
4171 if (HAS_LLC(dev)) {
4172 /* On some devices, we can have the GPU use the LLC (the CPU
a1871112
EA
4173 * cache) for about a 10% performance improvement
4174 * compared to uncached. Graphics requests other than
4175 * display scanout are coherent with the CPU in
4176 * accessing this cache. This means in this mode we
4177 * don't need to clflush on the CPU side, and on the
4178 * GPU side we only need to flush internal caches to
4179 * get data visible to the CPU.
4180 *
4181 * However, we maintain the display planes as UC, and so
4182 * need to rebind when first used as such.
4183 */
4184 obj->cache_level = I915_CACHE_LLC;
4185 } else
4186 obj->cache_level = I915_CACHE_NONE;
4187
d861e338
DV
4188 trace_i915_gem_object_create(obj);
4189
05394f39 4190 return obj;
fe3db79b
CW
4191
4192fail:
4193 i915_gem_object_free(obj);
4194
4195 return ERR_PTR(ret);
c397b908
DV
4196}
4197
340fbd8c
CW
4198static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4199{
4200 /* If we are the last user of the backing storage (be it shmemfs
4201 * pages or stolen etc), we know that the pages are going to be
4202 * immediately released. In this case, we can then skip copying
4203 * back the contents from the GPU.
4204 */
4205
4206 if (obj->madv != I915_MADV_WILLNEED)
4207 return false;
4208
4209 if (obj->base.filp == NULL)
4210 return true;
4211
4212 /* At first glance, this looks racy, but then again so would be
4213 * userspace racing mmap against close. However, the first external
4214 * reference to the filp can only be obtained through the
4215 * i915_gem_mmap_ioctl() which safeguards us against the user
4216 * acquiring such a reference whilst we are in the middle of
4217 * freeing the object.
4218 */
4219 return atomic_long_read(&obj->base.filp->f_count) == 1;
4220}
4221
1488fc08 4222void i915_gem_free_object(struct drm_gem_object *gem_obj)
673a394b 4223{
1488fc08 4224 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
05394f39 4225 struct drm_device *dev = obj->base.dev;
fac5e23e 4226 struct drm_i915_private *dev_priv = to_i915(dev);
07fe0b12 4227 struct i915_vma *vma, *next;
673a394b 4228
f65c9168
PZ
4229 intel_runtime_pm_get(dev_priv);
4230
26e12f89
CW
4231 trace_i915_gem_object_destroy(obj);
4232
1c7f4bca 4233 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
d7f46fc4
BW
4234 int ret;
4235
4236 vma->pin_count = 0;
4237 ret = i915_vma_unbind(vma);
07fe0b12
BW
4238 if (WARN_ON(ret == -ERESTARTSYS)) {
4239 bool was_interruptible;
1488fc08 4240
07fe0b12
BW
4241 was_interruptible = dev_priv->mm.interruptible;
4242 dev_priv->mm.interruptible = false;
1488fc08 4243
07fe0b12 4244 WARN_ON(i915_vma_unbind(vma));
1488fc08 4245
07fe0b12
BW
4246 dev_priv->mm.interruptible = was_interruptible;
4247 }
1488fc08
CW
4248 }
4249
1d64ae71
BW
4250 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4251 * before progressing. */
4252 if (obj->stolen)
4253 i915_gem_object_unpin_pages(obj);
4254
a071fa00
DV
4255 WARN_ON(obj->frontbuffer_bits);
4256
656bfa3a
DV
4257 if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4258 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4259 obj->tiling_mode != I915_TILING_NONE)
4260 i915_gem_object_unpin_pages(obj);
4261
401c29f6
BW
4262 if (WARN_ON(obj->pages_pin_count))
4263 obj->pages_pin_count = 0;
340fbd8c 4264 if (discard_backing_storage(obj))
5537252b 4265 obj->madv = I915_MADV_DONTNEED;
37e680a1 4266 i915_gem_object_put_pages(obj);
de151cf6 4267
9da3da66
CW
4268 BUG_ON(obj->pages);
4269
2f745ad3
CW
4270 if (obj->base.import_attach)
4271 drm_prime_gem_destroy(&obj->base, NULL);
de151cf6 4272
5cc9ed4b
CW
4273 if (obj->ops->release)
4274 obj->ops->release(obj);
4275
05394f39
CW
4276 drm_gem_object_release(&obj->base);
4277 i915_gem_info_remove_obj(dev_priv, obj->base.size);
c397b908 4278
05394f39 4279 kfree(obj->bit_17);
42dcedd4 4280 i915_gem_object_free(obj);
f65c9168
PZ
4281
4282 intel_runtime_pm_put(dev_priv);
673a394b
EA
4283}
4284
ec7adb6e
JL
4285struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4286 struct i915_address_space *vm)
e656a6cb
DV
4287{
4288 struct i915_vma *vma;
1c7f4bca 4289 list_for_each_entry(vma, &obj->vma_list, obj_link) {
1b683729
TU
4290 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
4291 vma->vm == vm)
e656a6cb 4292 return vma;
ec7adb6e
JL
4293 }
4294 return NULL;
4295}
4296
4297struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4298 const struct i915_ggtt_view *view)
4299{
ec7adb6e 4300 struct i915_vma *vma;
e656a6cb 4301
598b9ec8 4302 GEM_BUG_ON(!view);
ec7adb6e 4303
1c7f4bca 4304 list_for_each_entry(vma, &obj->vma_list, obj_link)
598b9ec8 4305 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
ec7adb6e 4306 return vma;
e656a6cb
DV
4307 return NULL;
4308}
4309
2f633156
BW
4310void i915_gem_vma_destroy(struct i915_vma *vma)
4311{
4312 WARN_ON(vma->node.allocated);
aaa05667
CW
4313
4314 /* Keep the vma as a placeholder in the execbuffer reservation lists */
4315 if (!list_empty(&vma->exec_list))
4316 return;
4317
596c5923
CW
4318 if (!vma->is_ggtt)
4319 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
b9d06dd9 4320
1c7f4bca 4321 list_del(&vma->obj_link);
b93dab6e 4322
e20d2ab7 4323 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
2f633156
BW
4324}
4325
e3efda49 4326static void
117897f4 4327i915_gem_stop_engines(struct drm_device *dev)
e3efda49 4328{
fac5e23e 4329 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4330 struct intel_engine_cs *engine;
e3efda49 4331
b4ac5afc 4332 for_each_engine(engine, dev_priv)
117897f4 4333 dev_priv->gt.stop_engine(engine);
e3efda49
CW
4334}
4335
29105ccc 4336int
45c5f202 4337i915_gem_suspend(struct drm_device *dev)
29105ccc 4338{
fac5e23e 4339 struct drm_i915_private *dev_priv = to_i915(dev);
45c5f202 4340 int ret = 0;
28dfe52a 4341
b7137e0c
CW
4342 intel_suspend_gt_powersave(dev_priv);
4343
45c5f202 4344 mutex_lock(&dev->struct_mutex);
5ab57c70
CW
4345
4346 /* We have to flush all the executing contexts to main memory so
 4347  * that they can be saved in the hibernation image. To ensure the last
4348 * context image is coherent, we have to switch away from it. That
4349 * leaves the dev_priv->kernel_context still active when
4350 * we actually suspend, and its image in memory may not match the GPU
4351 * state. Fortunately, the kernel_context is disposable and we do
4352 * not rely on its state.
4353 */
4354 ret = i915_gem_switch_to_kernel_context(dev_priv);
4355 if (ret)
4356 goto err;
4357
6e5a5beb 4358 ret = i915_gem_wait_for_idle(dev_priv);
f7403347 4359 if (ret)
45c5f202 4360 goto err;
f7403347 4361
c033666a 4362 i915_gem_retire_requests(dev_priv);
673a394b 4363
5ab57c70
CW
4364 /* Note that rather than stopping the engines, all we have to do
4365 * is assert that every RING_HEAD == RING_TAIL (all execution complete)
4366 * and similar for all logical context images (to ensure they are
4367 * all ready for hibernation).
4368 */
117897f4 4369 i915_gem_stop_engines(dev);
b2e862d0 4370 i915_gem_context_lost(dev_priv);
45c5f202
CW
4371 mutex_unlock(&dev->struct_mutex);
4372
737b1506 4373 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
67d97da3
CW
4374 cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4375 flush_delayed_work(&dev_priv->gt.idle_work);
29105ccc 4376
bdcf120b
CW
 4377  /* Assert that we successfully flushed all the work and
4378 * reset the GPU back to its idle, low power state.
4379 */
67d97da3 4380 WARN_ON(dev_priv->gt.awake);
bdcf120b 4381
673a394b 4382 return 0;
45c5f202
CW
4383
4384err:
4385 mutex_unlock(&dev->struct_mutex);
4386 return ret;
673a394b
EA
4387}
4388
5ab57c70
CW
4389void i915_gem_resume(struct drm_device *dev)
4390{
4391 struct drm_i915_private *dev_priv = to_i915(dev);
4392
4393 mutex_lock(&dev->struct_mutex);
4394 i915_gem_restore_gtt_mappings(dev);
4395
4396 /* As we didn't flush the kernel context before suspend, we cannot
4397 * guarantee that the context image is complete. So let's just reset
4398 * it and start again.
4399 */
4400 if (i915.enable_execlists)
4401 intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
4402
4403 mutex_unlock(&dev->struct_mutex);
4404}
4405
f691e2f4
DV
4406void i915_gem_init_swizzling(struct drm_device *dev)
4407{
fac5e23e 4408 struct drm_i915_private *dev_priv = to_i915(dev);
f691e2f4 4409
11782b02 4410 if (INTEL_INFO(dev)->gen < 5 ||
f691e2f4
DV
4411 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4412 return;
4413
4414 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4415 DISP_TILE_SURFACE_SWIZZLING);
4416
11782b02
DV
4417 if (IS_GEN5(dev))
4418 return;
4419
f691e2f4
DV
4420 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4421 if (IS_GEN6(dev))
6b26c86d 4422 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
8782e26c 4423 else if (IS_GEN7(dev))
6b26c86d 4424 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
31a5336e
BW
4425 else if (IS_GEN8(dev))
4426 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
8782e26c
BW
4427 else
4428 BUG();
f691e2f4 4429}
e21af88d 4430
81e7f200
VS
4431static void init_unused_ring(struct drm_device *dev, u32 base)
4432{
fac5e23e 4433 struct drm_i915_private *dev_priv = to_i915(dev);
81e7f200
VS
4434
4435 I915_WRITE(RING_CTL(base), 0);
4436 I915_WRITE(RING_HEAD(base), 0);
4437 I915_WRITE(RING_TAIL(base), 0);
4438 I915_WRITE(RING_START(base), 0);
4439}
4440
4441static void init_unused_rings(struct drm_device *dev)
4442{
4443 if (IS_I830(dev)) {
4444 init_unused_ring(dev, PRB1_BASE);
4445 init_unused_ring(dev, SRB0_BASE);
4446 init_unused_ring(dev, SRB1_BASE);
4447 init_unused_ring(dev, SRB2_BASE);
4448 init_unused_ring(dev, SRB3_BASE);
4449 } else if (IS_GEN2(dev)) {
4450 init_unused_ring(dev, SRB0_BASE);
4451 init_unused_ring(dev, SRB1_BASE);
4452 } else if (IS_GEN3(dev)) {
4453 init_unused_ring(dev, PRB1_BASE);
4454 init_unused_ring(dev, PRB2_BASE);
4455 }
4456}
4457
4fc7c971
BW
4458int
4459i915_gem_init_hw(struct drm_device *dev)
4460{
fac5e23e 4461 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4462 struct intel_engine_cs *engine;
d200cda6 4463 int ret;
4fc7c971 4464
5e4f5189
CW
4465 /* Double layer security blanket, see i915_gem_init() */
4466 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4467
3accaf7e 4468 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
05e21cc4 4469 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4fc7c971 4470
0bf21347
VS
4471 if (IS_HASWELL(dev))
4472 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4473 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
9435373e 4474
88a2b2a3 4475 if (HAS_PCH_NOP(dev)) {
6ba844b0
DV
4476 if (IS_IVYBRIDGE(dev)) {
4477 u32 temp = I915_READ(GEN7_MSG_CTL);
4478 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4479 I915_WRITE(GEN7_MSG_CTL, temp);
4480 } else if (INTEL_INFO(dev)->gen >= 7) {
4481 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4482 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4483 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4484 }
88a2b2a3
BW
4485 }
4486
4fc7c971
BW
4487 i915_gem_init_swizzling(dev);
4488
d5abdfda
DV
4489 /*
4490 * At least 830 can leave some of the unused rings
 4491  * "active" (i.e. head != tail) after resume, which
 4492  * will prevent C3 entry. Make sure all unused rings
4493 * are totally idle.
4494 */
4495 init_unused_rings(dev);
4496
ed54c1a1 4497 BUG_ON(!dev_priv->kernel_context);
90638cc1 4498
4ad2fd88
JH
4499 ret = i915_ppgtt_init_hw(dev);
4500 if (ret) {
4501 DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4502 goto out;
4503 }
4504
4505 /* Need to do basic initialisation of all rings first: */
b4ac5afc 4506 for_each_engine(engine, dev_priv) {
e2f80391 4507 ret = engine->init_hw(engine);
35a57ffb 4508 if (ret)
5e4f5189 4509 goto out;
35a57ffb 4510 }
99433931 4511
0ccdacf6
PA
4512 intel_mocs_init_l3cc_table(dev);
4513
33a732f4 4514 /* We can't enable contexts until all firmware is loaded */
e556f7c1
DG
4515 ret = intel_guc_setup(dev);
4516 if (ret)
4517 goto out;
33a732f4 4518
5e4f5189
CW
4519out:
4520 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2fa48d8d 4521 return ret;
8187a2b7
ZN
4522}
4523
1070a42b
CW
4524int i915_gem_init(struct drm_device *dev)
4525{
fac5e23e 4526 struct drm_i915_private *dev_priv = to_i915(dev);
1070a42b
CW
4527 int ret;
4528
1070a42b 4529 mutex_lock(&dev->struct_mutex);
d62b4892 4530
a83014d3 4531 if (!i915.enable_execlists) {
f3dc74c0 4532 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
117897f4
TU
4533 dev_priv->gt.cleanup_engine = intel_cleanup_engine;
4534 dev_priv->gt.stop_engine = intel_stop_engine;
454afebd 4535 } else {
f3dc74c0 4536 dev_priv->gt.execbuf_submit = intel_execlists_submission;
117897f4
TU
4537 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4538 dev_priv->gt.stop_engine = intel_logical_ring_stop;
a83014d3
OM
4539 }
4540
5e4f5189
CW
4541 /* This is just a security blanket to placate dragons.
4542 * On some systems, we very sporadically observe that the first TLBs
4543 * used by the CS may be stale, despite us poking the TLB reset. If
4544 * we hold the forcewake during initialisation these problems
4545 * just magically go away.
4546 */
4547 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4548
72778cb2 4549 i915_gem_init_userptr(dev_priv);
d85489d3 4550 i915_gem_init_ggtt(dev);
d62b4892 4551
2fa48d8d 4552 ret = i915_gem_context_init(dev);
7bcc3777
JN
4553 if (ret)
4554 goto out_unlock;
2fa48d8d 4555
8b3e2d36 4556 ret = intel_engines_init(dev);
35a57ffb 4557 if (ret)
7bcc3777 4558 goto out_unlock;
2fa48d8d 4559
1070a42b 4560 ret = i915_gem_init_hw(dev);
60990320
CW
4561 if (ret == -EIO) {
4562 /* Allow ring initialisation to fail by marking the GPU as
 4563  * wedged. But we only want to do this where the GPU is angry;
 4564  * for all other failures, such as an allocation failure, bail.
4565 */
4566 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
805de8f4 4567 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
60990320 4568 ret = 0;
1070a42b 4569 }
7bcc3777
JN
4570
4571out_unlock:
5e4f5189 4572 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
60990320 4573 mutex_unlock(&dev->struct_mutex);
1070a42b 4574
60990320 4575 return ret;
1070a42b
CW
4576}
4577
8187a2b7 4578void
117897f4 4579i915_gem_cleanup_engines(struct drm_device *dev)
8187a2b7 4580{
fac5e23e 4581 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4582 struct intel_engine_cs *engine;
8187a2b7 4583
b4ac5afc 4584 for_each_engine(engine, dev_priv)
117897f4 4585 dev_priv->gt.cleanup_engine(engine);
8187a2b7
ZN
4586}
4587
64193406 4588static void
666796da 4589init_engine_lists(struct intel_engine_cs *engine)
64193406 4590{
0bc40be8
TU
4591 INIT_LIST_HEAD(&engine->active_list);
4592 INIT_LIST_HEAD(&engine->request_list);
64193406
CW
4593}
4594
40ae4e16
ID
4595void
4596i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4597{
91c8a326 4598 struct drm_device *dev = &dev_priv->drm;
40ae4e16
ID
4599
4600 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4601 !IS_CHERRYVIEW(dev_priv))
4602 dev_priv->num_fence_regs = 32;
4603 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4604 IS_I945GM(dev_priv) || IS_G33(dev_priv))
4605 dev_priv->num_fence_regs = 16;
4606 else
4607 dev_priv->num_fence_regs = 8;
4608
c033666a 4609 if (intel_vgpu_active(dev_priv))
40ae4e16
ID
4610 dev_priv->num_fence_regs =
4611 I915_READ(vgtif_reg(avail_rs.fence_num));
4612
4613 /* Initialize fence registers to zero */
4614 i915_gem_restore_fences(dev);
4615
4616 i915_gem_detect_bit_6_swizzle(dev);
4617}
4618
673a394b 4619void
d64aa096 4620i915_gem_load_init(struct drm_device *dev)
673a394b 4621{
fac5e23e 4622 struct drm_i915_private *dev_priv = to_i915(dev);
42dcedd4
CW
4623 int i;
4624
efab6d8d 4625 dev_priv->objects =
42dcedd4
CW
4626 kmem_cache_create("i915_gem_object",
4627 sizeof(struct drm_i915_gem_object), 0,
4628 SLAB_HWCACHE_ALIGN,
4629 NULL);
e20d2ab7
CW
4630 dev_priv->vmas =
4631 kmem_cache_create("i915_gem_vma",
4632 sizeof(struct i915_vma), 0,
4633 SLAB_HWCACHE_ALIGN,
4634 NULL);
efab6d8d
CW
4635 dev_priv->requests =
4636 kmem_cache_create("i915_gem_request",
4637 sizeof(struct drm_i915_gem_request), 0,
4638 SLAB_HWCACHE_ALIGN,
4639 NULL);
673a394b 4640
fc8c067e 4641 INIT_LIST_HEAD(&dev_priv->vm_list);
a33afea5 4642 INIT_LIST_HEAD(&dev_priv->context_list);
6c085a72
CW
4643 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4644 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
a09ba7fa 4645 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
666796da
TU
4646 for (i = 0; i < I915_NUM_ENGINES; i++)
4647 init_engine_lists(&dev_priv->engine[i]);
4b9de737 4648 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
007cc8ac 4649 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
67d97da3 4650 INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
673a394b 4651 i915_gem_retire_work_handler);
67d97da3 4652 INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
b29c19b6 4653 i915_gem_idle_work_handler);
1f15b76f 4654 init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
1f83fee0 4655 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
31169714 4656
72bfa19c
CW
4657 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4658
19b2dbde 4659 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
10ed13e4 4660
6b95a207 4661 init_waitqueue_head(&dev_priv->pending_flip_queue);
17250b71 4662
ce453d81
CW
4663 dev_priv->mm.interruptible = true;
4664
f99d7069 4665 mutex_init(&dev_priv->fb_tracking.lock);
673a394b 4666}
71acb5eb 4667
d64aa096
ID
4668void i915_gem_load_cleanup(struct drm_device *dev)
4669{
4670 struct drm_i915_private *dev_priv = to_i915(dev);
4671
4672 kmem_cache_destroy(dev_priv->requests);
4673 kmem_cache_destroy(dev_priv->vmas);
4674 kmem_cache_destroy(dev_priv->objects);
4675}
4676
461fb99c
CW
4677int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4678{
4679 struct drm_i915_gem_object *obj;
4680
4681 /* Called just before we write the hibernation image.
4682 *
4683 * We need to update the domain tracking to reflect that the CPU
4684 * will be accessing all the pages to create and restore from the
4685 * hibernation, and so upon restoration those pages will be in the
4686 * CPU domain.
4687 *
4688 * To make sure the hibernation image contains the latest state,
4689 * we update that state just before writing out the image.
4690 */
4691
4692 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
4693 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4694 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4695 }
4696
4697 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
4698 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4699 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4700 }
4701
4702 return 0;
4703}
4704
f787a5f5 4705void i915_gem_release(struct drm_device *dev, struct drm_file *file)
b962442e 4706{
f787a5f5 4707 struct drm_i915_file_private *file_priv = file->driver_priv;
b962442e
EA
4708
4709 /* Clean up our request list when the client is going away, so that
4710 * later retire_requests won't dereference our soon-to-be-gone
4711 * file_priv.
4712 */
1c25595f 4713 spin_lock(&file_priv->mm.lock);
f787a5f5
CW
4714 while (!list_empty(&file_priv->mm.request_list)) {
4715 struct drm_i915_gem_request *request;
4716
4717 request = list_first_entry(&file_priv->mm.request_list,
4718 struct drm_i915_gem_request,
4719 client_list);
4720 list_del(&request->client_list);
4721 request->file_priv = NULL;
4722 }
1c25595f 4723 spin_unlock(&file_priv->mm.lock);
b29c19b6 4724
2e1b8730 4725 if (!list_empty(&file_priv->rps.link)) {
8d3afd7d 4726 spin_lock(&to_i915(dev)->rps.client_lock);
2e1b8730 4727 list_del(&file_priv->rps.link);
8d3afd7d 4728 spin_unlock(&to_i915(dev)->rps.client_lock);
1854d5ca 4729 }
b29c19b6
CW
4730}
4731
4732int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4733{
4734 struct drm_i915_file_private *file_priv;
e422b888 4735 int ret;
b29c19b6
CW
4736
4737 DRM_DEBUG_DRIVER("\n");
4738
4739 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4740 if (!file_priv)
4741 return -ENOMEM;
4742
4743 file->driver_priv = file_priv;
f19ec8cb 4744 file_priv->dev_priv = to_i915(dev);
ab0e7ff9 4745 file_priv->file = file;
2e1b8730 4746 INIT_LIST_HEAD(&file_priv->rps.link);
b29c19b6
CW
4747
4748 spin_lock_init(&file_priv->mm.lock);
4749 INIT_LIST_HEAD(&file_priv->mm.request_list);
b29c19b6 4750
de1add36
TU
4751 file_priv->bsd_ring = -1;
4752
e422b888
BW
4753 ret = i915_gem_context_open(dev, file);
4754 if (ret)
4755 kfree(file_priv);
b29c19b6 4756
e422b888 4757 return ret;
b29c19b6
CW
4758}
4759
b680c37a
DV
4760/**
4761 * i915_gem_track_fb - update frontbuffer tracking
d9072a3e
GT
4762 * @old: current GEM buffer for the frontbuffer slots
4763 * @new: new GEM buffer for the frontbuffer slots
4764 * @frontbuffer_bits: bitmask of frontbuffer slots
b680c37a
DV
4765 *
4766 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4767 * from @old and setting them in @new. Both @old and @new can be NULL.
4768 */
a071fa00
DV
4769void i915_gem_track_fb(struct drm_i915_gem_object *old,
4770 struct drm_i915_gem_object *new,
4771 unsigned frontbuffer_bits)
4772{
4773 if (old) {
4774 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
4775 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
4776 old->frontbuffer_bits &= ~frontbuffer_bits;
4777 }
4778
4779 if (new) {
4780 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
4781 WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
4782 new->frontbuffer_bits |= frontbuffer_bits;
4783 }
4784}
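/*
 * Illustrative sketch (not from the original source): a page flip that
 * replaces the object scanned out on a given plane would update the
 * tracking with something like:
 *
 *	i915_gem_track_fb(old_fb_obj, new_fb_obj, frontbuffer_bits);
 *
 * where frontbuffer_bits is the bitmask for the affected plane; either
 * object may be NULL for the enable/disable cases, per the comment above.
 */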
4785
a70a3148 4786/* All the new VM stuff */
088e0df4
MT
4787u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4788 struct i915_address_space *vm)
a70a3148 4789{
fac5e23e 4790 struct drm_i915_private *dev_priv = to_i915(o->base.dev);
a70a3148
BW
4791 struct i915_vma *vma;
4792
896ab1a5 4793 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
a70a3148 4794
1c7f4bca 4795 list_for_each_entry(vma, &o->vma_list, obj_link) {
596c5923 4796 if (vma->is_ggtt &&
ec7adb6e
JL
4797 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4798 continue;
4799 if (vma->vm == vm)
a70a3148 4800 return vma->node.start;
a70a3148 4801 }
ec7adb6e 4802
f25748ea
DV
4803 WARN(1, "%s vma for this object not found.\n",
4804 i915_is_ggtt(vm) ? "global" : "ppgtt");
a70a3148
BW
4805 return -1;
4806}
4807
088e0df4
MT
4808u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4809 const struct i915_ggtt_view *view)
a70a3148
BW
4810{
4811 struct i915_vma *vma;
4812
1c7f4bca 4813 list_for_each_entry(vma, &o->vma_list, obj_link)
8aac2220 4814 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
ec7adb6e
JL
4815 return vma->node.start;
4816
5678ad73 4817 WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
ec7adb6e
JL
4818 return -1;
4819}
4820
4821bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4822 struct i915_address_space *vm)
4823{
4824 struct i915_vma *vma;
4825
1c7f4bca 4826 list_for_each_entry(vma, &o->vma_list, obj_link) {
596c5923 4827 if (vma->is_ggtt &&
ec7adb6e
JL
4828 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4829 continue;
4830 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4831 return true;
4832 }
4833
4834 return false;
4835}
4836
4837bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
9abc4648 4838 const struct i915_ggtt_view *view)
ec7adb6e 4839{
ec7adb6e
JL
4840 struct i915_vma *vma;
4841
1c7f4bca 4842 list_for_each_entry(vma, &o->vma_list, obj_link)
ff5ec22d 4843 if (vma->is_ggtt &&
9abc4648 4844 i915_ggtt_view_equal(&vma->ggtt_view, view) &&
fe14d5f4 4845 drm_mm_node_allocated(&vma->node))
a70a3148
BW
4846 return true;
4847
4848 return false;
4849}
4850
4851bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
4852{
5a1d5eb0 4853 struct i915_vma *vma;
a70a3148 4854
1c7f4bca 4855 list_for_each_entry(vma, &o->vma_list, obj_link)
5a1d5eb0 4856 if (drm_mm_node_allocated(&vma->node))
a70a3148
BW
4857 return true;
4858
4859 return false;
4860}
4861
8da32727 4862unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
a70a3148 4863{
a70a3148
BW
4864 struct i915_vma *vma;
4865
8da32727 4866 GEM_BUG_ON(list_empty(&o->vma_list));
a70a3148 4867
1c7f4bca 4868 list_for_each_entry(vma, &o->vma_list, obj_link) {
596c5923 4869 if (vma->is_ggtt &&
8da32727 4870 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
a70a3148 4871 return vma->node.size;
ec7adb6e 4872 }
8da32727 4873
a70a3148
BW
4874 return 0;
4875}
4876
ec7adb6e 4877bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
5c2abbea
BW
4878{
4879 struct i915_vma *vma;
1c7f4bca 4880 list_for_each_entry(vma, &obj->vma_list, obj_link)
ec7adb6e
JL
4881 if (vma->pin_count > 0)
4882 return true;
a6631ae1 4883
ec7adb6e 4884 return false;
5c2abbea 4885}
ea70299d 4886
033908ae
DG
4887/* Like i915_gem_object_get_page(), but mark the returned page dirty */
4888struct page *
4889i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
4890{
4891 struct page *page;
4892
4893 /* Only default objects have per-page dirty tracking */
b9bcd14a 4894 if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
033908ae
DG
4895 return NULL;
4896
4897 page = i915_gem_object_get_page(obj, n);
4898 set_page_dirty(page);
4899 return page;
4900}
4901
ea70299d
DG
4902/* Allocate a new GEM object and fill it with the supplied data */
4903struct drm_i915_gem_object *
4904i915_gem_object_create_from_data(struct drm_device *dev,
4905 const void *data, size_t size)
4906{
4907 struct drm_i915_gem_object *obj;
4908 struct sg_table *sg;
4909 size_t bytes;
4910 int ret;
4911
d37cd8a8 4912 obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
fe3db79b 4913 if (IS_ERR(obj))
ea70299d
DG
4914 return obj;
4915
4916 ret = i915_gem_object_set_to_cpu_domain(obj, true);
4917 if (ret)
4918 goto fail;
4919
4920 ret = i915_gem_object_get_pages(obj);
4921 if (ret)
4922 goto fail;
4923
4924 i915_gem_object_pin_pages(obj);
4925 sg = obj->pages;
4926 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
9e7d18c0 4927 obj->dirty = 1; /* Backing store is now out of date */
ea70299d
DG
4928 i915_gem_object_unpin_pages(obj);
4929
4930 if (WARN_ON(bytes != size)) {
4931 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4932 ret = -EFAULT;
4933 goto fail;
4934 }
4935
4936 return obj;
4937
4938fail:
4939 drm_gem_object_unreference(&obj->base);
4940 return ERR_PTR(ret);
4941}
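/*
 * Illustrative usage (an assumption, not shown in this file): firmware-style
 * blobs that need to live in a GEM object can be wrapped as:
 *
 *	obj = i915_gem_object_create_from_data(dev, blob, blob_size);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 * The caller owns the returned reference and drops it when done.
 */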