1 /*
2 * Copyright © 2008-2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_gem_dmabuf.h"
33 #include "i915_vgpu.h"
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36 #include "intel_mocs.h"
37 #include <linux/reservation.h>
38 #include <linux/shmem_fs.h>
39 #include <linux/slab.h>
40 #include <linux/swap.h>
41 #include <linux/pci.h>
42 #include <linux/dma-buf.h>
43
44 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
45 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
46
47 static bool cpu_cache_is_coherent(struct drm_device *dev,
48 enum i915_cache_level level)
49 {
50 return HAS_LLC(dev) || level != I915_CACHE_NONE;
51 }
52
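/* Writes via CPU mappings need a clflush unless the object is already in
 * the CPU write domain. Even on LLC platforms the cache must be flushed
 * when the object is pinned for display, as scanout does not snoop it.
 */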
53 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
54 {
55 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
56 return false;
57
58 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
59 return true;
60
61 return obj->pin_display;
62 }
63
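/* Reserve a temporary node in the CPU-mappable half of the global GTT.
 * The pread/pwrite fallback paths below use this (with a single page) to
 * window object pages through the aperture when the object itself cannot
 * be pinned as mappable.
 */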
64 static int
65 insert_mappable_node(struct drm_i915_private *i915,
66 struct drm_mm_node *node, u32 size)
67 {
68 memset(node, 0, sizeof(*node));
69 return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
70 size, 0, 0, 0,
71 i915->ggtt.mappable_end,
72 DRM_MM_SEARCH_DEFAULT,
73 DRM_MM_CREATE_DEFAULT);
74 }
75
76 static void
77 remove_mappable_node(struct drm_mm_node *node)
78 {
79 drm_mm_remove_node(node);
80 }
81
82 /* some bookkeeping */
83 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
84 size_t size)
85 {
86 spin_lock(&dev_priv->mm.object_stat_lock);
87 dev_priv->mm.object_count++;
88 dev_priv->mm.object_memory += size;
89 spin_unlock(&dev_priv->mm.object_stat_lock);
90 }
91
92 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
93 size_t size)
94 {
95 spin_lock(&dev_priv->mm.object_stat_lock);
96 dev_priv->mm.object_count--;
97 dev_priv->mm.object_memory -= size;
98 spin_unlock(&dev_priv->mm.object_stat_lock);
99 }
100
101 static int
102 i915_gem_wait_for_error(struct i915_gpu_error *error)
103 {
104 int ret;
105
106 if (!i915_reset_in_progress(error))
107 return 0;
108
109 /*
110 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
111 * userspace. If it takes that long something really bad is going on and
112 * we should simply try to bail out and fail as gracefully as possible.
113 */
114 ret = wait_event_interruptible_timeout(error->reset_queue,
115 !i915_reset_in_progress(error),
116 10*HZ);
117 if (ret == 0) {
118 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
119 return -EIO;
120 } else if (ret < 0) {
121 return ret;
122 } else {
123 return 0;
124 }
125 }
126
127 int i915_mutex_lock_interruptible(struct drm_device *dev)
128 {
129 struct drm_i915_private *dev_priv = to_i915(dev);
130 int ret;
131
132 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
133 if (ret)
134 return ret;
135
136 ret = mutex_lock_interruptible(&dev->struct_mutex);
137 if (ret)
138 return ret;
139
140 return 0;
141 }
142
143 int
144 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
145 struct drm_file *file)
146 {
147 struct drm_i915_private *dev_priv = to_i915(dev);
148 struct i915_ggtt *ggtt = &dev_priv->ggtt;
149 struct drm_i915_gem_get_aperture *args = data;
150 struct i915_vma *vma;
151 size_t pinned;
152
153 pinned = 0;
154 mutex_lock(&dev->struct_mutex);
155 list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
156 if (i915_vma_is_pinned(vma))
157 pinned += vma->node.size;
158 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
159 if (i915_vma_is_pinned(vma))
160 pinned += vma->node.size;
161 mutex_unlock(&dev->struct_mutex);
162
163 args->aper_size = ggtt->base.total;
164 args->aper_available_size = args->aper_size - pinned;
165
166 return 0;
167 }
168
169 static int
170 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
171 {
172 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
173 char *vaddr = obj->phys_handle->vaddr;
174 struct sg_table *st;
175 struct scatterlist *sg;
176 int i;
177
178 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
179 return -EINVAL;
180
181 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
182 struct page *page;
183 char *src;
184
185 page = shmem_read_mapping_page(mapping, i);
186 if (IS_ERR(page))
187 return PTR_ERR(page);
188
189 src = kmap_atomic(page);
190 memcpy(vaddr, src, PAGE_SIZE);
191 drm_clflush_virt_range(vaddr, PAGE_SIZE);
192 kunmap_atomic(src);
193
194 put_page(page);
195 vaddr += PAGE_SIZE;
196 }
197
198 i915_gem_chipset_flush(to_i915(obj->base.dev));
199
200 st = kmalloc(sizeof(*st), GFP_KERNEL);
201 if (st == NULL)
202 return -ENOMEM;
203
204 if (sg_alloc_table(st, 1, GFP_KERNEL)) {
205 kfree(st);
206 return -ENOMEM;
207 }
208
209 sg = st->sgl;
210 sg->offset = 0;
211 sg->length = obj->base.size;
212
213 sg_dma_address(sg) = obj->phys_handle->busaddr;
214 sg_dma_len(sg) = obj->base.size;
215
216 obj->pages = st;
217 return 0;
218 }
219
220 static void
221 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
222 {
223 int ret;
224
225 BUG_ON(obj->madv == __I915_MADV_PURGED);
226
227 ret = i915_gem_object_set_to_cpu_domain(obj, true);
228 if (WARN_ON(ret)) {
229 /* In the event of a disaster, abandon all caches and
230 * hope for the best.
231 */
232 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
233 }
234
235 if (obj->madv == I915_MADV_DONTNEED)
236 obj->dirty = 0;
237
238 if (obj->dirty) {
239 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
240 char *vaddr = obj->phys_handle->vaddr;
241 int i;
242
243 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
244 struct page *page;
245 char *dst;
246
247 page = shmem_read_mapping_page(mapping, i);
248 if (IS_ERR(page))
249 continue;
250
251 dst = kmap_atomic(page);
252 drm_clflush_virt_range(vaddr, PAGE_SIZE);
253 memcpy(dst, vaddr, PAGE_SIZE);
254 kunmap_atomic(dst);
255
256 set_page_dirty(page);
257 if (obj->madv == I915_MADV_WILLNEED)
258 mark_page_accessed(page);
259 put_page(page);
260 vaddr += PAGE_SIZE;
261 }
262 obj->dirty = 0;
263 }
264
265 sg_free_table(obj->pages);
266 kfree(obj->pages);
267 }
268
269 static void
270 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
271 {
272 drm_pci_free(obj->base.dev, obj->phys_handle);
273 }
274
275 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
276 .get_pages = i915_gem_object_get_pages_phys,
277 .put_pages = i915_gem_object_put_pages_phys,
278 .release = i915_gem_object_release_phys,
279 };
280
281 int
282 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
283 {
284 struct i915_vma *vma;
285 LIST_HEAD(still_in_list);
286 int ret;
287
288 /* The vma will only be freed if it is marked as closed, and if we wait
289 * upon rendering to the vma, we may unbind anything in the list.
290 */
291 while ((vma = list_first_entry_or_null(&obj->vma_list,
292 struct i915_vma,
293 obj_link))) {
294 list_move_tail(&vma->obj_link, &still_in_list);
295 ret = i915_vma_unbind(vma);
296 if (ret)
297 break;
298 }
299 list_splice(&still_in_list, &obj->vma_list);
300
301 return ret;
302 }
303
304 int
305 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
306 int align)
307 {
308 drm_dma_handle_t *phys;
309 int ret;
310
311 if (obj->phys_handle) {
 312         if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
313 return -EBUSY;
314
315 return 0;
316 }
317
318 if (obj->madv != I915_MADV_WILLNEED)
319 return -EFAULT;
320
321 if (obj->base.filp == NULL)
322 return -EINVAL;
323
324 ret = i915_gem_object_unbind(obj);
325 if (ret)
326 return ret;
327
328 ret = i915_gem_object_put_pages(obj);
329 if (ret)
330 return ret;
331
332 /* create a new object */
333 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
334 if (!phys)
335 return -ENOMEM;
336
337 obj->phys_handle = phys;
338 obj->ops = &i915_gem_phys_ops;
339
340 return i915_gem_object_get_pages(obj);
341 }
342
343 static int
344 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
345 struct drm_i915_gem_pwrite *args,
346 struct drm_file *file_priv)
347 {
348 struct drm_device *dev = obj->base.dev;
349 void *vaddr = obj->phys_handle->vaddr + args->offset;
350 char __user *user_data = u64_to_user_ptr(args->data_ptr);
351 int ret = 0;
352
353 /* We manually control the domain here and pretend that it
354 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
355 */
356 ret = i915_gem_object_wait_rendering(obj, false);
357 if (ret)
358 return ret;
359
360 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
361 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
362 unsigned long unwritten;
363
364 /* The physical object once assigned is fixed for the lifetime
365 * of the obj, so we can safely drop the lock and continue
366 * to access vaddr.
367 */
368 mutex_unlock(&dev->struct_mutex);
369 unwritten = copy_from_user(vaddr, user_data, args->size);
370 mutex_lock(&dev->struct_mutex);
371 if (unwritten) {
372 ret = -EFAULT;
373 goto out;
374 }
375 }
376
377 drm_clflush_virt_range(vaddr, args->size);
378 i915_gem_chipset_flush(to_i915(dev));
379
380 out:
381 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
382 return ret;
383 }
384
385 void *i915_gem_object_alloc(struct drm_device *dev)
386 {
387 struct drm_i915_private *dev_priv = to_i915(dev);
388 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
389 }
390
391 void i915_gem_object_free(struct drm_i915_gem_object *obj)
392 {
393 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
394 kmem_cache_free(dev_priv->objects, obj);
395 }
396
397 static int
398 i915_gem_create(struct drm_file *file,
399 struct drm_device *dev,
400 uint64_t size,
401 uint32_t *handle_p)
402 {
403 struct drm_i915_gem_object *obj;
404 int ret;
405 u32 handle;
406
407 size = roundup(size, PAGE_SIZE);
408 if (size == 0)
409 return -EINVAL;
410
411 /* Allocate the new object */
412 obj = i915_gem_object_create(dev, size);
413 if (IS_ERR(obj))
414 return PTR_ERR(obj);
415
416 ret = drm_gem_handle_create(file, &obj->base, &handle);
417 /* drop reference from allocate - handle holds it now */
418 i915_gem_object_put_unlocked(obj);
419 if (ret)
420 return ret;
421
422 *handle_p = handle;
423 return 0;
424 }
425
426 int
427 i915_gem_dumb_create(struct drm_file *file,
428 struct drm_device *dev,
429 struct drm_mode_create_dumb *args)
430 {
431 /* have to work out size/pitch and return them */
432 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
433 args->size = args->pitch * args->height;
434 return i915_gem_create(file, dev,
435 args->size, &args->handle);
436 }
437
438 /**
439 * Creates a new mm object and returns a handle to it.
440 * @dev: drm device pointer
441 * @data: ioctl data blob
442 * @file: drm file pointer
443 */
444 int
445 i915_gem_create_ioctl(struct drm_device *dev, void *data,
446 struct drm_file *file)
447 {
448 struct drm_i915_gem_create *args = data;
449
450 return i915_gem_create(file, dev,
451 args->size, &args->handle);
452 }
453
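/* Copy out of a kmap'd GPU page that is subject to bit-17 swizzling.
 * Data is copied one 64-byte cacheline at a time, flipping bit 6 of the
 * GPU offset (gpu_offset ^ 64) to undo the swizzle applied by the memory
 * controller on the affected pages.
 */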
454 static inline int
455 __copy_to_user_swizzled(char __user *cpu_vaddr,
456 const char *gpu_vaddr, int gpu_offset,
457 int length)
458 {
459 int ret, cpu_offset = 0;
460
461 while (length > 0) {
462 int cacheline_end = ALIGN(gpu_offset + 1, 64);
463 int this_length = min(cacheline_end - gpu_offset, length);
464 int swizzled_gpu_offset = gpu_offset ^ 64;
465
466 ret = __copy_to_user(cpu_vaddr + cpu_offset,
467 gpu_vaddr + swizzled_gpu_offset,
468 this_length);
469 if (ret)
470 return ret + length;
471
472 cpu_offset += this_length;
473 gpu_offset += this_length;
474 length -= this_length;
475 }
476
477 return 0;
478 }
479
480 static inline int
481 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
482 const char __user *cpu_vaddr,
483 int length)
484 {
485 int ret, cpu_offset = 0;
486
487 while (length > 0) {
488 int cacheline_end = ALIGN(gpu_offset + 1, 64);
489 int this_length = min(cacheline_end - gpu_offset, length);
490 int swizzled_gpu_offset = gpu_offset ^ 64;
491
492 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
493 cpu_vaddr + cpu_offset,
494 this_length);
495 if (ret)
496 return ret + length;
497
498 cpu_offset += this_length;
499 gpu_offset += this_length;
500 length -= this_length;
501 }
502
503 return 0;
504 }
505
506 /*
507 * Pins the specified object's pages and synchronizes the object with
508 * GPU accesses. Sets needs_clflush to non-zero if the caller should
509 * flush the object from the CPU cache.
510 */
511 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
512 int *needs_clflush)
513 {
514 int ret;
515
516 *needs_clflush = 0;
517
518 if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
519 return -EINVAL;
520
521 ret = i915_gem_object_wait_rendering(obj, true);
522 if (ret)
523 return ret;
524
525 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
 526                 /* If we're not in the cpu read domain, set ourselves into the gtt
 527                  * read domain and manually flush cachelines (if required). This
 528                  * optimizes for the case when the gpu will dirty the data
 529                  * again anyway before the next pread happens. */
530 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
531 obj->cache_level);
532 }
533
534 ret = i915_gem_object_get_pages(obj);
535 if (ret)
536 return ret;
537
538 i915_gem_object_pin_pages(obj);
539
540 return ret;
541 }
542
543 /* Per-page copy function for the shmem pread fastpath.
544 * Flushes invalid cachelines before reading the target if
545 * needs_clflush is set. */
546 static int
547 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
548 char __user *user_data,
549 bool page_do_bit17_swizzling, bool needs_clflush)
550 {
551 char *vaddr;
552 int ret;
553
554 if (unlikely(page_do_bit17_swizzling))
555 return -EINVAL;
556
557 vaddr = kmap_atomic(page);
558 if (needs_clflush)
559 drm_clflush_virt_range(vaddr + shmem_page_offset,
560 page_length);
561 ret = __copy_to_user_inatomic(user_data,
562 vaddr + shmem_page_offset,
563 page_length);
564 kunmap_atomic(vaddr);
565
566 return ret ? -EFAULT : 0;
567 }
568
569 static void
570 shmem_clflush_swizzled_range(char *addr, unsigned long length,
571 bool swizzled)
572 {
573 if (unlikely(swizzled)) {
574 unsigned long start = (unsigned long) addr;
575 unsigned long end = (unsigned long) addr + length;
576
577 /* For swizzling simply ensure that we always flush both
578 * channels. Lame, but simple and it works. Swizzled
579 * pwrite/pread is far from a hotpath - current userspace
580 * doesn't use it at all. */
581 start = round_down(start, 128);
582 end = round_up(end, 128);
583
584 drm_clflush_virt_range((void *)start, end - start);
585 } else {
586 drm_clflush_virt_range(addr, length);
587 }
588
589 }
590
591 /* Only difference to the fast-path function is that this can handle bit17
592 * and uses non-atomic copy and kmap functions. */
593 static int
594 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
595 char __user *user_data,
596 bool page_do_bit17_swizzling, bool needs_clflush)
597 {
598 char *vaddr;
599 int ret;
600
601 vaddr = kmap(page);
602 if (needs_clflush)
603 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
604 page_length,
605 page_do_bit17_swizzling);
606
607 if (page_do_bit17_swizzling)
608 ret = __copy_to_user_swizzled(user_data,
609 vaddr, shmem_page_offset,
610 page_length);
611 else
612 ret = __copy_to_user(user_data,
613 vaddr + shmem_page_offset,
614 page_length);
615 kunmap(page);
616
 617         return ret ? -EFAULT : 0;
618 }
619
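/* Map a single aperture page with a regular (non-atomic) WC mapping so
 * that the copy to/from userspace may fault and sleep; callers must have
 * dropped struct_mutex before taking this slow path.
 */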
620 static inline unsigned long
621 slow_user_access(struct io_mapping *mapping,
622 uint64_t page_base, int page_offset,
623 char __user *user_data,
624 unsigned long length, bool pwrite)
625 {
626 void __iomem *ioaddr;
627 void *vaddr;
628 uint64_t unwritten;
629
630 ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
631 /* We can use the cpu mem copy function because this is X86. */
632 vaddr = (void __force *)ioaddr + page_offset;
633 if (pwrite)
634 unwritten = __copy_from_user(vaddr, user_data, length);
635 else
636 unwritten = __copy_to_user(user_data, vaddr, length);
637
638 io_mapping_unmap(ioaddr);
639 return unwritten;
640 }
641
642 static int
643 i915_gem_gtt_pread(struct drm_device *dev,
644 struct drm_i915_gem_object *obj, uint64_t size,
645 uint64_t data_offset, uint64_t data_ptr)
646 {
647 struct drm_i915_private *dev_priv = to_i915(dev);
648 struct i915_ggtt *ggtt = &dev_priv->ggtt;
649 struct drm_mm_node node;
650 char __user *user_data;
651 uint64_t remain;
652 uint64_t offset;
653 int ret;
654
655 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
656 if (ret) {
657 ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
658 if (ret)
659 goto out;
660
661 ret = i915_gem_object_get_pages(obj);
662 if (ret) {
663 remove_mappable_node(&node);
664 goto out;
665 }
666
667 i915_gem_object_pin_pages(obj);
668 } else {
669 node.start = i915_gem_obj_ggtt_offset(obj);
670 node.allocated = false;
671 ret = i915_gem_object_put_fence(obj);
672 if (ret)
673 goto out_unpin;
674 }
675
676 ret = i915_gem_object_set_to_gtt_domain(obj, false);
677 if (ret)
678 goto out_unpin;
679
680 user_data = u64_to_user_ptr(data_ptr);
681 remain = size;
682 offset = data_offset;
683
684 mutex_unlock(&dev->struct_mutex);
685 if (likely(!i915.prefault_disable)) {
686 ret = fault_in_multipages_writeable(user_data, remain);
687 if (ret) {
688 mutex_lock(&dev->struct_mutex);
689 goto out_unpin;
690 }
691 }
692
693 while (remain > 0) {
694 /* Operation in this page
695 *
696 * page_base = page offset within aperture
697 * page_offset = offset within page
698 * page_length = bytes to copy for this page
699 */
700 u32 page_base = node.start;
701 unsigned page_offset = offset_in_page(offset);
702 unsigned page_length = PAGE_SIZE - page_offset;
703 page_length = remain < page_length ? remain : page_length;
704 if (node.allocated) {
705 wmb();
706 ggtt->base.insert_page(&ggtt->base,
707 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
708 node.start,
709 I915_CACHE_NONE, 0);
710 wmb();
711 } else {
712 page_base += offset & PAGE_MASK;
713 }
714 /* This is a slow read/write as it tries to read from
 715                  * and write to user memory which may result in page
716 * faults, and so we cannot perform this under struct_mutex.
717 */
718 if (slow_user_access(ggtt->mappable, page_base,
719 page_offset, user_data,
720 page_length, false)) {
721 ret = -EFAULT;
722 break;
723 }
724
725 remain -= page_length;
726 user_data += page_length;
727 offset += page_length;
728 }
729
730 mutex_lock(&dev->struct_mutex);
731 if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
732 /* The user has modified the object whilst we tried
733 * reading from it, and we now have no idea what domain
734 * the pages should be in. As we have just been touching
735 * them directly, flush everything back to the GTT
736 * domain.
737 */
738 ret = i915_gem_object_set_to_gtt_domain(obj, false);
739 }
740
741 out_unpin:
742 if (node.allocated) {
743 wmb();
744 ggtt->base.clear_range(&ggtt->base,
745 node.start, node.size,
746 true);
747 i915_gem_object_unpin_pages(obj);
748 remove_mappable_node(&node);
749 } else {
750 i915_gem_object_ggtt_unpin(obj);
751 }
752 out:
753 return ret;
754 }
755
756 static int
757 i915_gem_shmem_pread(struct drm_device *dev,
758 struct drm_i915_gem_object *obj,
759 struct drm_i915_gem_pread *args,
760 struct drm_file *file)
761 {
762 char __user *user_data;
763 ssize_t remain;
764 loff_t offset;
765 int shmem_page_offset, page_length, ret = 0;
766 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
767 int prefaulted = 0;
768 int needs_clflush = 0;
769 struct sg_page_iter sg_iter;
770
771 if (!i915_gem_object_has_struct_page(obj))
772 return -ENODEV;
773
774 user_data = u64_to_user_ptr(args->data_ptr);
775 remain = args->size;
776
777 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
778
779 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
780 if (ret)
781 return ret;
782
783 offset = args->offset;
784
785 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
786 offset >> PAGE_SHIFT) {
787 struct page *page = sg_page_iter_page(&sg_iter);
788
789 if (remain <= 0)
790 break;
791
792 /* Operation in this page
793 *
794 * shmem_page_offset = offset within page in shmem file
795 * page_length = bytes to copy for this page
796 */
797 shmem_page_offset = offset_in_page(offset);
798 page_length = remain;
799 if ((shmem_page_offset + page_length) > PAGE_SIZE)
800 page_length = PAGE_SIZE - shmem_page_offset;
801
802 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
803 (page_to_phys(page) & (1 << 17)) != 0;
804
805 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
806 user_data, page_do_bit17_swizzling,
807 needs_clflush);
808 if (ret == 0)
809 goto next_page;
810
811 mutex_unlock(&dev->struct_mutex);
812
813 if (likely(!i915.prefault_disable) && !prefaulted) {
814 ret = fault_in_multipages_writeable(user_data, remain);
815 /* Userspace is tricking us, but we've already clobbered
816 * its pages with the prefault and promised to write the
817 * data up to the first fault. Hence ignore any errors
818 * and just continue. */
819 (void)ret;
820 prefaulted = 1;
821 }
822
823 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
824 user_data, page_do_bit17_swizzling,
825 needs_clflush);
826
827 mutex_lock(&dev->struct_mutex);
828
829 if (ret)
830 goto out;
831
832 next_page:
833 remain -= page_length;
834 user_data += page_length;
835 offset += page_length;
836 }
837
838 out:
839 i915_gem_object_unpin_pages(obj);
840
841 return ret;
842 }
843
844 /**
845 * Reads data from the object referenced by handle.
846 * @dev: drm device pointer
847 * @data: ioctl data blob
848 * @file: drm file pointer
849 *
850 * On error, the contents of *data are undefined.
851 */
852 int
853 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
854 struct drm_file *file)
855 {
856 struct drm_i915_gem_pread *args = data;
857 struct drm_i915_gem_object *obj;
858 int ret = 0;
859
860 if (args->size == 0)
861 return 0;
862
863 if (!access_ok(VERIFY_WRITE,
864 u64_to_user_ptr(args->data_ptr),
865 args->size))
866 return -EFAULT;
867
868 ret = i915_mutex_lock_interruptible(dev);
869 if (ret)
870 return ret;
871
872 obj = i915_gem_object_lookup(file, args->handle);
873 if (!obj) {
874 ret = -ENOENT;
875 goto unlock;
876 }
877
878 /* Bounds check source. */
879 if (args->offset > obj->base.size ||
880 args->size > obj->base.size - args->offset) {
881 ret = -EINVAL;
882 goto out;
883 }
884
885 trace_i915_gem_object_pread(obj, args->offset, args->size);
886
887 ret = i915_gem_shmem_pread(dev, obj, args, file);
888
 889         /* pread for non-shmem-backed objects */
890 if (ret == -EFAULT || ret == -ENODEV) {
891 intel_runtime_pm_get(to_i915(dev));
892 ret = i915_gem_gtt_pread(dev, obj, args->size,
893 args->offset, args->data_ptr);
894 intel_runtime_pm_put(to_i915(dev));
895 }
896
897 out:
898 i915_gem_object_put(obj);
899 unlock:
900 mutex_unlock(&dev->struct_mutex);
901 return ret;
902 }
903
904 /* This is the fast write path which cannot handle
905 * page faults in the source data
906 */
907
908 static inline int
909 fast_user_write(struct io_mapping *mapping,
910 loff_t page_base, int page_offset,
911 char __user *user_data,
912 int length)
913 {
914 void __iomem *vaddr_atomic;
915 void *vaddr;
916 unsigned long unwritten;
917
918 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
919 /* We can use the cpu mem copy function because this is X86. */
920 vaddr = (void __force*)vaddr_atomic + page_offset;
921 unwritten = __copy_from_user_inatomic_nocache(vaddr,
922 user_data, length);
923 io_mapping_unmap_atomic(vaddr_atomic);
924 return unwritten;
925 }
926
927 /**
928 * This is the fast pwrite path, where we copy the data directly from the
929 * user into the GTT, uncached.
930 * @i915: i915 device private data
931 * @obj: i915 gem object
932 * @args: pwrite arguments structure
933 * @file: drm file pointer
934 */
935 static int
936 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
937 struct drm_i915_gem_object *obj,
938 struct drm_i915_gem_pwrite *args,
939 struct drm_file *file)
940 {
941 struct i915_ggtt *ggtt = &i915->ggtt;
942 struct drm_device *dev = obj->base.dev;
943 struct drm_mm_node node;
944 uint64_t remain, offset;
945 char __user *user_data;
946 int ret;
947 bool hit_slow_path = false;
948
949 if (obj->tiling_mode != I915_TILING_NONE)
950 return -EFAULT;
951
952 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
953 PIN_MAPPABLE | PIN_NONBLOCK);
954 if (ret) {
955 ret = insert_mappable_node(i915, &node, PAGE_SIZE);
956 if (ret)
957 goto out;
958
959 ret = i915_gem_object_get_pages(obj);
960 if (ret) {
961 remove_mappable_node(&node);
962 goto out;
963 }
964
965 i915_gem_object_pin_pages(obj);
966 } else {
967 node.start = i915_gem_obj_ggtt_offset(obj);
968 node.allocated = false;
969 ret = i915_gem_object_put_fence(obj);
970 if (ret)
971 goto out_unpin;
972 }
973
974 ret = i915_gem_object_set_to_gtt_domain(obj, true);
975 if (ret)
976 goto out_unpin;
977
978 intel_fb_obj_invalidate(obj, ORIGIN_GTT);
979 obj->dirty = true;
980
981 user_data = u64_to_user_ptr(args->data_ptr);
982 offset = args->offset;
983 remain = args->size;
984 while (remain) {
985 /* Operation in this page
986 *
987 * page_base = page offset within aperture
988 * page_offset = offset within page
989 * page_length = bytes to copy for this page
990 */
991 u32 page_base = node.start;
992 unsigned page_offset = offset_in_page(offset);
993 unsigned page_length = PAGE_SIZE - page_offset;
994 page_length = remain < page_length ? remain : page_length;
995 if (node.allocated) {
996 wmb(); /* flush the write before we modify the GGTT */
997 ggtt->base.insert_page(&ggtt->base,
998 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
999 node.start, I915_CACHE_NONE, 0);
1000 wmb(); /* flush modifications to the GGTT (insert_page) */
1001 } else {
1002 page_base += offset & PAGE_MASK;
1003 }
1004 /* If we get a fault while copying data, then (presumably) our
1005 * source page isn't available. Return the error and we'll
1006 * retry in the slow path.
1007                  * If the object is not shmem-backed, we retry here with the
1008                  * path that handles page faults.
1009 */
1010 if (fast_user_write(ggtt->mappable, page_base,
1011 page_offset, user_data, page_length)) {
1012 hit_slow_path = true;
1013 mutex_unlock(&dev->struct_mutex);
1014 if (slow_user_access(ggtt->mappable,
1015 page_base,
1016 page_offset, user_data,
1017 page_length, true)) {
1018 ret = -EFAULT;
1019 mutex_lock(&dev->struct_mutex);
1020 goto out_flush;
1021 }
1022
1023 mutex_lock(&dev->struct_mutex);
1024 }
1025
1026 remain -= page_length;
1027 user_data += page_length;
1028 offset += page_length;
1029 }
1030
1031 out_flush:
1032 if (hit_slow_path) {
1033 if (ret == 0 &&
1034 (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
1035 /* The user has modified the object whilst we tried
1036 * reading from it, and we now have no idea what domain
1037 * the pages should be in. As we have just been touching
1038 * them directly, flush everything back to the GTT
1039 * domain.
1040 */
1041 ret = i915_gem_object_set_to_gtt_domain(obj, false);
1042 }
1043 }
1044
1045 intel_fb_obj_flush(obj, false, ORIGIN_GTT);
1046 out_unpin:
1047 if (node.allocated) {
1048 wmb();
1049 ggtt->base.clear_range(&ggtt->base,
1050 node.start, node.size,
1051 true);
1052 i915_gem_object_unpin_pages(obj);
1053 remove_mappable_node(&node);
1054 } else {
1055 i915_gem_object_ggtt_unpin(obj);
1056 }
1057 out:
1058 return ret;
1059 }
1060
1061 /* Per-page copy function for the shmem pwrite fastpath.
1062 * Flushes invalid cachelines before writing to the target if
1063 * needs_clflush_before is set and flushes out any written cachelines after
1064 * writing if needs_clflush is set. */
1065 static int
1066 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
1067 char __user *user_data,
1068 bool page_do_bit17_swizzling,
1069 bool needs_clflush_before,
1070 bool needs_clflush_after)
1071 {
1072 char *vaddr;
1073 int ret;
1074
1075 if (unlikely(page_do_bit17_swizzling))
1076 return -EINVAL;
1077
1078 vaddr = kmap_atomic(page);
1079 if (needs_clflush_before)
1080 drm_clflush_virt_range(vaddr + shmem_page_offset,
1081 page_length);
1082 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
1083 user_data, page_length);
1084 if (needs_clflush_after)
1085 drm_clflush_virt_range(vaddr + shmem_page_offset,
1086 page_length);
1087 kunmap_atomic(vaddr);
1088
1089 return ret ? -EFAULT : 0;
1090 }
1091
1092 /* Only difference to the fast-path function is that this can handle bit17
1093 * and uses non-atomic copy and kmap functions. */
1094 static int
1095 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1096 char __user *user_data,
1097 bool page_do_bit17_swizzling,
1098 bool needs_clflush_before,
1099 bool needs_clflush_after)
1100 {
1101 char *vaddr;
1102 int ret;
1103
1104 vaddr = kmap(page);
1105 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1106 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1107 page_length,
1108 page_do_bit17_swizzling);
1109 if (page_do_bit17_swizzling)
1110 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
1111 user_data,
1112 page_length);
1113 else
1114 ret = __copy_from_user(vaddr + shmem_page_offset,
1115 user_data,
1116 page_length);
1117 if (needs_clflush_after)
1118 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1119 page_length,
1120 page_do_bit17_swizzling);
1121 kunmap(page);
1122
1123 return ret ? -EFAULT : 0;
1124 }
1125
1126 static int
1127 i915_gem_shmem_pwrite(struct drm_device *dev,
1128 struct drm_i915_gem_object *obj,
1129 struct drm_i915_gem_pwrite *args,
1130 struct drm_file *file)
1131 {
1132 ssize_t remain;
1133 loff_t offset;
1134 char __user *user_data;
1135 int shmem_page_offset, page_length, ret = 0;
1136 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1137 int hit_slowpath = 0;
1138 int needs_clflush_after = 0;
1139 int needs_clflush_before = 0;
1140 struct sg_page_iter sg_iter;
1141
1142 user_data = u64_to_user_ptr(args->data_ptr);
1143 remain = args->size;
1144
1145 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1146
1147 ret = i915_gem_object_wait_rendering(obj, false);
1148 if (ret)
1149 return ret;
1150
1151 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1152                 /* If we're not in the cpu write domain, set ourselves into the gtt
1153 * write domain and manually flush cachelines (if required). This
1154 * optimizes for the case when the gpu will use the data
1155 * right away and we therefore have to clflush anyway. */
1156 needs_clflush_after = cpu_write_needs_clflush(obj);
1157 }
1158 /* Same trick applies to invalidate partially written cachelines read
1159 * before writing. */
1160 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1161 needs_clflush_before =
1162 !cpu_cache_is_coherent(dev, obj->cache_level);
1163
1164 ret = i915_gem_object_get_pages(obj);
1165 if (ret)
1166 return ret;
1167
1168 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1169
1170 i915_gem_object_pin_pages(obj);
1171
1172 offset = args->offset;
1173 obj->dirty = 1;
1174
1175 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1176 offset >> PAGE_SHIFT) {
1177 struct page *page = sg_page_iter_page(&sg_iter);
1178 int partial_cacheline_write;
1179
1180 if (remain <= 0)
1181 break;
1182
1183 /* Operation in this page
1184 *
1185 * shmem_page_offset = offset within page in shmem file
1186 * page_length = bytes to copy for this page
1187 */
1188 shmem_page_offset = offset_in_page(offset);
1189
1190 page_length = remain;
1191 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1192 page_length = PAGE_SIZE - shmem_page_offset;
1193
1194                 /* If we don't overwrite a cacheline completely, we need to be
1195                  * careful to have up-to-date data by first clflushing. Don't
1196                  * overcomplicate things and flush the entire page. */
1197 partial_cacheline_write = needs_clflush_before &&
1198 ((shmem_page_offset | page_length)
1199 & (boot_cpu_data.x86_clflush_size - 1));
1200
1201 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1202 (page_to_phys(page) & (1 << 17)) != 0;
1203
1204 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1205 user_data, page_do_bit17_swizzling,
1206 partial_cacheline_write,
1207 needs_clflush_after);
1208 if (ret == 0)
1209 goto next_page;
1210
1211 hit_slowpath = 1;
1212 mutex_unlock(&dev->struct_mutex);
1213 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1214 user_data, page_do_bit17_swizzling,
1215 partial_cacheline_write,
1216 needs_clflush_after);
1217
1218 mutex_lock(&dev->struct_mutex);
1219
1220 if (ret)
1221 goto out;
1222
1223 next_page:
1224 remain -= page_length;
1225 user_data += page_length;
1226 offset += page_length;
1227 }
1228
1229 out:
1230 i915_gem_object_unpin_pages(obj);
1231
1232 if (hit_slowpath) {
1233 /*
1234 * Fixup: Flush cpu caches in case we didn't flush the dirty
1235 * cachelines in-line while writing and the object moved
1236 * out of the cpu write domain while we've dropped the lock.
1237 */
1238 if (!needs_clflush_after &&
1239 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1240 if (i915_gem_clflush_object(obj, obj->pin_display))
1241 needs_clflush_after = true;
1242 }
1243 }
1244
1245 if (needs_clflush_after)
1246 i915_gem_chipset_flush(to_i915(dev));
1247 else
1248 obj->cache_dirty = true;
1249
1250 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1251 return ret;
1252 }
1253
1254 /**
1255 * Writes data to the object referenced by handle.
1256 * @dev: drm device
1257 * @data: ioctl data blob
1258 * @file: drm file
1259 *
1260 * On error, the contents of the buffer that were to be modified are undefined.
1261 */
1262 int
1263 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1264 struct drm_file *file)
1265 {
1266 struct drm_i915_private *dev_priv = to_i915(dev);
1267 struct drm_i915_gem_pwrite *args = data;
1268 struct drm_i915_gem_object *obj;
1269 int ret;
1270
1271 if (args->size == 0)
1272 return 0;
1273
1274 if (!access_ok(VERIFY_READ,
1275 u64_to_user_ptr(args->data_ptr),
1276 args->size))
1277 return -EFAULT;
1278
1279 if (likely(!i915.prefault_disable)) {
1280 ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
1281 args->size);
1282 if (ret)
1283 return -EFAULT;
1284 }
1285
1286 intel_runtime_pm_get(dev_priv);
1287
1288 ret = i915_mutex_lock_interruptible(dev);
1289 if (ret)
1290 goto put_rpm;
1291
1292 obj = i915_gem_object_lookup(file, args->handle);
1293 if (!obj) {
1294 ret = -ENOENT;
1295 goto unlock;
1296 }
1297
1298 /* Bounds check destination. */
1299 if (args->offset > obj->base.size ||
1300 args->size > obj->base.size - args->offset) {
1301 ret = -EINVAL;
1302 goto out;
1303 }
1304
1305 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1306
1307 ret = -EFAULT;
1308 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1309 * it would end up going through the fenced access, and we'll get
1310 * different detiling behavior between reading and writing.
1311 * pread/pwrite currently are reading and writing from the CPU
1312 * perspective, requiring manual detiling by the client.
1313 */
1314 if (!i915_gem_object_has_struct_page(obj) ||
1315 cpu_write_needs_clflush(obj)) {
1316 ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
1317 /* Note that the gtt paths might fail with non-page-backed user
1318 * pointers (e.g. gtt mappings when moving data between
1319                  * textures). Fall back to the shmem path in that case. */
1320 }
1321
1322 if (ret == -EFAULT || ret == -ENOSPC) {
1323 if (obj->phys_handle)
1324 ret = i915_gem_phys_pwrite(obj, args, file);
1325 else if (i915_gem_object_has_struct_page(obj))
1326 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1327 else
1328 ret = -ENODEV;
1329 }
1330
1331 out:
1332 i915_gem_object_put(obj);
1333 unlock:
1334 mutex_unlock(&dev->struct_mutex);
1335 put_rpm:
1336 intel_runtime_pm_put(dev_priv);
1337
1338 return ret;
1339 }
1340
1341 /**
1342 * Ensures that all rendering to the object has completed and the object is
1343 * safe to unbind from the GTT or access from the CPU.
1344 * @obj: i915 gem object
1345 * @readonly: waiting for read access or write
1346 */
1347 int
1348 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1349 bool readonly)
1350 {
1351 struct reservation_object *resv;
1352 struct i915_gem_active *active;
1353 unsigned long active_mask;
1354 int idx, ret;
1355
1356 lockdep_assert_held(&obj->base.dev->struct_mutex);
1357
1358 if (!readonly) {
1359 active = obj->last_read;
1360 active_mask = obj->active;
1361 } else {
1362 active_mask = 1;
1363 active = &obj->last_write;
1364 }
1365
1366 for_each_active(active_mask, idx) {
1367 ret = i915_gem_active_wait(&active[idx],
1368 &obj->base.dev->struct_mutex);
1369 if (ret)
1370 return ret;
1371 }
1372
1373 resv = i915_gem_object_get_dmabuf_resv(obj);
1374 if (resv) {
1375 long err;
1376
1377 err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
1378 MAX_SCHEDULE_TIMEOUT);
1379 if (err < 0)
1380 return err;
1381 }
1382
1383 return 0;
1384 }
1385
1386 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1387 * as the object state may change during this call.
1388 */
1389 static __must_check int
1390 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1391 struct intel_rps_client *rps,
1392 bool readonly)
1393 {
1394 struct drm_device *dev = obj->base.dev;
1395 struct drm_i915_private *dev_priv = to_i915(dev);
1396 struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
1397 struct i915_gem_active *active;
1398 unsigned long active_mask;
1399 int ret, i, n = 0;
1400
1401 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1402 BUG_ON(!dev_priv->mm.interruptible);
1403
1404 active_mask = obj->active;
1405 if (!active_mask)
1406 return 0;
1407
1408 if (!readonly) {
1409 active = obj->last_read;
1410 } else {
1411 active_mask = 1;
1412 active = &obj->last_write;
1413 }
1414
1415 for_each_active(active_mask, i) {
1416 struct drm_i915_gem_request *req;
1417
1418 req = i915_gem_active_get(&active[i],
1419 &obj->base.dev->struct_mutex);
1420 if (req)
1421 requests[n++] = req;
1422 }
1423
1424 mutex_unlock(&dev->struct_mutex);
1425 ret = 0;
1426 for (i = 0; ret == 0 && i < n; i++)
1427 ret = i915_wait_request(requests[i], true, NULL, rps);
1428 mutex_lock(&dev->struct_mutex);
1429
1430 for (i = 0; i < n; i++)
1431 i915_gem_request_put(requests[i]);
1432
1433 return ret;
1434 }
1435
1436 static struct intel_rps_client *to_rps_client(struct drm_file *file)
1437 {
1438 struct drm_i915_file_private *fpriv = file->driver_priv;
1439 return &fpriv->rps;
1440 }
1441
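/* Classify a write for frontbuffer tracking: GTT-domain writes count as
 * ORIGIN_GTT unless the object also has a WC CPU mmap, in which case the
 * CPU may be writing through that mapping instead.
 */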
1442 static enum fb_op_origin
1443 write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1444 {
1445 return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
1446 ORIGIN_GTT : ORIGIN_CPU;
1447 }
1448
1449 /**
1450 * Called when user space prepares to use an object with the CPU, either
1451 * through the mmap ioctl's mapping or a GTT mapping.
1452 * @dev: drm device
1453 * @data: ioctl data blob
1454 * @file: drm file
1455 */
1456 int
1457 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1458 struct drm_file *file)
1459 {
1460 struct drm_i915_gem_set_domain *args = data;
1461 struct drm_i915_gem_object *obj;
1462 uint32_t read_domains = args->read_domains;
1463 uint32_t write_domain = args->write_domain;
1464 int ret;
1465
1466 /* Only handle setting domains to types used by the CPU. */
1467 if (write_domain & I915_GEM_GPU_DOMAINS)
1468 return -EINVAL;
1469
1470 if (read_domains & I915_GEM_GPU_DOMAINS)
1471 return -EINVAL;
1472
1473 /* Having something in the write domain implies it's in the read
1474 * domain, and only that read domain. Enforce that in the request.
1475 */
1476 if (write_domain != 0 && read_domains != write_domain)
1477 return -EINVAL;
1478
1479 ret = i915_mutex_lock_interruptible(dev);
1480 if (ret)
1481 return ret;
1482
1483 obj = i915_gem_object_lookup(file, args->handle);
1484 if (!obj) {
1485 ret = -ENOENT;
1486 goto unlock;
1487 }
1488
1489 /* Try to flush the object off the GPU without holding the lock.
1490 * We will repeat the flush holding the lock in the normal manner
1491 * to catch cases where we are gazumped.
1492 */
1493 ret = i915_gem_object_wait_rendering__nonblocking(obj,
1494 to_rps_client(file),
1495 !write_domain);
1496 if (ret)
1497 goto unref;
1498
1499 if (read_domains & I915_GEM_DOMAIN_GTT)
1500 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1501 else
1502 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1503
1504 if (write_domain != 0)
1505 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1506
1507 unref:
1508 i915_gem_object_put(obj);
1509 unlock:
1510 mutex_unlock(&dev->struct_mutex);
1511 return ret;
1512 }
1513
1514 /**
1515 * Called when user space has done writes to this buffer
1516 * @dev: drm device
1517 * @data: ioctl data blob
1518 * @file: drm file
1519 */
1520 int
1521 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1522 struct drm_file *file)
1523 {
1524 struct drm_i915_gem_sw_finish *args = data;
1525 struct drm_i915_gem_object *obj;
1526 int ret = 0;
1527
1528 ret = i915_mutex_lock_interruptible(dev);
1529 if (ret)
1530 return ret;
1531
1532 obj = i915_gem_object_lookup(file, args->handle);
1533 if (!obj) {
1534 ret = -ENOENT;
1535 goto unlock;
1536 }
1537
1538 /* Pinned buffers may be scanout, so flush the cache */
1539 if (obj->pin_display)
1540 i915_gem_object_flush_cpu_write_domain(obj);
1541
1542 i915_gem_object_put(obj);
1543 unlock:
1544 mutex_unlock(&dev->struct_mutex);
1545 return ret;
1546 }
1547
1548 /**
1549 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1550 * it is mapped to.
1551 * @dev: drm device
1552 * @data: ioctl data blob
1553 * @file: drm file
1554 *
1555 * While the mapping holds a reference on the contents of the object, it doesn't
1556 * imply a ref on the object itself.
1557 *
1558 * IMPORTANT:
1559 *
1560  * DRM driver writers who look at this function as an example for how to do GEM
1561 * mmap support, please don't implement mmap support like here. The modern way
1562 * to implement DRM mmap support is with an mmap offset ioctl (like
1563 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1564 * That way debug tooling like valgrind will understand what's going on, hiding
1565 * the mmap call in a driver private ioctl will break that. The i915 driver only
1566 * does cpu mmaps this way because we didn't know better.
1567 */
1568 int
1569 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1570 struct drm_file *file)
1571 {
1572 struct drm_i915_gem_mmap *args = data;
1573 struct drm_i915_gem_object *obj;
1574 unsigned long addr;
1575
1576 if (args->flags & ~(I915_MMAP_WC))
1577 return -EINVAL;
1578
1579 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1580 return -ENODEV;
1581
1582 obj = i915_gem_object_lookup(file, args->handle);
1583 if (!obj)
1584 return -ENOENT;
1585
1586 /* prime objects have no backing filp to GEM mmap
1587 * pages from.
1588 */
1589 if (!obj->base.filp) {
1590 i915_gem_object_put_unlocked(obj);
1591 return -EINVAL;
1592 }
1593
1594 addr = vm_mmap(obj->base.filp, 0, args->size,
1595 PROT_READ | PROT_WRITE, MAP_SHARED,
1596 args->offset);
1597 if (args->flags & I915_MMAP_WC) {
1598 struct mm_struct *mm = current->mm;
1599 struct vm_area_struct *vma;
1600
1601 if (down_write_killable(&mm->mmap_sem)) {
1602 i915_gem_object_put_unlocked(obj);
1603 return -EINTR;
1604 }
1605 vma = find_vma(mm, addr);
1606 if (vma)
1607 vma->vm_page_prot =
1608 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1609 else
1610 addr = -ENOMEM;
1611 up_write(&mm->mmap_sem);
1612
1613 /* This may race, but that's ok, it only gets set */
1614 WRITE_ONCE(obj->has_wc_mmap, true);
1615 }
1616 i915_gem_object_put_unlocked(obj);
1617 if (IS_ERR((void *)addr))
1618 return addr;
1619
1620 args->addr_ptr = (uint64_t) addr;
1621
1622 return 0;
1623 }
1624
1625 /**
1626 * i915_gem_fault - fault a page into the GTT
1627 * @vma: VMA in question
1628 * @vmf: fault info
1629 *
1630 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1631 * from userspace. The fault handler takes care of binding the object to
1632 * the GTT (if needed), allocating and programming a fence register (again,
1633 * only if needed based on whether the old reg is still valid or the object
1634 * is tiled) and inserting a new PTE into the faulting process.
1635 *
1636 * Note that the faulting process may involve evicting existing objects
1637 * from the GTT and/or fence registers to make room. So performance may
1638 * suffer if the GTT working set is large or there are few fence registers
1639 * left.
1640 */
1641 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1642 {
1643 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1644 struct drm_device *dev = obj->base.dev;
1645 struct drm_i915_private *dev_priv = to_i915(dev);
1646 struct i915_ggtt *ggtt = &dev_priv->ggtt;
1647 struct i915_ggtt_view view = i915_ggtt_view_normal;
1648 pgoff_t page_offset;
1649 unsigned long pfn;
1650 int ret = 0;
1651 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1652
1653 intel_runtime_pm_get(dev_priv);
1654
1655 /* We don't use vmf->pgoff since that has the fake offset */
1656 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1657 PAGE_SHIFT;
1658
1659 ret = i915_mutex_lock_interruptible(dev);
1660 if (ret)
1661 goto out;
1662
1663 trace_i915_gem_object_fault(obj, page_offset, true, write);
1664
1665 /* Try to flush the object off the GPU first without holding the lock.
1666 * Upon reacquiring the lock, we will perform our sanity checks and then
1667 * repeat the flush holding the lock in the normal manner to catch cases
1668 * where we are gazumped.
1669 */
1670 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1671 if (ret)
1672 goto unlock;
1673
1674 /* Access to snoopable pages through the GTT is incoherent. */
1675 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1676 ret = -EFAULT;
1677 goto unlock;
1678 }
1679
1680 /* Use a partial view if the object is bigger than the aperture. */
1681 if (obj->base.size >= ggtt->mappable_end &&
1682 obj->tiling_mode == I915_TILING_NONE) {
1683                 static const unsigned int chunk_size = 256; /* 1 MiB */
1684
1685 memset(&view, 0, sizeof(view));
1686 view.type = I915_GGTT_VIEW_PARTIAL;
1687 view.params.partial.offset = rounddown(page_offset, chunk_size);
1688 view.params.partial.size =
1689 min_t(unsigned int,
1690 chunk_size,
1691 (vma->vm_end - vma->vm_start)/PAGE_SIZE -
1692 view.params.partial.offset);
1693 }
1694
1695 /* Now pin it into the GTT if needed */
1696 ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1697 if (ret)
1698 goto unlock;
1699
1700 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1701 if (ret)
1702 goto unpin;
1703
1704 ret = i915_gem_object_get_fence(obj);
1705 if (ret)
1706 goto unpin;
1707
1708 /* Finally, remap it using the new GTT offset */
1709 pfn = ggtt->mappable_base +
1710 i915_gem_obj_ggtt_offset_view(obj, &view);
1711 pfn >>= PAGE_SHIFT;
1712
1713 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
1714                 /* Overriding existing pages in the partial view does not cause
1715                  * us any trouble as TLBs are still valid because the fault
1716                  * is due to userspace losing part of the mapping or never
1717                  * having accessed it before (at this partial view's range).
1718 */
1719 unsigned long base = vma->vm_start +
1720 (view.params.partial.offset << PAGE_SHIFT);
1721 unsigned int i;
1722
1723 for (i = 0; i < view.params.partial.size; i++) {
1724 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
1725 if (ret)
1726 break;
1727 }
1728
1729 obj->fault_mappable = true;
1730 } else {
1731 if (!obj->fault_mappable) {
1732 unsigned long size = min_t(unsigned long,
1733 vma->vm_end - vma->vm_start,
1734 obj->base.size);
1735 int i;
1736
1737 for (i = 0; i < size >> PAGE_SHIFT; i++) {
1738 ret = vm_insert_pfn(vma,
1739 (unsigned long)vma->vm_start + i * PAGE_SIZE,
1740 pfn + i);
1741 if (ret)
1742 break;
1743 }
1744
1745 obj->fault_mappable = true;
1746 } else
1747 ret = vm_insert_pfn(vma,
1748 (unsigned long)vmf->virtual_address,
1749 pfn + page_offset);
1750 }
1751 unpin:
1752 i915_gem_object_ggtt_unpin_view(obj, &view);
1753 unlock:
1754 mutex_unlock(&dev->struct_mutex);
1755 out:
1756 switch (ret) {
1757 case -EIO:
1758 /*
1759 * We eat errors when the gpu is terminally wedged to avoid
1760 * userspace unduly crashing (gl has no provisions for mmaps to
1761 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1762 * and so needs to be reported.
1763 */
1764 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
1765 ret = VM_FAULT_SIGBUS;
1766 break;
1767 }
1768 case -EAGAIN:
1769 /*
1770 * EAGAIN means the gpu is hung and we'll wait for the error
1771 * handler to reset everything when re-faulting in
1772 * i915_mutex_lock_interruptible.
1773 */
1774 case 0:
1775 case -ERESTARTSYS:
1776 case -EINTR:
1777 case -EBUSY:
1778 /*
1779 * EBUSY is ok: this just means that another thread
1780 * already did the job.
1781 */
1782 ret = VM_FAULT_NOPAGE;
1783 break;
1784 case -ENOMEM:
1785 ret = VM_FAULT_OOM;
1786 break;
1787 case -ENOSPC:
1788 case -EFAULT:
1789 ret = VM_FAULT_SIGBUS;
1790 break;
1791 default:
1792 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1793 ret = VM_FAULT_SIGBUS;
1794 break;
1795 }
1796
1797 intel_runtime_pm_put(dev_priv);
1798 return ret;
1799 }
1800
1801 /**
1802 * i915_gem_release_mmap - remove physical page mappings
1803 * @obj: obj in question
1804 *
1805 * Preserve the reservation of the mmapping with the DRM core code, but
1806 * relinquish ownership of the pages back to the system.
1807 *
1808 * It is vital that we remove the page mapping if we have mapped a tiled
1809 * object through the GTT and then lose the fence register due to
1810 * resource pressure. Similarly if the object has been moved out of the
1811  * aperture, then pages mapped into userspace must be revoked. Removing the
1812 * mapping will then trigger a page fault on the next user access, allowing
1813 * fixup by i915_gem_fault().
1814 */
1815 void
1816 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1817 {
1818 /* Serialisation between user GTT access and our code depends upon
1819 * revoking the CPU's PTE whilst the mutex is held. The next user
1820 * pagefault then has to wait until we release the mutex.
1821 */
1822 lockdep_assert_held(&obj->base.dev->struct_mutex);
1823
1824 if (!obj->fault_mappable)
1825 return;
1826
1827 drm_vma_node_unmap(&obj->base.vma_node,
1828 obj->base.dev->anon_inode->i_mapping);
1829
1830         /* Ensure that the CPU's PTEs are revoked and there are no outstanding
1831          * memory transactions from userspace before we return. The TLB
1832          * flushing implied by changing the PTE above *should* be
1833          * sufficient; an extra barrier here just provides us with a bit
1834 * of paranoid documentation about our requirement to serialise
1835 * memory writes before touching registers / GSM.
1836 */
1837 wmb();
1838
1839 obj->fault_mappable = false;
1840 }
1841
1842 void
1843 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1844 {
1845 struct drm_i915_gem_object *obj;
1846
1847 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1848 i915_gem_release_mmap(obj);
1849 }
1850
1851 /**
1852 * i915_gem_get_ggtt_size - return required global GTT size for an object
1853 * @dev_priv: i915 device
1854 * @size: object size
1855 * @tiling_mode: tiling mode
1856 *
1857 * Return the required global GTT size for an object, taking into account
1858 * potential fence register mapping.
1859 */
1860 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
1861 u64 size, int tiling_mode)
1862 {
1863 u64 ggtt_size;
1864
1865 GEM_BUG_ON(size == 0);
1866
1867 if (INTEL_GEN(dev_priv) >= 4 ||
1868 tiling_mode == I915_TILING_NONE)
1869 return size;
1870
1871 /* Previous chips need a power-of-two fence region when tiling */
1872 if (IS_GEN3(dev_priv))
1873 ggtt_size = 1024*1024;
1874 else
1875 ggtt_size = 512*1024;
1876
1877 while (ggtt_size < size)
1878 ggtt_size <<= 1;
1879
1880 return ggtt_size;
1881 }
1882
1883 /**
1884 * i915_gem_get_ggtt_alignment - return required global GTT alignment
1885 * @dev_priv: i915 device
1886 * @size: object size
1887 * @tiling_mode: tiling mode
1888 * @fenced: is fenced alignment required or not
1889 *
1890 * Return the required global GTT alignment for an object, taking into account
1891 * potential fence register mapping.
1892 */
1893 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
1894 int tiling_mode, bool fenced)
1895 {
1896 GEM_BUG_ON(size == 0);
1897
1898 /*
1899 * Minimum alignment is 4k (GTT page size), but might be greater
1900 * if a fence register is needed for the object.
1901 */
1902 if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
1903 tiling_mode == I915_TILING_NONE)
1904 return 4096;
1905
1906 /*
1907 * Previous chips need to be aligned to the size of the smallest
1908 * fence register that can contain the object.
1909 */
1910 return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
1911 }
1912
1913 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1914 {
1915 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
1916 int ret;
1917
1918 dev_priv->mm.shrinker_no_lock_stealing = true;
1919
1920 ret = drm_gem_create_mmap_offset(&obj->base);
1921 if (ret != -ENOSPC)
1922 goto out;
1923
1924 /* Badly fragmented mmap space? The only way we can recover
1925 * space is by destroying unwanted objects. We can't randomly release
1926 * mmap_offsets as userspace expects them to be persistent for the
1927          * lifetime of the objects. The closest we can do is to release the
1928          * offsets on purgeable objects by truncating them and marking them purged,
1929          * which prevents userspace from ever using those objects again.
1930 */
1931 i915_gem_shrink(dev_priv,
1932 obj->base.size >> PAGE_SHIFT,
1933 I915_SHRINK_BOUND |
1934 I915_SHRINK_UNBOUND |
1935 I915_SHRINK_PURGEABLE);
1936 ret = drm_gem_create_mmap_offset(&obj->base);
1937 if (ret != -ENOSPC)
1938 goto out;
1939
1940 i915_gem_shrink_all(dev_priv);
1941 ret = drm_gem_create_mmap_offset(&obj->base);
1942 out:
1943 dev_priv->mm.shrinker_no_lock_stealing = false;
1944
1945 return ret;
1946 }
1947
1948 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1949 {
1950 drm_gem_free_mmap_offset(&obj->base);
1951 }
1952
1953 int
1954 i915_gem_mmap_gtt(struct drm_file *file,
1955 struct drm_device *dev,
1956 uint32_t handle,
1957 uint64_t *offset)
1958 {
1959 struct drm_i915_gem_object *obj;
1960 int ret;
1961
1962 ret = i915_mutex_lock_interruptible(dev);
1963 if (ret)
1964 return ret;
1965
1966 obj = i915_gem_object_lookup(file, handle);
1967 if (!obj) {
1968 ret = -ENOENT;
1969 goto unlock;
1970 }
1971
1972 if (obj->madv != I915_MADV_WILLNEED) {
1973 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
1974 ret = -EFAULT;
1975 goto out;
1976 }
1977
1978 ret = i915_gem_object_create_mmap_offset(obj);
1979 if (ret)
1980 goto out;
1981
1982 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
1983
1984 out:
1985 i915_gem_object_put(obj);
1986 unlock:
1987 mutex_unlock(&dev->struct_mutex);
1988 return ret;
1989 }
1990
1991 /**
1992 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1993 * @dev: DRM device
1994 * @data: GTT mapping ioctl data
1995 * @file: GEM object info
1996 *
1997 * Simply returns the fake offset to userspace so it can mmap it.
1998 * The mmap call will end up in drm_gem_mmap(), which will set things
1999 * up so we can get faults in the handler above.
2000 *
2001 * The fault handler will take care of binding the object into the GTT
2002 * (since it may have been evicted to make room for something), allocating
2003 * a fence register, and mapping the appropriate aperture address into
2004 * userspace.
2005 */
2006 int
2007 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2008 struct drm_file *file)
2009 {
2010 struct drm_i915_gem_mmap_gtt *args = data;
2011
2012 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2013 }
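/*
 * Hedged userspace sketch (illustrative only, not part of this file's
 * build): the two-step flow described above would typically look like
 * the following in a libdrm-based client, where "fd" is an open DRM fd
 * and "handle"/"size" describe an existing GEM object:
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
 *		return -errno;
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 *
 * The mmap() of the fake offset is what ends up in drm_gem_mmap() and,
 * from there, in the GTT fault handler mentioned above.
 */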
2014
2015 /* Immediately discard the backing storage */
2016 static void
2017 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2018 {
2019 i915_gem_object_free_mmap_offset(obj);
2020
2021 if (obj->base.filp == NULL)
2022 return;
2023
2024 /* Our goal here is to return as much of the memory as
2025 * possible back to the system, as we are called from OOM.
2026 * To do this we must instruct the shmfs to drop all of its
2027 * backing pages, *now*.
2028 */
2029 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2030 obj->madv = __I915_MADV_PURGED;
2031 }
2032
2033 /* Try to discard unwanted pages */
2034 static void
2035 i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2036 {
2037 struct address_space *mapping;
2038
2039 switch (obj->madv) {
2040 case I915_MADV_DONTNEED:
2041 i915_gem_object_truncate(obj); /* fall through */
2042 case __I915_MADV_PURGED:
2043 return;
2044 }
2045
2046 if (obj->base.filp == NULL)
2047 return;
2048
2049 mapping = file_inode(obj->base.filp)->i_mapping;
2050 invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2051 }
2052
2053 static void
2054 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2055 {
2056 struct sgt_iter sgt_iter;
2057 struct page *page;
2058 int ret;
2059
2060 BUG_ON(obj->madv == __I915_MADV_PURGED);
2061
2062 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2063 if (WARN_ON(ret)) {
2064 /* In the event of a disaster, abandon all caches and
2065 * hope for the best.
2066 */
2067 i915_gem_clflush_object(obj, true);
2068 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2069 }
2070
2071 i915_gem_gtt_finish_object(obj);
2072
2073 if (i915_gem_object_needs_bit17_swizzle(obj))
2074 i915_gem_object_save_bit_17_swizzle(obj);
2075
2076 if (obj->madv == I915_MADV_DONTNEED)
2077 obj->dirty = 0;
2078
2079 for_each_sgt_page(page, sgt_iter, obj->pages) {
2080 if (obj->dirty)
2081 set_page_dirty(page);
2082
2083 if (obj->madv == I915_MADV_WILLNEED)
2084 mark_page_accessed(page);
2085
2086 put_page(page);
2087 }
2088 obj->dirty = 0;
2089
2090 sg_free_table(obj->pages);
2091 kfree(obj->pages);
2092 }
2093
2094 int
2095 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2096 {
2097 const struct drm_i915_gem_object_ops *ops = obj->ops;
2098
2099 if (obj->pages == NULL)
2100 return 0;
2101
2102 if (obj->pages_pin_count)
2103 return -EBUSY;
2104
2105 GEM_BUG_ON(obj->bind_count);
2106
2107 /* ->put_pages might need to allocate memory for the bit17 swizzle
2108 * array, hence protect them from being reaped by removing them from gtt
2109 * lists early. */
2110 list_del(&obj->global_list);
2111
2112 if (obj->mapping) {
2113 if (is_vmalloc_addr(obj->mapping))
2114 vunmap(obj->mapping);
2115 else
2116 kunmap(kmap_to_page(obj->mapping));
2117 obj->mapping = NULL;
2118 }
2119
2120 ops->put_pages(obj);
2121 obj->pages = NULL;
2122
2123 i915_gem_object_invalidate(obj);
2124
2125 return 0;
2126 }
2127
2128 static int
2129 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2130 {
2131 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2132 int page_count, i;
2133 struct address_space *mapping;
2134 struct sg_table *st;
2135 struct scatterlist *sg;
2136 struct sgt_iter sgt_iter;
2137 struct page *page;
2138 unsigned long last_pfn = 0; /* suppress gcc warning */
2139 int ret;
2140 gfp_t gfp;
2141
2142 /* Assert that the object is not currently in any GPU domain. As it
2143 * wasn't in the GTT, there shouldn't be any way it could have been in
2144 * a GPU cache.
2145 */
2146 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2147 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2148
2149 st = kmalloc(sizeof(*st), GFP_KERNEL);
2150 if (st == NULL)
2151 return -ENOMEM;
2152
2153 page_count = obj->base.size / PAGE_SIZE;
2154 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2155 kfree(st);
2156 return -ENOMEM;
2157 }
2158
2159 /* Get the list of pages out of our struct file. They'll be pinned
2160 * at this point until we release them.
2161 *
2162 * Fail silently without starting the shrinker
2163 */
2164 mapping = file_inode(obj->base.filp)->i_mapping;
2165 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
2166 gfp |= __GFP_NORETRY | __GFP_NOWARN;
2167 sg = st->sgl;
2168 st->nents = 0;
2169 for (i = 0; i < page_count; i++) {
2170 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2171 if (IS_ERR(page)) {
2172 i915_gem_shrink(dev_priv,
2173 page_count,
2174 I915_SHRINK_BOUND |
2175 I915_SHRINK_UNBOUND |
2176 I915_SHRINK_PURGEABLE);
2177 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2178 }
2179 if (IS_ERR(page)) {
2180 /* We've tried hard to allocate the memory by reaping
2181 * our own buffer, now let the real VM do its job and
2182 * go down in flames if truly OOM.
2183 */
2184 i915_gem_shrink_all(dev_priv);
2185 page = shmem_read_mapping_page(mapping, i);
2186 if (IS_ERR(page)) {
2187 ret = PTR_ERR(page);
2188 goto err_pages;
2189 }
2190 }
2191 #ifdef CONFIG_SWIOTLB
2192 if (swiotlb_nr_tbl()) {
2193 st->nents++;
2194 sg_set_page(sg, page, PAGE_SIZE, 0);
2195 sg = sg_next(sg);
2196 continue;
2197 }
2198 #endif
2199 if (!i || page_to_pfn(page) != last_pfn + 1) {
2200 if (i)
2201 sg = sg_next(sg);
2202 st->nents++;
2203 sg_set_page(sg, page, PAGE_SIZE, 0);
2204 } else {
2205 sg->length += PAGE_SIZE;
2206 }
2207 last_pfn = page_to_pfn(page);
2208
2209 /* Check that the i965g/gm workaround works. */
2210 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2211 }
2212 #ifdef CONFIG_SWIOTLB
2213 if (!swiotlb_nr_tbl())
2214 #endif
2215 sg_mark_end(sg);
2216 obj->pages = st;
2217
2218 ret = i915_gem_gtt_prepare_object(obj);
2219 if (ret)
2220 goto err_pages;
2221
2222 if (i915_gem_object_needs_bit17_swizzle(obj))
2223 i915_gem_object_do_bit_17_swizzle(obj);
2224
2225 if (obj->tiling_mode != I915_TILING_NONE &&
2226 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2227 i915_gem_object_pin_pages(obj);
2228
2229 return 0;
2230
2231 err_pages:
2232 sg_mark_end(sg);
2233 for_each_sgt_page(page, sgt_iter, st)
2234 put_page(page);
2235 sg_free_table(st);
2236 kfree(st);
2237
2238 /* shmemfs first checks if there is enough memory to allocate the page
2239 * and reports ENOSPC should there be insufficient memory, along with the usual
2240 * ENOMEM for a genuine allocation failure.
2241 *
2242 * We use ENOSPC in our driver to mean that we have run out of aperture
2243 * space and so want to translate the error from shmemfs back to our
2244 * usual understanding of ENOMEM.
2245 */
2246 if (ret == -ENOSPC)
2247 ret = -ENOMEM;
2248
2249 return ret;
2250 }
2251
2252 /* Ensure that the associated pages are gathered from the backing storage
2253 * and pinned into our object. i915_gem_object_get_pages() may be called
2254 * multiple times before they are released by a single call to
2255 * i915_gem_object_put_pages() - once the pages are no longer referenced
2256 * either as a result of memory pressure (reaping pages under the shrinker)
2257 * or as the object is itself released.
2258 */
2259 int
2260 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2261 {
2262 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2263 const struct drm_i915_gem_object_ops *ops = obj->ops;
2264 int ret;
2265
2266 if (obj->pages)
2267 return 0;
2268
2269 if (obj->madv != I915_MADV_WILLNEED) {
2270 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2271 return -EFAULT;
2272 }
2273
2274 BUG_ON(obj->pages_pin_count);
2275
2276 ret = ops->get_pages(obj);
2277 if (ret)
2278 return ret;
2279
2280 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2281
2282 obj->get_page.sg = obj->pages->sgl;
2283 obj->get_page.last = 0;
2284
2285 return 0;
2286 }
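/*
 * Illustrative pairing (a sketch of the convention described above, not
 * new API): callers that need the backing store to stay resident
 * typically bracket their access under struct_mutex like so:
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret)
 *		return ret;
 *	i915_gem_object_pin_pages(obj);
 *	... use obj->pages ...
 *	i915_gem_object_unpin_pages(obj);
 *
 * The pages themselves are only released once the pin count has dropped
 * to zero and i915_gem_object_put_pages() is called, e.g. by the
 * shrinker or on final object free.
 */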
2287
2288 /* The 'mapping' part of i915_gem_object_pin_map() below */
2289 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
2290 {
2291 unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2292 struct sg_table *sgt = obj->pages;
2293 struct sgt_iter sgt_iter;
2294 struct page *page;
2295 struct page *stack_pages[32];
2296 struct page **pages = stack_pages;
2297 unsigned long i = 0;
2298 void *addr;
2299
2300 /* A single page can always be kmapped */
2301 if (n_pages == 1)
2302 return kmap(sg_page(sgt->sgl));
2303
2304 if (n_pages > ARRAY_SIZE(stack_pages)) {
2305 /* Too big for stack -- allocate temporary array instead */
2306 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2307 if (!pages)
2308 return NULL;
2309 }
2310
2311 for_each_sgt_page(page, sgt_iter, sgt)
2312 pages[i++] = page;
2313
2314 /* Check that we have the expected number of pages */
2315 GEM_BUG_ON(i != n_pages);
2316
2317 addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
2318
2319 if (pages != stack_pages)
2320 drm_free_large(pages);
2321
2322 return addr;
2323 }
2324
2325 /* get, pin, and map the pages of the object into kernel space */
2326 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
2327 {
2328 int ret;
2329
2330 lockdep_assert_held(&obj->base.dev->struct_mutex);
2331
2332 ret = i915_gem_object_get_pages(obj);
2333 if (ret)
2334 return ERR_PTR(ret);
2335
2336 i915_gem_object_pin_pages(obj);
2337
2338 if (!obj->mapping) {
2339 obj->mapping = i915_gem_object_map(obj);
2340 if (!obj->mapping) {
2341 i915_gem_object_unpin_pages(obj);
2342 return ERR_PTR(-ENOMEM);
2343 }
2344 }
2345
2346 return obj->mapping;
2347 }
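/*
 * Usage sketch (illustrative): the returned pointer is treated as a
 * plain kernel mapping of the whole object, assuming the matching
 * i915_gem_object_unpin_map() helper to drop the pin again:
 *
 *	void *vaddr = i915_gem_object_pin_map(obj);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *	memcpy(vaddr, data, len);
 *	i915_gem_object_unpin_map(obj);
 *
 * The cached obj->mapping itself is only torn down later, in
 * i915_gem_object_put_pages().
 */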
2348
2349 static void
2350 i915_gem_object_retire__write(struct i915_gem_active *active,
2351 struct drm_i915_gem_request *request)
2352 {
2353 struct drm_i915_gem_object *obj =
2354 container_of(active, struct drm_i915_gem_object, last_write);
2355
2356 intel_fb_obj_flush(obj, true, ORIGIN_CS);
2357 }
2358
2359 static void
2360 i915_gem_object_retire__read(struct i915_gem_active *active,
2361 struct drm_i915_gem_request *request)
2362 {
2363 int idx = request->engine->id;
2364 struct drm_i915_gem_object *obj =
2365 container_of(active, struct drm_i915_gem_object, last_read[idx]);
2366
2367 GEM_BUG_ON((obj->active & (1 << idx)) == 0);
2368
2369 obj->active &= ~(1 << idx);
2370 if (obj->active)
2371 return;
2372
2373 /* Bump our place on the bound list to keep it roughly in LRU order
2374 * so that we don't steal from recently used but inactive objects
2375 * (unless we are forced to ofc!)
2376 */
2377 if (obj->bind_count)
2378 list_move_tail(&obj->global_list,
2379 &request->i915->mm.bound_list);
2380
2381 i915_gem_object_put(obj);
2382 }
2383
2384 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
2385 {
2386 unsigned long elapsed;
2387
2388 if (ctx->hang_stats.banned)
2389 return true;
2390
2391 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2392 if (ctx->hang_stats.ban_period_seconds &&
2393 elapsed <= ctx->hang_stats.ban_period_seconds) {
2394 DRM_DEBUG("context hanging too fast, banning!\n");
2395 return true;
2396 }
2397
2398 return false;
2399 }
2400
2401 static void i915_set_reset_status(struct i915_gem_context *ctx,
2402 const bool guilty)
2403 {
2404 struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
2405
2406 if (guilty) {
2407 hs->banned = i915_context_is_banned(ctx);
2408 hs->batch_active++;
2409 hs->guilty_ts = get_seconds();
2410 } else {
2411 hs->batch_pending++;
2412 }
2413 }
2414
2415 struct drm_i915_gem_request *
2416 i915_gem_find_active_request(struct intel_engine_cs *engine)
2417 {
2418 struct drm_i915_gem_request *request;
2419
2420 /* We are called by the error capture and reset at a random
2421 * point in time. In particular, note that neither is crucially
2422 * ordered with an interrupt. After a hang, the GPU is dead and we
2423 * assume that no more writes can happen (we waited long enough for
2424 * all writes that were in transaction to be flushed) - adding an
2425 * extra delay for a recent interrupt is pointless. Hence, we do
2426 * not need an engine->irq_seqno_barrier() before the seqno reads.
2427 */
2428 list_for_each_entry(request, &engine->request_list, link) {
2429 if (i915_gem_request_completed(request))
2430 continue;
2431
2432 return request;
2433 }
2434
2435 return NULL;
2436 }
2437
2438 static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
2439 {
2440 struct drm_i915_gem_request *request;
2441 bool ring_hung;
2442
2443 request = i915_gem_find_active_request(engine);
2444 if (request == NULL)
2445 return;
2446
2447 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2448
2449 i915_set_reset_status(request->ctx, ring_hung);
2450 list_for_each_entry_continue(request, &engine->request_list, link)
2451 i915_set_reset_status(request->ctx, false);
2452 }
2453
2454 static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
2455 {
2456 struct intel_ring *ring;
2457
2458 /* Mark all pending requests as complete so that any concurrent
2459 * (lockless) lookup doesn't try to wait upon the request as we
2460 * reset it.
2461 */
2462 intel_engine_init_seqno(engine, engine->last_submitted_seqno);
2463
2464 /*
2465 * Clear the execlists queue up before freeing the requests, as those
2466 * are the ones that keep the context and ringbuffer backing objects
2467 * pinned in place.
2468 */
2469
2470 if (i915.enable_execlists) {
2471 /* Ensure irq handler finishes or is cancelled. */
2472 tasklet_kill(&engine->irq_tasklet);
2473
2474 intel_execlists_cancel_requests(engine);
2475 }
2476
2477 /*
2478 * We must free the requests after all the corresponding objects have
2479 * been moved off active lists. Which is the same order as the normal
2480 * retire_requests function does. This is important if objects hold
2481 * implicit references on things such as ppgtt address spaces through
2482 * the request.
2483 */
2484 if (!list_empty(&engine->request_list)) {
2485 struct drm_i915_gem_request *request;
2486
2487 request = list_last_entry(&engine->request_list,
2488 struct drm_i915_gem_request,
2489 link);
2490
2491 i915_gem_request_retire_upto(request);
2492 }
2493
2494 /* Having flushed all requests from all queues, we know that all
2495 * ringbuffers must now be empty. However, since we do not reclaim
2496 * all space when retiring the request (to prevent HEADs colliding
2497 * with rapid ringbuffer wraparound) the amount of available space
2498 * upon reset is less than when we start. Do one more pass over
2499 * all the ringbuffers to reset last_retired_head.
2500 */
2501 list_for_each_entry(ring, &engine->buffers, link) {
2502 ring->last_retired_head = ring->tail;
2503 intel_ring_update_space(ring);
2504 }
2505
2506 engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
2507 }
2508
2509 void i915_gem_reset(struct drm_device *dev)
2510 {
2511 struct drm_i915_private *dev_priv = to_i915(dev);
2512 struct intel_engine_cs *engine;
2513
2514 /*
2515 * Before we free the objects from the requests, we need to inspect
2516 * them for finding the guilty party. As the requests only borrow
2517 * their reference to the objects, the inspection must be done first.
2518 */
2519 for_each_engine(engine, dev_priv)
2520 i915_gem_reset_engine_status(engine);
2521
2522 for_each_engine(engine, dev_priv)
2523 i915_gem_reset_engine_cleanup(engine);
2524 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
2525
2526 i915_gem_context_reset(dev);
2527
2528 i915_gem_restore_fences(dev);
2529 }
2530
2531 static void
2532 i915_gem_retire_work_handler(struct work_struct *work)
2533 {
2534 struct drm_i915_private *dev_priv =
2535 container_of(work, typeof(*dev_priv), gt.retire_work.work);
2536 struct drm_device *dev = &dev_priv->drm;
2537
2538 /* Come back later if the device is busy... */
2539 if (mutex_trylock(&dev->struct_mutex)) {
2540 i915_gem_retire_requests(dev_priv);
2541 mutex_unlock(&dev->struct_mutex);
2542 }
2543
2544 /* Keep the retire handler running until we are finally idle.
2545 * We do not need to do this test under locking as in the worst-case
2546 * we queue the retire worker once too often.
2547 */
2548 if (READ_ONCE(dev_priv->gt.awake)) {
2549 i915_queue_hangcheck(dev_priv);
2550 queue_delayed_work(dev_priv->wq,
2551 &dev_priv->gt.retire_work,
2552 round_jiffies_up_relative(HZ));
2553 }
2554 }
2555
2556 static void
2557 i915_gem_idle_work_handler(struct work_struct *work)
2558 {
2559 struct drm_i915_private *dev_priv =
2560 container_of(work, typeof(*dev_priv), gt.idle_work.work);
2561 struct drm_device *dev = &dev_priv->drm;
2562 struct intel_engine_cs *engine;
2563 unsigned int stuck_engines;
2564 bool rearm_hangcheck;
2565
2566 if (!READ_ONCE(dev_priv->gt.awake))
2567 return;
2568
2569 if (READ_ONCE(dev_priv->gt.active_engines))
2570 return;
2571
2572 rearm_hangcheck =
2573 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2574
2575 if (!mutex_trylock(&dev->struct_mutex)) {
2576 /* Currently busy, come back later */
2577 mod_delayed_work(dev_priv->wq,
2578 &dev_priv->gt.idle_work,
2579 msecs_to_jiffies(50));
2580 goto out_rearm;
2581 }
2582
2583 if (dev_priv->gt.active_engines)
2584 goto out_unlock;
2585
2586 for_each_engine(engine, dev_priv)
2587 i915_gem_batch_pool_fini(&engine->batch_pool);
2588
2589 GEM_BUG_ON(!dev_priv->gt.awake);
2590 dev_priv->gt.awake = false;
2591 rearm_hangcheck = false;
2592
2593 /* As we have disabled hangcheck, we need to unstick any waiters still
2594 * hanging around. However, as we may be racing against the interrupt
2595 * handler or the waiters themselves, we skip enabling the fake-irq.
2596 */
2597 stuck_engines = intel_kick_waiters(dev_priv);
2598 if (unlikely(stuck_engines))
2599 DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
2600 stuck_engines);
2601
2602 if (INTEL_GEN(dev_priv) >= 6)
2603 gen6_rps_idle(dev_priv);
2604 intel_runtime_pm_put(dev_priv);
2605 out_unlock:
2606 mutex_unlock(&dev->struct_mutex);
2607
2608 out_rearm:
2609 if (rearm_hangcheck) {
2610 GEM_BUG_ON(!dev_priv->gt.awake);
2611 i915_queue_hangcheck(dev_priv);
2612 }
2613 }
2614
2615 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2616 {
2617 struct drm_i915_gem_object *obj = to_intel_bo(gem);
2618 struct drm_i915_file_private *fpriv = file->driver_priv;
2619 struct i915_vma *vma, *vn;
2620
2621 mutex_lock(&obj->base.dev->struct_mutex);
2622 list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
2623 if (vma->vm->file == fpriv)
2624 i915_vma_close(vma);
2625 mutex_unlock(&obj->base.dev->struct_mutex);
2626 }
2627
2628 /**
2629 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2630 * @dev: drm device pointer
2631 * @data: ioctl data blob
2632 * @file: drm file pointer
2633 *
2634 * Returns 0 if successful, else an error is returned with the remaining time in
2635 * the timeout parameter.
2636 * -ETIME: object is still busy after timeout
2637 * -ERESTARTSYS: signal interrupted the wait
2638 * -ENOENT: object doesn't exist
2639 * Also possible, but rare:
2640 * -EAGAIN: GPU wedged
2641 * -ENOMEM: damn
2642 * -ENODEV: Internal IRQ fail
2643 * -E?: The add request failed
2644 *
2645 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2646 * non-zero timeout parameter the wait ioctl will wait for the given number of
2647 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2648 * without holding struct_mutex the object may become re-busied before this
2649 * function completes. A similar but shorter race condition exists in the
2650 * busy ioctl.
2651 */
2652 int
2653 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2654 {
2655 struct drm_i915_gem_wait *args = data;
2656 struct drm_i915_gem_object *obj;
2657 struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
2658 int i, n = 0;
2659 int ret;
2660
2661 if (args->flags != 0)
2662 return -EINVAL;
2663
2664 ret = i915_mutex_lock_interruptible(dev);
2665 if (ret)
2666 return ret;
2667
2668 obj = i915_gem_object_lookup(file, args->bo_handle);
2669 if (!obj) {
2670 mutex_unlock(&dev->struct_mutex);
2671 return -ENOENT;
2672 }
2673
2674 if (!obj->active)
2675 goto out;
2676
2677 for (i = 0; i < I915_NUM_ENGINES; i++) {
2678 struct drm_i915_gem_request *req;
2679
2680 req = i915_gem_active_get(&obj->last_read[i],
2681 &obj->base.dev->struct_mutex);
2682 if (req)
2683 requests[n++] = req;
2684 }
2685
2686 out:
2687 i915_gem_object_put(obj);
2688 mutex_unlock(&dev->struct_mutex);
2689
2690 for (i = 0; i < n; i++) {
2691 if (ret == 0)
2692 ret = i915_wait_request(requests[i], true,
2693 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
2694 to_rps_client(file));
2695 i915_gem_request_put(requests[i]);
2696 }
2697 return ret;
2698 }
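/*
 * Hedged userspace sketch (illustrative): a typical caller fills a
 * drm_i915_gem_wait with the bo handle and a timeout in nanoseconds
 * (here 100ms) and treats ETIME as "still busy":
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 100 * 1000 * 1000,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait) && errno == ETIME)
 *		... object still busy after 100ms ...
 *
 * As noted above, a timeout of 0 turns this into a non-blocking busy
 * query.
 */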
2699
2700 static int
2701 __i915_gem_object_sync(struct drm_i915_gem_request *to,
2702 struct drm_i915_gem_request *from)
2703 {
2704 int ret;
2705
2706 if (to->engine == from->engine)
2707 return 0;
2708
2709 if (!i915.semaphores) {
2710 ret = i915_wait_request(from,
2711 from->i915->mm.interruptible,
2712 NULL,
2713 NO_WAITBOOST);
2714 if (ret)
2715 return ret;
2716 } else {
2717 int idx = intel_engine_sync_index(from->engine, to->engine);
2718 if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
2719 return 0;
2720
2721 trace_i915_gem_ring_sync_to(to, from);
2722 ret = to->engine->semaphore.sync_to(to, from);
2723 if (ret)
2724 return ret;
2725
2726 from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
2727 }
2728
2729 return 0;
2730 }
2731
2732 /**
2733 * i915_gem_object_sync - sync an object to a ring.
2734 *
2735 * @obj: object which may be in use on another ring.
2736 * @to: request we are wishing to use
2737 *
2738 * This code is meant to abstract object synchronization with the GPU.
2739 * Conceptually we serialise writes between engines inside the GPU.
2740 * We only allow one engine to write into a buffer at any time, but
2741 * multiple readers. To ensure each has a coherent view of memory, we must:
2742 *
2743 * - If there is an outstanding write request to the object, the new
2744 * request must wait for it to complete (either CPU or in hw, requests
2745 * on the same ring will be naturally ordered).
2746 *
2747 * - If we are a write request (pending_write_domain is set), the new
2748 * request must wait for outstanding read requests to complete.
2749 *
2750 * Returns 0 if successful, else propagates up the lower layer error.
2751 */
2752 int
2753 i915_gem_object_sync(struct drm_i915_gem_object *obj,
2754 struct drm_i915_gem_request *to)
2755 {
2756 struct i915_gem_active *active;
2757 unsigned long active_mask;
2758 int idx;
2759
2760 lockdep_assert_held(&obj->base.dev->struct_mutex);
2761
2762 active_mask = obj->active;
2763 if (!active_mask)
2764 return 0;
2765
2766 if (obj->base.pending_write_domain) {
2767 active = obj->last_read;
2768 } else {
2769 active_mask = 1;
2770 active = &obj->last_write;
2771 }
2772
2773 for_each_active(active_mask, idx) {
2774 struct drm_i915_gem_request *request;
2775 int ret;
2776
2777 request = i915_gem_active_peek(&active[idx],
2778 &obj->base.dev->struct_mutex);
2779 if (!request)
2780 continue;
2781
2782 ret = __i915_gem_object_sync(to, request);
2783 if (ret)
2784 return ret;
2785 }
2786
2787 return 0;
2788 }
2789
2790 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2791 {
2792 u32 old_write_domain, old_read_domains;
2793
2794 /* Force a pagefault for domain tracking on next user access */
2795 i915_gem_release_mmap(obj);
2796
2797 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2798 return;
2799
2800 old_read_domains = obj->base.read_domains;
2801 old_write_domain = obj->base.write_domain;
2802
2803 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2804 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2805
2806 trace_i915_gem_object_change_domain(obj,
2807 old_read_domains,
2808 old_write_domain);
2809 }
2810
2811 static void __i915_vma_iounmap(struct i915_vma *vma)
2812 {
2813 GEM_BUG_ON(i915_vma_is_pinned(vma));
2814
2815 if (vma->iomap == NULL)
2816 return;
2817
2818 io_mapping_unmap(vma->iomap);
2819 vma->iomap = NULL;
2820 }
2821
2822 int i915_vma_unbind(struct i915_vma *vma)
2823 {
2824 struct drm_i915_gem_object *obj = vma->obj;
2825 unsigned long active;
2826 int ret;
2827
2828 /* First wait upon any activity as retiring the request may
2829 * have side-effects such as unpinning or even unbinding this vma.
2830 */
2831 active = i915_vma_get_active(vma);
2832 if (active) {
2833 int idx;
2834
2835 /* When a closed VMA is retired, it is unbound - eek.
2836 * In order to prevent it from being recursively closed,
2837 * take a pin on the vma so that the second unbind is
2838 * aborted.
2839 */
2840 __i915_vma_pin(vma);
2841
2842 for_each_active(active, idx) {
2843 ret = i915_gem_active_retire(&vma->last_read[idx],
2844 &vma->vm->dev->struct_mutex);
2845 if (ret)
2846 break;
2847 }
2848
2849 __i915_vma_unpin(vma);
2850 if (ret)
2851 return ret;
2852
2853 GEM_BUG_ON(i915_vma_is_active(vma));
2854 }
2855
2856 if (i915_vma_is_pinned(vma))
2857 return -EBUSY;
2858
2859 if (!drm_mm_node_allocated(&vma->node))
2860 goto destroy;
2861
2862 GEM_BUG_ON(obj->bind_count == 0);
2863 GEM_BUG_ON(!obj->pages);
2864
2865 if (i915_vma_is_ggtt(vma) &&
2866 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2867 i915_gem_object_finish_gtt(obj);
2868
2869 /* release the fence reg _after_ flushing */
2870 ret = i915_gem_object_put_fence(obj);
2871 if (ret)
2872 return ret;
2873
2874 __i915_vma_iounmap(vma);
2875 }
2876
2877 if (likely(!vma->vm->closed)) {
2878 trace_i915_vma_unbind(vma);
2879 vma->vm->unbind_vma(vma);
2880 }
2881 vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
2882
2883 drm_mm_remove_node(&vma->node);
2884 list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
2885
2886 if (i915_vma_is_ggtt(vma)) {
2887 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2888 obj->map_and_fenceable = false;
2889 } else if (vma->ggtt_view.pages) {
2890 sg_free_table(vma->ggtt_view.pages);
2891 kfree(vma->ggtt_view.pages);
2892 }
2893 vma->ggtt_view.pages = NULL;
2894 }
2895
2896 /* Since the unbound list is global, only move to that list if
2897 * no more VMAs exist. */
2898 if (--obj->bind_count == 0)
2899 list_move_tail(&obj->global_list,
2900 &to_i915(obj->base.dev)->mm.unbound_list);
2901
2902 /* And finally, now that the object is completely decoupled from this vma,
2903 * we can drop its hold on the backing storage and allow it to be
2904 * reaped by the shrinker.
2905 */
2906 i915_gem_object_unpin_pages(obj);
2907
2908 destroy:
2909 if (unlikely(i915_vma_is_closed(vma)))
2910 i915_vma_destroy(vma);
2911
2912 return 0;
2913 }
2914
2915 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
2916 {
2917 struct intel_engine_cs *engine;
2918 int ret;
2919
2920 lockdep_assert_held(&dev_priv->drm.struct_mutex);
2921
2922 for_each_engine(engine, dev_priv) {
2923 if (engine->last_context == NULL)
2924 continue;
2925
2926 ret = intel_engine_idle(engine);
2927 if (ret)
2928 return ret;
2929 }
2930
2931 return 0;
2932 }
2933
2934 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
2935 unsigned long cache_level)
2936 {
2937 struct drm_mm_node *gtt_space = &vma->node;
2938 struct drm_mm_node *other;
2939
2940 /*
2941 * On some machines we have to be careful when putting differing types
2942 * of snoopable memory together to avoid the prefetcher crossing memory
2943 * domains and dying. During vm initialisation, we decide whether or not
2944 * these constraints apply and set the drm_mm.color_adjust
2945 * appropriately.
2946 */
2947 if (vma->vm->mm.color_adjust == NULL)
2948 return true;
2949
2950 if (!drm_mm_node_allocated(gtt_space))
2951 return true;
2952
2953 if (list_empty(&gtt_space->node_list))
2954 return true;
2955
2956 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2957 if (other->allocated && !other->hole_follows && other->color != cache_level)
2958 return false;
2959
2960 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2961 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2962 return false;
2963
2964 return true;
2965 }
2966
2967 /**
2968 * i915_vma_insert - finds a slot for the vma in its address space
2969 * @vma: the vma
2970 * @size: requested size in bytes (can be larger than the VMA)
2971 * @alignment: required alignment
2972 * @flags: mask of PIN_* flags to use
2973 *
2974 * First we try to allocate some free space that meets the requirements for
2975 * the VMA. Failing that, if the flags permit, we evict an old VMA,
2976 * preferably the oldest idle entry, to make room for the new VMA.
2977 *
2978 * Returns:
2979 * 0 on success, negative error code otherwise.
2980 */
2981 static int
2982 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
2983 {
2984 struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
2985 struct drm_i915_gem_object *obj = vma->obj;
2986 u64 start, end;
2987 u64 min_alignment;
2988 int ret;
2989
2990 GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
2991 GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
2992
2993 size = max(size, vma->size);
2994 if (flags & PIN_MAPPABLE)
2995 size = i915_gem_get_ggtt_size(dev_priv, size, obj->tiling_mode);
2996
2997 min_alignment =
2998 i915_gem_get_ggtt_alignment(dev_priv, size, obj->tiling_mode,
2999 flags & PIN_MAPPABLE);
3000 if (alignment == 0)
3001 alignment = min_alignment;
3002 if (alignment & (min_alignment - 1)) {
3003 DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
3004 alignment, min_alignment);
3005 return -EINVAL;
3006 }
3007
3008 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3009
3010 end = vma->vm->total;
3011 if (flags & PIN_MAPPABLE)
3012 end = min_t(u64, end, dev_priv->ggtt.mappable_end);
3013 if (flags & PIN_ZONE_4G)
3014 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
3015
3016 /* If binding the object/GGTT view requires more space than the entire
3017 * aperture has, reject it early before evicting everything in a vain
3018 * attempt to find space.
3019 */
3020 if (size > end) {
3021 DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
3022 size, obj->base.size,
3023 flags & PIN_MAPPABLE ? "mappable" : "total",
3024 end);
3025 return -E2BIG;
3026 }
3027
3028 ret = i915_gem_object_get_pages(obj);
3029 if (ret)
3030 return ret;
3031
3032 i915_gem_object_pin_pages(obj);
3033
3034 if (flags & PIN_OFFSET_FIXED) {
3035 u64 offset = flags & PIN_OFFSET_MASK;
3036 if (offset & (alignment - 1) || offset > end - size) {
3037 ret = -EINVAL;
3038 goto err_unpin;
3039 }
3040
3041 vma->node.start = offset;
3042 vma->node.size = size;
3043 vma->node.color = obj->cache_level;
3044 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3045 if (ret) {
3046 ret = i915_gem_evict_for_vma(vma);
3047 if (ret == 0)
3048 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3049 if (ret)
3050 goto err_unpin;
3051 }
3052 } else {
3053 u32 search_flag, alloc_flag;
3054
3055 if (flags & PIN_HIGH) {
3056 search_flag = DRM_MM_SEARCH_BELOW;
3057 alloc_flag = DRM_MM_CREATE_TOP;
3058 } else {
3059 search_flag = DRM_MM_SEARCH_DEFAULT;
3060 alloc_flag = DRM_MM_CREATE_DEFAULT;
3061 }
3062
3063 /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3064 * so we know that we always have a minimum alignment of 4096.
3065 * The drm_mm range manager is optimised to return results
3066 * with zero alignment, so where possible use the optimal
3067 * path.
3068 */
3069 if (alignment <= 4096)
3070 alignment = 0;
3071
3072 search_free:
3073 ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
3074 &vma->node,
3075 size, alignment,
3076 obj->cache_level,
3077 start, end,
3078 search_flag,
3079 alloc_flag);
3080 if (ret) {
3081 ret = i915_gem_evict_something(vma->vm, size, alignment,
3082 obj->cache_level,
3083 start, end,
3084 flags);
3085 if (ret == 0)
3086 goto search_free;
3087
3088 goto err_unpin;
3089 }
3090 }
3091 GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
3092
3093 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3094 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3095 obj->bind_count++;
3096
3097 return 0;
3098
3099 err_unpin:
3100 i915_gem_object_unpin_pages(obj);
3101 return ret;
3102 }
3103
3104 bool
3105 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3106 bool force)
3107 {
3108 /* If we don't have a page list set up, then we're not pinned
3109 * to GPU, and we can ignore the cache flush because it'll happen
3110 * again at bind time.
3111 */
3112 if (obj->pages == NULL)
3113 return false;
3114
3115 /*
3116 * Stolen memory is always coherent with the GPU as it is explicitly
3117 * marked as wc by the system, or the system is cache-coherent.
3118 */
3119 if (obj->stolen || obj->phys_handle)
3120 return false;
3121
3122 /* If the GPU is snooping the contents of the CPU cache,
3123 * we do not need to manually clear the CPU cache lines. However,
3124 * the caches are only snooped when the render cache is
3125 * flushed/invalidated. As we always have to emit invalidations
3126 * and flushes when moving into and out of the RENDER domain, correct
3127 * snooping behaviour occurs naturally as the result of our domain
3128 * tracking.
3129 */
3130 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3131 obj->cache_dirty = true;
3132 return false;
3133 }
3134
3135 trace_i915_gem_object_clflush(obj);
3136 drm_clflush_sg(obj->pages);
3137 obj->cache_dirty = false;
3138
3139 return true;
3140 }
3141
3142 /** Flushes the GTT write domain for the object if it's dirty. */
3143 static void
3144 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3145 {
3146 uint32_t old_write_domain;
3147
3148 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3149 return;
3150
3151 /* No actual flushing is required for the GTT write domain. Writes
3152 * to it immediately go to main memory as far as we know, so there's
3153 * no chipset flush. It also doesn't land in render cache.
3154 *
3155 * However, we do have to enforce the order so that all writes through
3156 * the GTT land before any writes to the device, such as updates to
3157 * the GATT itself.
3158 */
3159 wmb();
3160
3161 old_write_domain = obj->base.write_domain;
3162 obj->base.write_domain = 0;
3163
3164 intel_fb_obj_flush(obj, false, ORIGIN_GTT);
3165
3166 trace_i915_gem_object_change_domain(obj,
3167 obj->base.read_domains,
3168 old_write_domain);
3169 }
3170
3171 /** Flushes the CPU write domain for the object if it's dirty. */
3172 static void
3173 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3174 {
3175 uint32_t old_write_domain;
3176
3177 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3178 return;
3179
3180 if (i915_gem_clflush_object(obj, obj->pin_display))
3181 i915_gem_chipset_flush(to_i915(obj->base.dev));
3182
3183 old_write_domain = obj->base.write_domain;
3184 obj->base.write_domain = 0;
3185
3186 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3187
3188 trace_i915_gem_object_change_domain(obj,
3189 obj->base.read_domains,
3190 old_write_domain);
3191 }
3192
3193 /**
3194 * Moves a single object to the GTT read, and possibly write domain.
3195 * @obj: object to act on
3196 * @write: ask for write access or read only
3197 *
3198 * This function returns when the move is complete, including waiting on
3199 * flushes to occur.
3200 */
3201 int
3202 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3203 {
3204 uint32_t old_write_domain, old_read_domains;
3205 struct i915_vma *vma;
3206 int ret;
3207
3208 ret = i915_gem_object_wait_rendering(obj, !write);
3209 if (ret)
3210 return ret;
3211
3212 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3213 return 0;
3214
3215 /* Flush and acquire obj->pages so that we are coherent through
3216 * direct access in memory with previous cached writes through
3217 * shmemfs and that our cache domain tracking remains valid.
3218 * For example, if the obj->filp was moved to swap without us
3219 * being notified and releasing the pages, we would mistakenly
3220 * continue to assume that the obj remained out of the CPU cached
3221 * domain.
3222 */
3223 ret = i915_gem_object_get_pages(obj);
3224 if (ret)
3225 return ret;
3226
3227 i915_gem_object_flush_cpu_write_domain(obj);
3228
3229 /* Serialise direct access to this object with the barriers for
3230 * coherent writes from the GPU, by effectively invalidating the
3231 * GTT domain upon first access.
3232 */
3233 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3234 mb();
3235
3236 old_write_domain = obj->base.write_domain;
3237 old_read_domains = obj->base.read_domains;
3238
3239 /* It should now be out of any other write domains, and we can update
3240 * the domain values for our changes.
3241 */
3242 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3243 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3244 if (write) {
3245 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3246 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3247 obj->dirty = 1;
3248 }
3249
3250 trace_i915_gem_object_change_domain(obj,
3251 old_read_domains,
3252 old_write_domain);
3253
3254 /* And bump the LRU for this access */
3255 vma = i915_gem_obj_to_ggtt(obj);
3256 if (vma &&
3257 drm_mm_node_allocated(&vma->node) &&
3258 !i915_vma_is_active(vma))
3259 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3260
3261 return 0;
3262 }
3263
3264 /**
3265 * Changes the cache-level of an object across all VMA.
3266 * @obj: object to act on
3267 * @cache_level: new cache level to set for the object
3268 *
3269 * After this function returns, the object will be in the new cache-level
3270 * across all GTT and the contents of the backing storage will be coherent,
3271 * with respect to the new cache-level. In order to keep the backing storage
3272 * coherent for all users, we only allow a single cache level to be set
3273 * globally on the object and prevent it from being changed whilst the
3274 * hardware is reading from the object. That is, if the object is currently
3275 * on the scanout it will be set to uncached (or equivalent display
3276 * cache coherency) and all non-MOCS GPU access will also be uncached so
3277 * that all direct access to the scanout remains coherent.
3278 */
3279 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3280 enum i915_cache_level cache_level)
3281 {
3282 struct i915_vma *vma;
3283 int ret = 0;
3284
3285 if (obj->cache_level == cache_level)
3286 goto out;
3287
3288 /* Inspect the list of currently bound VMA and unbind any that would
3289 * be invalid given the new cache-level. This is principally to
3290 * catch the issue of the CS prefetch crossing page boundaries and
3291 * reading an invalid PTE on older architectures.
3292 */
3293 restart:
3294 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3295 if (!drm_mm_node_allocated(&vma->node))
3296 continue;
3297
3298 if (i915_vma_is_pinned(vma)) {
3299 DRM_DEBUG("can not change the cache level of pinned objects\n");
3300 return -EBUSY;
3301 }
3302
3303 if (i915_gem_valid_gtt_space(vma, cache_level))
3304 continue;
3305
3306 ret = i915_vma_unbind(vma);
3307 if (ret)
3308 return ret;
3309
3310 /* As unbinding may affect other elements in the
3311 * obj->vma_list (due to side-effects from retiring
3312 * an active vma), play safe and restart the iterator.
3313 */
3314 goto restart;
3315 }
3316
3317 /* We can reuse the existing drm_mm nodes but need to change the
3318 * cache-level on the PTE. We could simply unbind them all and
3319 * rebind with the correct cache-level on next use. However since
3320 * we already have a valid slot, dma mapping, pages etc, we may as well
3321 * rewrite the PTE in the belief that doing so tramples upon less
3322 * state and so involves less work.
3323 */
3324 if (obj->bind_count) {
3325 /* Before we change the PTE, the GPU must not be accessing it.
3326 * If we wait upon the object, we know that all the bound
3327 * VMA are no longer active.
3328 */
3329 ret = i915_gem_object_wait_rendering(obj, false);
3330 if (ret)
3331 return ret;
3332
3333 if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
3334 /* Access to snoopable pages through the GTT is
3335 * incoherent and on some machines causes a hard
3336 * lockup. Relinquish the CPU mmapping to force
3337 * userspace to refault in the pages and we can
3338 * then double check if the GTT mapping is still
3339 * valid for that pointer access.
3340 */
3341 i915_gem_release_mmap(obj);
3342
3343 /* As we no longer need a fence for GTT access,
3344 * we can relinquish it now (and so prevent having
3345 * to steal a fence from someone else on the next
3346 * fence request). Note GPU activity would have
3347 * dropped the fence as all snoopable access is
3348 * supposed to be linear.
3349 */
3350 ret = i915_gem_object_put_fence(obj);
3351 if (ret)
3352 return ret;
3353 } else {
3354 /* We either have incoherent backing store and
3355 * so no GTT access or the architecture is fully
3356 * coherent. In such cases, existing GTT mmaps
3357 * ignore the cache bit in the PTE and we can
3358 * rewrite it without confusing the GPU or having
3359 * to force userspace to fault back in its mmaps.
3360 */
3361 }
3362
3363 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3364 if (!drm_mm_node_allocated(&vma->node))
3365 continue;
3366
3367 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3368 if (ret)
3369 return ret;
3370 }
3371 }
3372
3373 list_for_each_entry(vma, &obj->vma_list, obj_link)
3374 vma->node.color = cache_level;
3375 obj->cache_level = cache_level;
3376
3377 out:
3378 /* Flush the dirty CPU caches to the backing storage so that the
3379 * object is now coherent at its new cache level (with respect
3380 * to the access domain).
3381 */
3382 if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
3383 if (i915_gem_clflush_object(obj, true))
3384 i915_gem_chipset_flush(to_i915(obj->base.dev));
3385 }
3386
3387 return 0;
3388 }
3389
3390 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3391 struct drm_file *file)
3392 {
3393 struct drm_i915_gem_caching *args = data;
3394 struct drm_i915_gem_object *obj;
3395
3396 obj = i915_gem_object_lookup(file, args->handle);
3397 if (!obj)
3398 return -ENOENT;
3399
3400 switch (obj->cache_level) {
3401 case I915_CACHE_LLC:
3402 case I915_CACHE_L3_LLC:
3403 args->caching = I915_CACHING_CACHED;
3404 break;
3405
3406 case I915_CACHE_WT:
3407 args->caching = I915_CACHING_DISPLAY;
3408 break;
3409
3410 default:
3411 args->caching = I915_CACHING_NONE;
3412 break;
3413 }
3414
3415 i915_gem_object_put_unlocked(obj);
3416 return 0;
3417 }
3418
3419 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3420 struct drm_file *file)
3421 {
3422 struct drm_i915_private *dev_priv = to_i915(dev);
3423 struct drm_i915_gem_caching *args = data;
3424 struct drm_i915_gem_object *obj;
3425 enum i915_cache_level level;
3426 int ret;
3427
3428 switch (args->caching) {
3429 case I915_CACHING_NONE:
3430 level = I915_CACHE_NONE;
3431 break;
3432 case I915_CACHING_CACHED:
3433 /*
3434 * Due to a HW issue on BXT A stepping, GPU stores via a
3435 * snooped mapping may leave stale data in a corresponding CPU
3436 * cacheline, whereas normally such cachelines would get
3437 * invalidated.
3438 */
3439 if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
3440 return -ENODEV;
3441
3442 level = I915_CACHE_LLC;
3443 break;
3444 case I915_CACHING_DISPLAY:
3445 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3446 break;
3447 default:
3448 return -EINVAL;
3449 }
3450
3451 intel_runtime_pm_get(dev_priv);
3452
3453 ret = i915_mutex_lock_interruptible(dev);
3454 if (ret)
3455 goto rpm_put;
3456
3457 obj = i915_gem_object_lookup(file, args->handle);
3458 if (!obj) {
3459 ret = -ENOENT;
3460 goto unlock;
3461 }
3462
3463 ret = i915_gem_object_set_cache_level(obj, level);
3464
3465 i915_gem_object_put(obj);
3466 unlock:
3467 mutex_unlock(&dev->struct_mutex);
3468 rpm_put:
3469 intel_runtime_pm_put(dev_priv);
3470
3471 return ret;
3472 }
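/*
 * Hedged userspace sketch (illustrative): switching a bo to LLC-cached
 * ahead of CPU-heavy access, again assuming "fd" and "handle" as above:
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	err = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *
 * On platforms without LLC or snooping this is expected to fail with
 * ENODEV, per the check above.
 */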
3473
3474 /*
3475 * Prepare buffer for display plane (scanout, cursors, etc).
3476 * Can be called from an uninterruptible phase (modesetting) and allows
3477 * any flushes to be pipelined (for pageflips).
3478 */
3479 int
3480 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3481 u32 alignment,
3482 const struct i915_ggtt_view *view)
3483 {
3484 u32 old_read_domains, old_write_domain;
3485 int ret;
3486
3487 /* Mark the pin_display early so that we account for the
3488 * display coherency whilst setting up the cache domains.
3489 */
3490 obj->pin_display++;
3491
3492 /* The display engine is not coherent with the LLC cache on gen6. As
3493 * a result, we make sure that the pinning that is about to occur is
3494 * done with uncached PTEs. This is lowest common denominator for all
3495 * chipsets.
3496 *
3497 * However for gen6+, we could do better by using the GFDT bit instead
3498 * of uncaching, which would allow us to flush all the LLC-cached data
3499 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3500 */
3501 ret = i915_gem_object_set_cache_level(obj,
3502 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
3503 if (ret)
3504 goto err_unpin_display;
3505
3506 /* As the user may map the buffer once pinned in the display plane
3507 * (e.g. libkms for the bootup splash), we have to ensure that we
3508 * always use map_and_fenceable for all scanout buffers.
3509 */
3510 ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3511 view->type == I915_GGTT_VIEW_NORMAL ?
3512 PIN_MAPPABLE : 0);
3513 if (ret)
3514 goto err_unpin_display;
3515
3516 i915_gem_object_flush_cpu_write_domain(obj);
3517
3518 old_write_domain = obj->base.write_domain;
3519 old_read_domains = obj->base.read_domains;
3520
3521 /* It should now be out of any other write domains, and we can update
3522 * the domain values for our changes.
3523 */
3524 obj->base.write_domain = 0;
3525 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3526
3527 trace_i915_gem_object_change_domain(obj,
3528 old_read_domains,
3529 old_write_domain);
3530
3531 return 0;
3532
3533 err_unpin_display:
3534 obj->pin_display--;
3535 return ret;
3536 }
3537
3538 void
3539 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
3540 const struct i915_ggtt_view *view)
3541 {
3542 if (WARN_ON(obj->pin_display == 0))
3543 return;
3544
3545 i915_gem_object_ggtt_unpin_view(obj, view);
3546
3547 obj->pin_display--;
3548 }
3549
3550 /**
3551 * Moves a single object to the CPU read, and possibly write domain.
3552 * @obj: object to act on
3553 * @write: requesting write or read-only access
3554 *
3555 * This function returns when the move is complete, including waiting on
3556 * flushes to occur.
3557 */
3558 int
3559 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3560 {
3561 uint32_t old_write_domain, old_read_domains;
3562 int ret;
3563
3564 ret = i915_gem_object_wait_rendering(obj, !write);
3565 if (ret)
3566 return ret;
3567
3568 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3569 return 0;
3570
3571 i915_gem_object_flush_gtt_write_domain(obj);
3572
3573 old_write_domain = obj->base.write_domain;
3574 old_read_domains = obj->base.read_domains;
3575
3576 /* Flush the CPU cache if it's still invalid. */
3577 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3578 i915_gem_clflush_object(obj, false);
3579
3580 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3581 }
3582
3583 /* It should now be out of any other write domains, and we can update
3584 * the domain values for our changes.
3585 */
3586 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3587
3588 /* If we're writing through the CPU, then the GPU read domains will
3589 * need to be invalidated at next use.
3590 */
3591 if (write) {
3592 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3593 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3594 }
3595
3596 trace_i915_gem_object_change_domain(obj,
3597 old_read_domains,
3598 old_write_domain);
3599
3600 return 0;
3601 }
3602
3603 /* Throttle our rendering by waiting until the ring has completed our requests
3604 * emitted over 20 msec ago.
3605 *
3606 * Note that if we were to use the current jiffies each time around the loop,
3607 * we wouldn't escape the function with any frames outstanding if the time to
3608 * render a frame was over 20ms.
3609 *
3610 * This should get us reasonable parallelism between CPU and GPU but also
3611 * relatively low latency when blocking on a particular request to finish.
3612 */
3613 static int
3614 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3615 {
3616 struct drm_i915_private *dev_priv = to_i915(dev);
3617 struct drm_i915_file_private *file_priv = file->driver_priv;
3618 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3619 struct drm_i915_gem_request *request, *target = NULL;
3620 int ret;
3621
3622 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3623 if (ret)
3624 return ret;
3625
3626 /* ABI: return -EIO if already wedged */
3627 if (i915_terminally_wedged(&dev_priv->gpu_error))
3628 return -EIO;
3629
3630 spin_lock(&file_priv->mm.lock);
3631 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3632 if (time_after_eq(request->emitted_jiffies, recent_enough))
3633 break;
3634
3635 /*
3636 * Note that the request might not have been submitted yet,
3637 * in which case emitted_jiffies will be zero.
3638 */
3639 if (!request->emitted_jiffies)
3640 continue;
3641
3642 target = request;
3643 }
3644 if (target)
3645 i915_gem_request_get(target);
3646 spin_unlock(&file_priv->mm.lock);
3647
3648 if (target == NULL)
3649 return 0;
3650
3651 ret = i915_wait_request(target, true, NULL, NULL);
3652 i915_gem_request_put(target);
3653
3654 return ret;
3655 }
3656
3657 static bool
3658 i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3659 {
3660 struct drm_i915_gem_object *obj = vma->obj;
3661
3662 if (!drm_mm_node_allocated(&vma->node))
3663 return false;
3664
3665 if (vma->node.size < size)
3666 return true;
3667
3668 if (alignment && vma->node.start & (alignment - 1))
3669 return true;
3670
3671 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3672 return true;
3673
3674 if (flags & PIN_OFFSET_BIAS &&
3675 vma->node.start < (flags & PIN_OFFSET_MASK))
3676 return true;
3677
3678 if (flags & PIN_OFFSET_FIXED &&
3679 vma->node.start != (flags & PIN_OFFSET_MASK))
3680 return true;
3681
3682 return false;
3683 }
3684
3685 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3686 {
3687 struct drm_i915_gem_object *obj = vma->obj;
3688 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3689 bool mappable, fenceable;
3690 u32 fence_size, fence_alignment;
3691
3692 fence_size = i915_gem_get_ggtt_size(dev_priv,
3693 obj->base.size,
3694 obj->tiling_mode);
3695 fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
3696 obj->base.size,
3697 obj->tiling_mode,
3698 true);
3699
3700 fenceable = (vma->node.size == fence_size &&
3701 (vma->node.start & (fence_alignment - 1)) == 0);
3702
3703 mappable = (vma->node.start + fence_size <=
3704 dev_priv->ggtt.mappable_end);
3705
3706 obj->map_and_fenceable = mappable && fenceable;
3707 }
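/*
 * Worked example (illustrative): a 640KiB X-tiled object on gen3 gets
 * fence_size = 1MiB and fence_alignment = 1MiB from the helpers above;
 * a vma node of exactly 1MiB starting on a 1MiB boundary is therefore
 * "fenceable", and if start + 1MiB also lies below ggtt.mappable_end it
 * is "mappable" too, so map_and_fenceable is set.
 */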
3708
3709 int __i915_vma_do_pin(struct i915_vma *vma,
3710 u64 size, u64 alignment, u64 flags)
3711 {
3712 unsigned int bound = vma->flags;
3713 int ret;
3714
3715 GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
3716 GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
3717
3718 if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
3719 ret = -EBUSY;
3720 goto err;
3721 }
3722
3723 if ((bound & I915_VMA_BIND_MASK) == 0) {
3724 ret = i915_vma_insert(vma, size, alignment, flags);
3725 if (ret)
3726 goto err;
3727 }
3728
3729 ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
3730 if (ret)
3731 goto err;
3732
3733 if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
3734 __i915_vma_set_map_and_fenceable(vma);
3735
3736 GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
3737 return 0;
3738
3739 err:
3740 __i915_vma_unpin(vma);
3741 return ret;
3742 }
3743
3744 int
3745 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3746 const struct i915_ggtt_view *view,
3747 u64 size,
3748 u64 alignment,
3749 u64 flags)
3750 {
3751 struct i915_vma *vma;
3752 int ret;
3753
3754 if (!view)
3755 view = &i915_ggtt_view_normal;
3756
3757 vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
3758 if (IS_ERR(vma))
3759 return PTR_ERR(vma);
3760
3761 if (i915_vma_misplaced(vma, size, alignment, flags)) {
3762 if (flags & PIN_NONBLOCK &&
3763 (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
3764 return -ENOSPC;
3765
3766 WARN(i915_vma_is_pinned(vma),
3767 "bo is already pinned in ggtt with incorrect alignment:"
3768 " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
3769 " obj->map_and_fenceable=%d\n",
3770 upper_32_bits(vma->node.start),
3771 lower_32_bits(vma->node.start),
3772 alignment,
3773 !!(flags & PIN_MAPPABLE),
3774 obj->map_and_fenceable);
3775 ret = i915_vma_unbind(vma);
3776 if (ret)
3777 return ret;
3778 }
3779
3780 return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3781 }
3782
3783 void
3784 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3785 const struct i915_ggtt_view *view)
3786 {
3787 i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
3788 }
3789
3790 int
3791 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3792 struct drm_file *file)
3793 {
3794 struct drm_i915_gem_busy *args = data;
3795 struct drm_i915_gem_object *obj;
3796 int ret;
3797
3798 ret = i915_mutex_lock_interruptible(dev);
3799 if (ret)
3800 return ret;
3801
3802 obj = i915_gem_object_lookup(file, args->handle);
3803 if (!obj) {
3804 ret = -ENOENT;
3805 goto unlock;
3806 }
3807
3808 /* Count all active objects as busy, even if they are currently not used
3809 * by the gpu. Users of this interface expect objects to eventually
3810 * become non-busy without any further actions.
3811 */
3812 args->busy = 0;
3813 if (obj->active) {
3814 struct drm_i915_gem_request *req;
3815 int i;
3816
3817 for (i = 0; i < I915_NUM_ENGINES; i++) {
3818 req = i915_gem_active_peek(&obj->last_read[i],
3819 &obj->base.dev->struct_mutex);
3820 if (req)
3821 args->busy |= 1 << (16 + req->engine->exec_id);
3822 }
3823 req = i915_gem_active_peek(&obj->last_write,
3824 &obj->base.dev->struct_mutex);
3825 if (req)
3826 args->busy |= req->engine->exec_id;
3827 }
3828
3829 i915_gem_object_put(obj);
3830 unlock:
3831 mutex_unlock(&dev->struct_mutex);
3832 return ret;
3833 }
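/*
 * Hedged userspace sketch (illustrative): polling busyness without
 * blocking, with "fd" and "handle" as before:
 *
 *	struct drm_i915_gem_busy busy = { .handle = handle };
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
 *	if (busy.busy)
 *		... reads and/or writes still outstanding ...
 *
 * Per the packing above, the low 16 bits report the engine with an
 * outstanding write (if any) and the upper bits the engines with
 * outstanding reads.
 */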
3834
3835 int
3836 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3837 struct drm_file *file_priv)
3838 {
3839 return i915_gem_ring_throttle(dev, file_priv);
3840 }
3841
3842 int
3843 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3844 struct drm_file *file_priv)
3845 {
3846 struct drm_i915_private *dev_priv = to_i915(dev);
3847 struct drm_i915_gem_madvise *args = data;
3848 struct drm_i915_gem_object *obj;
3849 int ret;
3850
3851 switch (args->madv) {
3852 case I915_MADV_DONTNEED:
3853 case I915_MADV_WILLNEED:
3854 break;
3855 default:
3856 return -EINVAL;
3857 }
3858
3859 ret = i915_mutex_lock_interruptible(dev);
3860 if (ret)
3861 return ret;
3862
3863 obj = i915_gem_object_lookup(file_priv, args->handle);
3864 if (!obj) {
3865 ret = -ENOENT;
3866 goto unlock;
3867 }
3868
3869 if (i915_gem_obj_is_pinned(obj)) {
3870 ret = -EINVAL;
3871 goto out;
3872 }
3873
3874 if (obj->pages &&
3875 obj->tiling_mode != I915_TILING_NONE &&
3876 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3877 if (obj->madv == I915_MADV_WILLNEED)
3878 i915_gem_object_unpin_pages(obj);
3879 if (args->madv == I915_MADV_WILLNEED)
3880 i915_gem_object_pin_pages(obj);
3881 }
3882
3883 if (obj->madv != __I915_MADV_PURGED)
3884 obj->madv = args->madv;
3885
3886 /* if the object is no longer attached, discard its backing storage */
3887 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
3888 i915_gem_object_truncate(obj);
3889
3890 args->retained = obj->madv != __I915_MADV_PURGED;
3891
3892 out:
3893 i915_gem_object_put(obj);
3894 unlock:
3895 mutex_unlock(&dev->struct_mutex);
3896 return ret;
3897 }
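
/*
 * Illustrative sketch (not part of the original file): a userspace caller
 * using the madvise ioctl above to mark a cached buffer as purgeable and
 * later checking whether its contents survived.  The struct and ioctl come
 * from the i915 uapi; regenerate_contents() and the surrounding code are
 * hypothetical and only for illustration.
 *
 *	Mark the buffer as purgeable while it sits in a userspace cache:
 *
 *	struct drm_i915_gem_madvise madv = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *
 *	Before reusing it, ask for the pages back and check they survived:
 *
 *	madv.madv = I915_MADV_WILLNEED;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	if (!madv.retained)
 *		regenerate_contents(handle);
 */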
3898
3899 void i915_gem_object_init(struct drm_i915_gem_object *obj,
3900 const struct drm_i915_gem_object_ops *ops)
3901 {
3902 int i;
3903
3904 INIT_LIST_HEAD(&obj->global_list);
3905 for (i = 0; i < I915_NUM_ENGINES; i++)
3906 init_request_active(&obj->last_read[i],
3907 i915_gem_object_retire__read);
3908 init_request_active(&obj->last_write,
3909 i915_gem_object_retire__write);
3910 init_request_active(&obj->last_fence, NULL);
3911 INIT_LIST_HEAD(&obj->obj_exec_link);
3912 INIT_LIST_HEAD(&obj->vma_list);
3913 INIT_LIST_HEAD(&obj->batch_pool_link);
3914
3915 obj->ops = ops;
3916
3917 obj->fence_reg = I915_FENCE_REG_NONE;
3918 obj->madv = I915_MADV_WILLNEED;
3919
3920 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
3921 }
3922
3923 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3924 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
3925 .get_pages = i915_gem_object_get_pages_gtt,
3926 .put_pages = i915_gem_object_put_pages_gtt,
3927 };
3928
3929 struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
3930 size_t size)
3931 {
3932 struct drm_i915_gem_object *obj;
3933 struct address_space *mapping;
3934 gfp_t mask;
3935 int ret;
3936
3937 obj = i915_gem_object_alloc(dev);
3938 if (obj == NULL)
3939 return ERR_PTR(-ENOMEM);
3940
3941 ret = drm_gem_object_init(dev, &obj->base, size);
3942 if (ret)
3943 goto fail;
3944
3945 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3946 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
3947 /* 965gm cannot relocate objects above 4GiB. */
3948 mask &= ~__GFP_HIGHMEM;
3949 mask |= __GFP_DMA32;
3950 }
3951
3952 mapping = file_inode(obj->base.filp)->i_mapping;
3953 mapping_set_gfp_mask(mapping, mask);
3954
3955 i915_gem_object_init(obj, &i915_gem_object_ops);
3956
3957 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3958 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3959
3960 if (HAS_LLC(dev)) {
3961 /* On some devices, we can have the GPU use the LLC (the CPU
3962 * cache) for about a 10% performance improvement
3963 * compared to uncached. Graphics requests other than
3964 * display scanout are coherent with the CPU in
3965 * accessing this cache. This means in this mode we
3966 * don't need to clflush on the CPU side, and on the
3967 * GPU side we only need to flush internal caches to
3968 * get data visible to the CPU.
3969 *
3970 * However, we maintain the display planes as UC, and so
3971 * need to rebind when first used as such.
3972 */
3973 obj->cache_level = I915_CACHE_LLC;
3974 } else
3975 obj->cache_level = I915_CACHE_NONE;
3976
3977 trace_i915_gem_object_create(obj);
3978
3979 return obj;
3980
3981 fail:
3982 i915_gem_object_free(obj);
3983
3984 return ERR_PTR(ret);
3985 }
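
/*
 * Illustrative sketch (not part of the original file): since the function
 * above reports failure with ERR_PTR() rather than NULL, callers are
 * expected to follow the usual pattern below (the surrounding variable
 * names are assumptions for illustration).
 *
 *	obj = i915_gem_object_create(dev, PAGE_ALIGN(size));
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 */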
3986
3987 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
3988 {
3989 /* If we are the last user of the backing storage (be it shmemfs
3990 * pages or stolen etc), we know that the pages are going to be
3991 * immediately released. In this case, we can then skip copying
3992 * back the contents from the GPU.
3993 */
3994
3995 if (obj->madv != I915_MADV_WILLNEED)
3996 return false;
3997
3998 if (obj->base.filp == NULL)
3999 return true;
4000
4001 /* At first glance, this looks racy, but then again so would be
4002 * userspace racing mmap against close. However, the first external
4003 * reference to the filp can only be obtained through the
4004 * i915_gem_mmap_ioctl() which safeguards us against the user
4005 * acquiring such a reference whilst we are in the middle of
4006 * freeing the object.
4007 */
4008 return atomic_long_read(&obj->base.filp->f_count) == 1;
4009 }
4010
4011 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4012 {
4013 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4014 struct drm_device *dev = obj->base.dev;
4015 struct drm_i915_private *dev_priv = to_i915(dev);
4016 struct i915_vma *vma, *next;
4017
4018 intel_runtime_pm_get(dev_priv);
4019
4020 trace_i915_gem_object_destroy(obj);
4021
4022 /* All file-owned VMA should have been released by this point through
4023 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4024 * However, the object may also be bound into the global GTT (e.g.
4025 * older GPUs without per-process support, or for direct access through
4026 * the GTT either for the user or for scanout). Those VMA still need to
4027 * be unbound now.
4028 */
4029 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
4030 GEM_BUG_ON(!i915_vma_is_ggtt(vma));
4031 GEM_BUG_ON(i915_vma_is_active(vma));
4032 vma->flags &= ~I915_VMA_PIN_MASK;
4033 i915_vma_close(vma);
4034 }
4035 GEM_BUG_ON(obj->bind_count);
4036
4037 /* Stolen objects don't hold a ref, but do hold a pages pin count.
4038 * Fix that up before progressing. */
4039 if (obj->stolen)
4040 i915_gem_object_unpin_pages(obj);
4041
4042 WARN_ON(obj->frontbuffer_bits);
4043
4044 if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4045 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4046 obj->tiling_mode != I915_TILING_NONE)
4047 i915_gem_object_unpin_pages(obj);
4048
4049 if (WARN_ON(obj->pages_pin_count))
4050 obj->pages_pin_count = 0;
4051 if (discard_backing_storage(obj))
4052 obj->madv = I915_MADV_DONTNEED;
4053 i915_gem_object_put_pages(obj);
4054
4055 BUG_ON(obj->pages);
4056
4057 if (obj->base.import_attach)
4058 drm_prime_gem_destroy(&obj->base, NULL);
4059
4060 if (obj->ops->release)
4061 obj->ops->release(obj);
4062
4063 drm_gem_object_release(&obj->base);
4064 i915_gem_info_remove_obj(dev_priv, obj->base.size);
4065
4066 kfree(obj->bit_17);
4067 i915_gem_object_free(obj);
4068
4069 intel_runtime_pm_put(dev_priv);
4070 }
4071
4072 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4073 struct i915_address_space *vm)
4074 {
4075 struct i915_vma *vma;
4076 list_for_each_entry(vma, &obj->vma_list, obj_link) {
4077 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
4078 vma->vm == vm)
4079 return vma;
4080 }
4081 return NULL;
4082 }
4083
4084 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4085 const struct i915_ggtt_view *view)
4086 {
4087 struct i915_vma *vma;
4088
4089 GEM_BUG_ON(!view);
4090
4091 list_for_each_entry(vma, &obj->vma_list, obj_link)
4092 if (i915_vma_is_ggtt(vma) &&
4093 i915_ggtt_view_equal(&vma->ggtt_view, view))
4094 return vma;
4095 return NULL;
4096 }
4097
4098 static void
4099 i915_gem_stop_engines(struct drm_device *dev)
4100 {
4101 struct drm_i915_private *dev_priv = to_i915(dev);
4102 struct intel_engine_cs *engine;
4103
4104 for_each_engine(engine, dev_priv)
4105 dev_priv->gt.stop_engine(engine);
4106 }
4107
4108 int
4109 i915_gem_suspend(struct drm_device *dev)
4110 {
4111 struct drm_i915_private *dev_priv = to_i915(dev);
4112 int ret = 0;
4113
4114 intel_suspend_gt_powersave(dev_priv);
4115
4116 mutex_lock(&dev->struct_mutex);
4117
4118 /* We have to flush all the executing contexts to main memory so
4119 * that they can be saved in the hibernation image. To ensure the last
4120 * context image is coherent, we have to switch away from it. That
4121 * leaves the dev_priv->kernel_context still active when
4122 * we actually suspend, and its image in memory may not match the GPU
4123 * state. Fortunately, the kernel_context is disposable and we do
4124 * not rely on its state.
4125 */
4126 ret = i915_gem_switch_to_kernel_context(dev_priv);
4127 if (ret)
4128 goto err;
4129
4130 ret = i915_gem_wait_for_idle(dev_priv);
4131 if (ret)
4132 goto err;
4133
4134 i915_gem_retire_requests(dev_priv);
4135
4136 /* Note that rather than stopping the engines, all we have to do
4137 * is assert that every RING_HEAD == RING_TAIL (all execution complete)
4138 * and similar for all logical context images (to ensure they are
4139 * all ready for hibernation).
4140 */
4141 i915_gem_stop_engines(dev);
4142 i915_gem_context_lost(dev_priv);
4143 mutex_unlock(&dev->struct_mutex);
4144
4145 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4146 cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4147 flush_delayed_work(&dev_priv->gt.idle_work);
4148
4149 /* Assert that we successfully flushed all the work and
4150 * reset the GPU back to its idle, low power state.
4151 */
4152 WARN_ON(dev_priv->gt.awake);
4153
4154 return 0;
4155
4156 err:
4157 mutex_unlock(&dev->struct_mutex);
4158 return ret;
4159 }
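
/*
 * Illustrative sketch (not part of the original file): the suspend/resume
 * pair above is expected to be driven from the driver's power-management
 * paths, which live outside this file.  Roughly, with error handling and
 * the surrounding code elided:
 *
 *	ret = i915_gem_suspend(dev);		(on system suspend/freeze)
 *	if (ret)
 *		return ret;
 *	...
 *	i915_gem_resume(dev);			(on system resume/thaw)
 */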
4160
4161 void i915_gem_resume(struct drm_device *dev)
4162 {
4163 struct drm_i915_private *dev_priv = to_i915(dev);
4164
4165 mutex_lock(&dev->struct_mutex);
4166 i915_gem_restore_gtt_mappings(dev);
4167
4168 /* As we didn't flush the kernel context before suspend, we cannot
4169 * guarantee that the context image is complete. So let's just reset
4170 * it and start again.
4171 */
4172 if (i915.enable_execlists)
4173 intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
4174
4175 mutex_unlock(&dev->struct_mutex);
4176 }
4177
4178 void i915_gem_init_swizzling(struct drm_device *dev)
4179 {
4180 struct drm_i915_private *dev_priv = to_i915(dev);
4181
4182 if (INTEL_INFO(dev)->gen < 5 ||
4183 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4184 return;
4185
4186 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4187 DISP_TILE_SURFACE_SWIZZLING);
4188
4189 if (IS_GEN5(dev))
4190 return;
4191
4192 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4193 if (IS_GEN6(dev))
4194 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4195 else if (IS_GEN7(dev))
4196 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4197 else if (IS_GEN8(dev))
4198 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4199 else
4200 BUG();
4201 }
4202
4203 static void init_unused_ring(struct drm_device *dev, u32 base)
4204 {
4205 struct drm_i915_private *dev_priv = to_i915(dev);
4206
4207 I915_WRITE(RING_CTL(base), 0);
4208 I915_WRITE(RING_HEAD(base), 0);
4209 I915_WRITE(RING_TAIL(base), 0);
4210 I915_WRITE(RING_START(base), 0);
4211 }
4212
4213 static void init_unused_rings(struct drm_device *dev)
4214 {
4215 if (IS_I830(dev)) {
4216 init_unused_ring(dev, PRB1_BASE);
4217 init_unused_ring(dev, SRB0_BASE);
4218 init_unused_ring(dev, SRB1_BASE);
4219 init_unused_ring(dev, SRB2_BASE);
4220 init_unused_ring(dev, SRB3_BASE);
4221 } else if (IS_GEN2(dev)) {
4222 init_unused_ring(dev, SRB0_BASE);
4223 init_unused_ring(dev, SRB1_BASE);
4224 } else if (IS_GEN3(dev)) {
4225 init_unused_ring(dev, PRB1_BASE);
4226 init_unused_ring(dev, PRB2_BASE);
4227 }
4228 }
4229
4230 int
4231 i915_gem_init_hw(struct drm_device *dev)
4232 {
4233 struct drm_i915_private *dev_priv = to_i915(dev);
4234 struct intel_engine_cs *engine;
4235 int ret;
4236
4237 /* Double layer security blanket, see i915_gem_init() */
4238 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4239
4240 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
4241 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4242
4243 if (IS_HASWELL(dev))
4244 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4245 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4246
4247 if (HAS_PCH_NOP(dev)) {
4248 if (IS_IVYBRIDGE(dev)) {
4249 u32 temp = I915_READ(GEN7_MSG_CTL);
4250 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4251 I915_WRITE(GEN7_MSG_CTL, temp);
4252 } else if (INTEL_INFO(dev)->gen >= 7) {
4253 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4254 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4255 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4256 }
4257 }
4258
4259 i915_gem_init_swizzling(dev);
4260
4261 /*
4262 * At least 830 can leave some of the unused rings
4263 * "active" (ie. head != tail) after resume which
4264 * will prevent c3 entry. Make sure all unused rings
4265 * are totally idle.
4266 */
4267 init_unused_rings(dev);
4268
4269 BUG_ON(!dev_priv->kernel_context);
4270
4271 ret = i915_ppgtt_init_hw(dev);
4272 if (ret) {
4273 DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4274 goto out;
4275 }
4276
4277 /* Need to do basic initialisation of all rings first: */
4278 for_each_engine(engine, dev_priv) {
4279 ret = engine->init_hw(engine);
4280 if (ret)
4281 goto out;
4282 }
4283
4284 intel_mocs_init_l3cc_table(dev);
4285
4286 /* We can't enable contexts until all firmware is loaded */
4287 ret = intel_guc_setup(dev);
4288 if (ret)
4289 goto out;
4290
4291 out:
4292 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4293 return ret;
4294 }
4295
4296 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4297 {
4298 if (INTEL_INFO(dev_priv)->gen < 6)
4299 return false;
4300
4301 /* TODO: make semaphores and Execlists play nicely together */
4302 if (i915.enable_execlists)
4303 return false;
4304
4305 if (value >= 0)
4306 return value;
4307
4308 #ifdef CONFIG_INTEL_IOMMU
4309 /* Enable semaphores on SNB when IO remapping is off */
4310 if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4311 return false;
4312 #endif
4313
4314 return true;
4315 }
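
/*
 * Illustrative sketch (not part of the original file): the helper above is
 * meant to fold a tristate module parameter (-1 = auto-detect, 0 = force
 * off, 1 = force on) into a final boolean, along the lines of
 *
 *	i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);
 *
 * The real call site lives outside this file, so treat the line above as
 * an assumption for illustration only.
 */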
4316
4317 int i915_gem_init(struct drm_device *dev)
4318 {
4319 struct drm_i915_private *dev_priv = to_i915(dev);
4320 int ret;
4321
4322 mutex_lock(&dev->struct_mutex);
4323
4324 if (!i915.enable_execlists) {
4325 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4326 dev_priv->gt.stop_engine = intel_engine_stop;
4327 } else {
4328 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4329 dev_priv->gt.stop_engine = intel_logical_ring_stop;
4330 }
4331
4332 /* This is just a security blanket to placate dragons.
4333 * On some systems, we very sporadically observe that the first TLBs
4334 * used by the CS may be stale, despite us poking the TLB reset. If
4335 * we hold the forcewake during initialisation these problems
4336 * just magically go away.
4337 */
4338 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4339
4340 i915_gem_init_userptr(dev_priv);
4341
4342 ret = i915_gem_init_ggtt(dev_priv);
4343 if (ret)
4344 goto out_unlock;
4345
4346 ret = i915_gem_context_init(dev);
4347 if (ret)
4348 goto out_unlock;
4349
4350 ret = intel_engines_init(dev);
4351 if (ret)
4352 goto out_unlock;
4353
4354 ret = i915_gem_init_hw(dev);
4355 if (ret == -EIO) {
4356 /* Allow engine initialisation to fail by marking the GPU as
4357 * wedged. But we only want to do this where the GPU is angry,
4358 * for any other failure, such as an allocation failure, bail.
4359 */
4360 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
4361 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
4362 ret = 0;
4363 }
4364
4365 out_unlock:
4366 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4367 mutex_unlock(&dev->struct_mutex);
4368
4369 return ret;
4370 }
4371
4372 void
4373 i915_gem_cleanup_engines(struct drm_device *dev)
4374 {
4375 struct drm_i915_private *dev_priv = to_i915(dev);
4376 struct intel_engine_cs *engine;
4377
4378 for_each_engine(engine, dev_priv)
4379 dev_priv->gt.cleanup_engine(engine);
4380 }
4381
4382 static void
4383 init_engine_lists(struct intel_engine_cs *engine)
4384 {
4385 INIT_LIST_HEAD(&engine->request_list);
4386 }
4387
4388 void
4389 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4390 {
4391 struct drm_device *dev = &dev_priv->drm;
4392
4393 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4394 !IS_CHERRYVIEW(dev_priv))
4395 dev_priv->num_fence_regs = 32;
4396 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4397 IS_I945GM(dev_priv) || IS_G33(dev_priv))
4398 dev_priv->num_fence_regs = 16;
4399 else
4400 dev_priv->num_fence_regs = 8;
4401
4402 if (intel_vgpu_active(dev_priv))
4403 dev_priv->num_fence_regs =
4404 I915_READ(vgtif_reg(avail_rs.fence_num));
4405
4406 /* Initialize fence registers to zero */
4407 i915_gem_restore_fences(dev);
4408
4409 i915_gem_detect_bit_6_swizzle(dev);
4410 }
4411
4412 void
4413 i915_gem_load_init(struct drm_device *dev)
4414 {
4415 struct drm_i915_private *dev_priv = to_i915(dev);
4416 int i;
4417
4418 dev_priv->objects =
4419 kmem_cache_create("i915_gem_object",
4420 sizeof(struct drm_i915_gem_object), 0,
4421 SLAB_HWCACHE_ALIGN,
4422 NULL);
4423 dev_priv->vmas =
4424 kmem_cache_create("i915_gem_vma",
4425 sizeof(struct i915_vma), 0,
4426 SLAB_HWCACHE_ALIGN,
4427 NULL);
4428 dev_priv->requests =
4429 kmem_cache_create("i915_gem_request",
4430 sizeof(struct drm_i915_gem_request), 0,
4431 SLAB_HWCACHE_ALIGN,
4432 NULL);
4433
4434 INIT_LIST_HEAD(&dev_priv->context_list);
4435 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4436 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4437 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4438 for (i = 0; i < I915_NUM_ENGINES; i++)
4439 init_engine_lists(&dev_priv->engine[i]);
4440 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
4441 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4442 INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4443 i915_gem_retire_work_handler);
4444 INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
4445 i915_gem_idle_work_handler);
4446 init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4447 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4448
4449 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4452
4453 init_waitqueue_head(&dev_priv->pending_flip_queue);
4454
4455 dev_priv->mm.interruptible = true;
4456
4457 mutex_init(&dev_priv->fb_tracking.lock);
4458 }
4459
4460 void i915_gem_load_cleanup(struct drm_device *dev)
4461 {
4462 struct drm_i915_private *dev_priv = to_i915(dev);
4463
4464 kmem_cache_destroy(dev_priv->requests);
4465 kmem_cache_destroy(dev_priv->vmas);
4466 kmem_cache_destroy(dev_priv->objects);
4467 }
4468
4469 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4470 {
4471 struct drm_i915_gem_object *obj;
4472
4473 /* Called just before we write the hibernation image.
4474 *
4475 * We need to update the domain tracking to reflect that the CPU
4476 * will be accessing all the pages when creating the hibernation image
4477 * and when restoring from it, and so upon restoration those pages will
4478 * be in the CPU domain.
4479 *
4480 * To make sure the hibernation image contains the latest state,
4481 * we update that state just before writing out the image.
4482 */
4483
4484 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
4485 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4486 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4487 }
4488
4489 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
4490 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4491 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4492 }
4493
4494 return 0;
4495 }
4496
4497 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4498 {
4499 struct drm_i915_file_private *file_priv = file->driver_priv;
4500 struct drm_i915_gem_request *request;
4501
4502 /* Clean up our request list when the client is going away, so that
4503 * later retire_requests won't dereference our soon-to-be-gone
4504 * file_priv.
4505 */
4506 spin_lock(&file_priv->mm.lock);
4507 list_for_each_entry(request, &file_priv->mm.request_list, client_list)
4508 request->file_priv = NULL;
4509 spin_unlock(&file_priv->mm.lock);
4510
4511 if (!list_empty(&file_priv->rps.link)) {
4512 spin_lock(&to_i915(dev)->rps.client_lock);
4513 list_del(&file_priv->rps.link);
4514 spin_unlock(&to_i915(dev)->rps.client_lock);
4515 }
4516 }
4517
4518 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4519 {
4520 struct drm_i915_file_private *file_priv;
4521 int ret;
4522
4523 DRM_DEBUG_DRIVER("\n");
4524
4525 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4526 if (!file_priv)
4527 return -ENOMEM;
4528
4529 file->driver_priv = file_priv;
4530 file_priv->dev_priv = to_i915(dev);
4531 file_priv->file = file;
4532 INIT_LIST_HEAD(&file_priv->rps.link);
4533
4534 spin_lock_init(&file_priv->mm.lock);
4535 INIT_LIST_HEAD(&file_priv->mm.request_list);
4536
4537 file_priv->bsd_engine = -1;
4538
4539 ret = i915_gem_context_open(dev, file);
4540 if (ret)
4541 kfree(file_priv);
4542
4543 return ret;
4544 }
4545
4546 /**
4547 * i915_gem_track_fb - update frontbuffer tracking
4548 * @old: current GEM buffer for the frontbuffer slots
4549 * @new: new GEM buffer for the frontbuffer slots
4550 * @frontbuffer_bits: bitmask of frontbuffer slots
4551 *
4552 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4553 * from @old and setting them in @new. Both @old and @new can be NULL.
4554 */
4555 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4556 struct drm_i915_gem_object *new,
4557 unsigned frontbuffer_bits)
4558 {
4559 if (old) {
4560 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
4561 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
4562 old->frontbuffer_bits &= ~frontbuffer_bits;
4563 }
4564
4565 if (new) {
4566 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
4567 WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
4568 new->frontbuffer_bits |= frontbuffer_bits;
4569 }
4570 }
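
/*
 * Illustrative sketch (not part of the original file): a typical caller
 * swaps the tracking bits when a plane moves from one buffer to another,
 * roughly as below.  The frontbuffer_bits value and the call sites are
 * assumptions for illustration only.
 *
 *	Replacing old_obj with new_obj on a plane:
 *		i915_gem_track_fb(old_obj, new_obj, frontbuffer_bits);
 *
 *	Disabling the plane entirely (only clear the bits):
 *		i915_gem_track_fb(old_obj, NULL, frontbuffer_bits);
 */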
4571
4572 /* All the new VM stuff */
4573 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4574 struct i915_address_space *vm)
4575 {
4576 struct drm_i915_private *dev_priv = to_i915(o->base.dev);
4577 struct i915_vma *vma;
4578
4579 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4580
4581 list_for_each_entry(vma, &o->vma_list, obj_link) {
4582 if (i915_vma_is_ggtt(vma) &&
4583 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4584 continue;
4585 if (vma->vm == vm)
4586 return vma->node.start;
4587 }
4588
4589 WARN(1, "%s vma for this object not found.\n",
4590 i915_is_ggtt(vm) ? "global" : "ppgtt");
4591 return -1;
4592 }
4593
4594 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4595 const struct i915_ggtt_view *view)
4596 {
4597 struct i915_vma *vma;
4598
4599 list_for_each_entry(vma, &o->vma_list, obj_link)
4600 if (i915_vma_is_ggtt(vma) &&
4601 i915_ggtt_view_equal(&vma->ggtt_view, view))
4602 return vma->node.start;
4603
4604 WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
4605 return -1;
4606 }
4607
4608 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4609 struct i915_address_space *vm)
4610 {
4611 struct i915_vma *vma;
4612
4613 list_for_each_entry(vma, &o->vma_list, obj_link) {
4614 if (i915_vma_is_ggtt(vma) &&
4615 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4616 continue;
4617 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4618 return true;
4619 }
4620
4621 return false;
4622 }
4623
4624 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
4625 const struct i915_ggtt_view *view)
4626 {
4627 struct i915_vma *vma;
4628
4629 list_for_each_entry(vma, &o->vma_list, obj_link)
4630 if (i915_vma_is_ggtt(vma) &&
4631 i915_ggtt_view_equal(&vma->ggtt_view, view) &&
4632 drm_mm_node_allocated(&vma->node))
4633 return true;
4634
4635 return false;
4636 }
4637
4638 unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
4639 {
4640 struct i915_vma *vma;
4641
4642 GEM_BUG_ON(list_empty(&o->vma_list));
4643
4644 list_for_each_entry(vma, &o->vma_list, obj_link) {
4645 if (i915_vma_is_ggtt(vma) &&
4646 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
4647 return vma->node.size;
4648 }
4649
4650 return 0;
4651 }
4652
4653 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
4654 {
4655 struct i915_vma *vma;
4656 list_for_each_entry(vma, &obj->vma_list, obj_link)
4657 if (i915_vma_is_pinned(vma))
4658 return true;
4659
4660 return false;
4661 }
4662
4663 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
4664 struct page *
4665 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
4666 {
4667 struct page *page;
4668
4669 /* Only default objects have per-page dirty tracking */
4670 if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
4671 return NULL;
4672
4673 page = i915_gem_object_get_page(obj, n);
4674 set_page_dirty(page);
4675 return page;
4676 }
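
/*
 * Illustrative sketch (not part of the original file): a caller that is
 * about to write to the page through the CPU uses the dirty variant so
 * the page is written back before its backing storage is released,
 * roughly as below (offset/src/len are assumptions for illustration):
 *
 *	page = i915_gem_object_get_dirty_page(obj, n);
 *	vaddr = kmap(page);
 *	memcpy(vaddr + offset, src, len);
 *	kunmap(page);
 */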
4677
4678 /* Allocate a new GEM object and fill it with the supplied data */
4679 struct drm_i915_gem_object *
4680 i915_gem_object_create_from_data(struct drm_device *dev,
4681 const void *data, size_t size)
4682 {
4683 struct drm_i915_gem_object *obj;
4684 struct sg_table *sg;
4685 size_t bytes;
4686 int ret;
4687
4688 obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
4689 if (IS_ERR(obj))
4690 return obj;
4691
4692 ret = i915_gem_object_set_to_cpu_domain(obj, true);
4693 if (ret)
4694 goto fail;
4695
4696 ret = i915_gem_object_get_pages(obj);
4697 if (ret)
4698 goto fail;
4699
4700 i915_gem_object_pin_pages(obj);
4701 sg = obj->pages;
4702 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
4703 obj->dirty = 1; /* Backing store is now out of date */
4704 i915_gem_object_unpin_pages(obj);
4705
4706 if (WARN_ON(bytes != size)) {
4707 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4708 ret = -EFAULT;
4709 goto fail;
4710 }
4711
4712 return obj;
4713
4714 fail:
4715 i915_gem_object_put(obj);
4716 return ERR_PTR(ret);
4717 }
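
/*
 * Illustrative sketch (not part of the original file): the helper above is
 * a convenient way to wrap an in-memory blob, such as firmware data, in a
 * GEM object.  The request_firmware() pairing below is a hypothetical
 * example, not a call site taken from this file.
 *
 *	obj = i915_gem_object_create_from_data(dev, fw->data, fw->size);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 */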