/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
};

static int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
						  bool pipelined);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
					     int write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
						      uint64_t offset,
						      uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
					  bool interruptible);
static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
				       unsigned alignment,
				       bool map_and_fenceable);
static void i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    int nr_to_scan,
				    gfp_t gfp_mask);

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static void i915_gem_info_add_gtt(struct drm_i915_private *dev_priv,
				  struct drm_i915_gem_object *obj)
{
	dev_priv->mm.gtt_count++;
	dev_priv->mm.gtt_memory += obj->gtt_space->size;
	if (obj->gtt_offset < dev_priv->mm.gtt_mappable_end) {
		dev_priv->mm.mappable_gtt_used +=
			min_t(size_t, obj->gtt_space->size,
			      dev_priv->mm.gtt_mappable_end - obj->gtt_offset);
	}
	list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
}

static void i915_gem_info_remove_gtt(struct drm_i915_private *dev_priv,
				     struct drm_i915_gem_object *obj)
{
	dev_priv->mm.gtt_count--;
	dev_priv->mm.gtt_memory -= obj->gtt_space->size;
	if (obj->gtt_offset < dev_priv->mm.gtt_mappable_end) {
		dev_priv->mm.mappable_gtt_used -=
			min_t(size_t, obj->gtt_space->size,
			      dev_priv->mm.gtt_mappable_end - obj->gtt_offset);
	}
	list_del_init(&obj->gtt_list);
}

/**
 * Update the mappable working set counters. Call _only_ when there is a change
 * in one of (pin|fault)_mappable and update *_mappable _before_ calling.
 * @mappable: new state of the changed mappable flag (either pin_ or fault_).
 */
static void
i915_gem_info_update_mappable(struct drm_i915_private *dev_priv,
			      struct drm_i915_gem_object *obj,
			      bool mappable)
{
	if (mappable) {
		if (obj->pin_mappable && obj->fault_mappable)
			/* Combined state was already mappable. */
			return;
		dev_priv->mm.gtt_mappable_count++;
		dev_priv->mm.gtt_mappable_memory += obj->gtt_space->size;
	} else {
		if (obj->pin_mappable || obj->fault_mappable)
			/* Combined state still mappable. */
			return;
		dev_priv->mm.gtt_mappable_count--;
		dev_priv->mm.gtt_mappable_memory -= obj->gtt_space->size;
	}
}

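/*
 * Illustration (added commentary, not from the original source): the
 * combined mappable state is the OR of pin_mappable and fault_mappable,
 * so the counters above only move on the 0 <-> 1 transitions of that OR:
 *
 *	transition                          action
 *	first flag set (0,0 -> 1,0)         count++, memory += size
 *	second flag set (1,0 -> 1,1)        no change (already counted)
 *	one flag cleared (1,1 -> 1,0)       no change (still mappable)
 *	last flag cleared (1,0 -> 0,0)      count--, memory -= size
 *
 * This is why the caller must flip the flag *before* calling, and only
 * on an actual change.
 */
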
static void i915_gem_info_add_pin(struct drm_i915_private *dev_priv,
				  struct drm_i915_gem_object *obj,
				  bool mappable)
{
	dev_priv->mm.pin_count++;
	dev_priv->mm.pin_memory += obj->gtt_space->size;
	if (mappable) {
		obj->pin_mappable = true;
		i915_gem_info_update_mappable(dev_priv, obj, true);
	}
}

static void i915_gem_info_remove_pin(struct drm_i915_private *dev_priv,
				     struct drm_i915_gem_object *obj)
{
	dev_priv->mm.pin_count--;
	dev_priv->mm.pin_memory -= obj->gtt_space->size;
	if (obj->pin_mappable) {
		obj->pin_mappable = false;
		i915_gem_info_update_mappable(dev_priv, obj, false);
	}
}

static int
i915_gem_check_is_wedged(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	ret = wait_for_completion_interruptible(x);
	if (ret)
		return ret;

	/* Success, we reset the GPU! */
	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	/* GPU is hung, bump the completion count to account for
	 * the token we just consumed so that we never hit zero and
	 * end up waiting upon a subsequent completion event that
	 * will never happen.
	 */
	spin_lock_irqsave(&x->wait.lock, flags);
	x->done++;
	spin_unlock_irqrestore(&x->wait.lock, flags);
	return -EIO;
}

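/*
 * Note (added commentary): error_completion acts as a token counter.
 * wait_for_completion_interruptible() above consumes one token once the
 * reset handler signals it; if the GPU turns out to still be wedged, the
 * consumed token is put back under x->wait.lock so the completion count
 * never reaches zero and later waiters do not block on an event that
 * will never be signalled again.
 */
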
static int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_check_is_wedged(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	if (atomic_read(&dev_priv->mm.wedged)) {
		mutex_unlock(&dev->struct_mutex);
		return -EAGAIN;
	}

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active && obj->pin_count == 0;
}

int i915_gem_do_init(struct drm_device *dev,
		     unsigned long start,
		     unsigned long mappable_end,
		     unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (start >= end ||
	    (start & (PAGE_SIZE - 1)) != 0 ||
	    (end & (PAGE_SIZE - 1)) != 0) {
		return -EINVAL;
	}

	drm_mm_init(&dev_priv->mm.gtt_space, start,
		    end - start);

	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
	dev_priv->mm.gtt_mappable_end = mappable_end;

	return 0;
}

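/*
 * Worked example (illustrative numbers, not from the original source):
 * with start = 0, end = 512MB and mappable_end = 256MB, the drm_mm
 * manager hands out offsets in [0, 512MB), gtt_total is 512MB and only
 * the low 256MB counts towards mappable_gtt_total, i.e. the window the
 * CPU can reach through the aperture.
 */
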
int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;
	int ret;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - dev_priv->mm.pin_memory;
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, args->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	args->handle = handle;
	return 0;
}

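/*
 * Illustrative userspace sketch (not part of the driver; error handling
 * omitted): allocating a bo through this ioctl. Note that size is
 * rounded up to a page multiple by the kernel, as done above.
 *
 *	struct drm_i915_gem_create create = { .size = 2 * 4096 };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 *	// create.handle now names an 8KiB object
 */
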
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap(src_page);
	kunmap(dst_page);
}

static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}

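/*
 * Worked example (added commentary): with bit-17 swizzling, physical
 * address bit 17 is XORed into bit 6 of the tiled address. Bit 6 selects
 * one of two adjacent 64-byte cachelines, hence the "^ 64" above: on a
 * page whose physical bit 17 is set, the data for linear offset 0x00
 * actually lives at 0x40 and vice versa. The copy therefore walks the
 * page one cacheline at a time so each chunk is fetched from its
 * swizzled partner.
 */
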
/**
 * This is the fast shmem pread path, which attempts to copy_from_user directly
 * from the backing pages of the object to the user's address space.  On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_to_user_inatomic(user_data,
					      vaddr + page_offset,
					      page_length);
		kunmap_atomic(vaddr);

		mark_page_accessed(page);
		page_cache_release(page);
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pread path, which allocates temporary storage
 * in kernel space to copy_to_user into outside of the struct_mutex, so we
 * can copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      1);
		} else {
			slow_shmem_copy(user_pages[data_page_index],
					data_page_offset,
					page,
					shmem_page_offset,
					page_length);
		}

		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

out:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		mark_page_accessed(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
				       args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source.  */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	ret = -EFAULT;
	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

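/*
 * Illustrative userspace sketch (not part of the driver; error handling
 * omitted): reading back the first page of a bo. If the optimistic
 * in-atomic copy in the fast path faults, the ioctl transparently
 * retries via the slow path above.
 *
 *	char buf[4096];
 *	struct drm_i915_gem_pread pread = {
 *		.handle   = handle,
 *		.offset   = 0,
 *		.size     = sizeof(buf),
 *		.data_ptr = (uintptr_t)buf,
 *	};
 *	ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
 */
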
/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

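/*
 * Note (added commentary): the atomic mapping disables page faults, so
 * if the user page backing user_data is not resident, the in-atomic copy
 * returns the number of bytes left uncopied instead of sleeping; the
 * caller treats any non-zero return as "take the slow path".
 */
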
/* Here's the write path which can sleep for
 * page faults
 */

static inline void
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char __iomem *dst_vaddr;
	char *src_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	kunmap(user_page);
	io_mapping_unmap(dst_vaddr);
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length))
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto out_unpin_pages;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],
				  data_page_offset,
				  page_length);

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page, KM_USER0);
		ret = __copy_from_user_inatomic(vaddr + page_offset,
						user_data,
						page_length);
		kunmap_atomic(vaddr, KM_USER0);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret)
		goto out;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      0);
		} else {
			slow_shmem_copy(page,
					shmem_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);
		}

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

out:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
				      args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
	else if (obj->tiling_mode == I915_TILING_NONE &&
		 obj->gtt_space &&
		 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_object_pin(obj, 0, true);
		if (ret)
			goto out;

		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
		if (ret)
			goto out_unpin;

		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);

out_unpin:
		i915_gem_object_unpin(obj);
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			goto out;

		ret = -EFAULT;
		if (!i915_gem_object_needs_bit17_swizzle(obj))
			ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

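/*
 * Summary of the path selection above (added commentary):
 *
 *	phys object?            -> i915_gem_phys_pwrite()
 *	untiled, bound to GTT,
 *	and not CPU-writable?   -> GTT pwrite (fast, then slow on fault)
 *	otherwise               -> shmem pwrite, where the fast path is
 *	                           skipped entirely for bit-17 swizzled bos
 */
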
/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	intel_mark_busy(dev, obj);

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Update the LRU on the fence for the CPU access that's
		 * about to occur.
		 */
		if (obj->fence_reg != I915_FENCE_REG_NONE) {
			struct drm_i915_fence_reg *reg =
				&dev_priv->fence_regs[obj->fence_reg];
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
		}

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	/* Maintain LRU order of "inactive" objects */
	if (ret == 0 && i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	loff_t offset;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (obj->size > dev_priv->mm.gtt_mappable_end) {
		drm_gem_object_unreference_unlocked(obj);
		return -E2BIG;
	}

	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	/* Now bind it into the GTT if needed */
	mutex_lock(&dev->struct_mutex);
	BUG_ON(obj->pin_count && !obj->pin_mappable);

	if (obj->gtt_space) {
		if (!obj->map_and_fenceable) {
			ret = i915_gem_object_unbind(obj);
			if (ret)
				goto unlock;
		}
	}

	if (!obj->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
		if (ret)
			goto unlock;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unlock;

	if (!obj->fault_mappable) {
		obj->fault_mappable = true;
		i915_gem_info_update_mappable(dev_priv, obj, true);
	}

	/* Need a new fence register? */
	if (obj->tiling_mode != I915_TILING_NONE) {
		ret = i915_gem_object_get_fence_reg(obj, true);
		if (ret)
			goto unlock;
	}

	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
	mutex_unlock(&dev->struct_mutex);

	switch (ret) {
	case 0:
	case -ERESTARTSYS:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}

/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call.  The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
static int
i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;
	struct drm_local_map *map;
	int ret = 0;

	/* Set the object up for mmap'ing */
	list = &obj->base.map_list;
	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
	if (!list->map)
		return -ENOMEM;

	map = list->map;
	map->type = _DRM_GEM;
	map->size = obj->base.size;
	map->handle = obj;

	/* Get a DRM GEM mmap offset allocated... */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->base.size / PAGE_SIZE,
						    0, 0);
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n",
			  obj->base.name);
		ret = -ENOSPC;
		goto out_free_list;
	}

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->base.size / PAGE_SIZE,
						  0);
	if (!list->file_offset_node) {
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->hash.key = list->file_offset_node->start;
	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
	if (ret) {
		DRM_ERROR("failed to add to map hash\n");
		goto out_free_mm;
	}

	return 0;

out_free_mm:
	drm_mm_put_block(list->file_offset_node);
out_free_list:
	kfree(list->map);
	list->map = NULL;

	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (unlikely(obj->base.map_list.map && dev->dev_mapping))
		unmap_mapping_range(dev->dev_mapping,
				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
				    obj->base.size, 1);

	if (obj->fault_mappable) {
		obj->fault_mappable = false;
		i915_gem_info_update_mappable(dev_priv, obj, false);
	}
}

static void
i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list = &obj->base.map_list;

	drm_ht_remove_item(&mm->offset_hash, &list->hash);
	drm_mm_put_block(list->file_offset_node);
	kfree(list->map);
	list->map = NULL;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    obj->tiling_mode == I915_TILING_NONE)
		return obj->base.size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		size = 1024*1024;
	else
		size = 512*1024;

	while (size < obj->base.size)
		size <<= 1;

	return size;
}

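/*
 * Worked example (illustrative numbers): on gen3 a 1300KiB tiled bo
 * starts from the 1MB minimum and doubles to a 2MB fence region; the same
 * loop on gen2 starts from 512KB. On gen4+ no rounding is needed and the
 * bo size is used directly.
 */
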
/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    obj->tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(obj);
}

/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
static uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	int tile_height;

	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    obj->tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Older chips need unfenced tiled buffers to be aligned to the left
	 * edge of an even tile row (where tile rows are counted as if the bo is
	 * placed in a fenced gtt region).
	 */
	if (IS_GEN2(dev) ||
	    (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
		tile_height = 32;
	else
		tile_height = 8;

	return tile_height * obj->stride * 2;
}

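/*
 * Worked example (illustrative numbers): for a pre-965 X-tiled bo with a
 * 2048-byte stride, tile_height is 8, so the bo must be aligned to
 * 8 * 2048 * 2 = 32KiB, i.e. the start of an even tile row.
 */
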
/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_mmap_gtt *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	if (!obj->base.map_list.map) {
		ret = i915_gem_create_mmap_offset(obj);
		if (ret)
			goto out;
	}

	args->offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

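/*
 * Illustrative userspace sketch (not part of the driver; error handling
 * omitted): mapping a bo through the GTT aperture. The fake offset
 * returned by the ioctl is only meaningful to mmap(2) on the DRM fd.
 *
 *	struct drm_i915_gem_mmap_gtt map = { .handle = handle };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &map);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, map.offset);
 *	// the first access to ptr faults into i915_gem_fault()
 */
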
static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
			      gfp_t gfpmask)
{
	int page_count, i;
	struct address_space *mapping;
	struct inode *inode;
	struct page *page;

	/* Get the list of pages out of our struct file.  They'll be pinned
	 * at this point until we release them.
	 */
	page_count = obj->base.size / PAGE_SIZE;
	BUG_ON(obj->pages != NULL);
	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
	if (obj->pages == NULL)
		return -ENOMEM;

	inode = obj->base.filp->f_path.dentry->d_inode;
	mapping = inode->i_mapping;
	for (i = 0; i < page_count; i++) {
		page = read_cache_page_gfp(mapping, i,
					   GFP_HIGHUSER |
					   __GFP_COLD |
					   __GFP_RECLAIMABLE |
					   gfpmask);
		if (IS_ERR(page))
			goto err_pages;

		obj->pages[i] = page;
	}

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_do_bit_17_swizzle(obj);

	return 0;

err_pages:
	while (i--)
		page_cache_release(obj->pages[i]);

	drm_free_large(obj->pages);
	obj->pages = NULL;
	return PTR_ERR(page);
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
	int i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	for (i = 0; i < page_count; i++) {
		if (obj->dirty)
			set_page_dirty(obj->pages[i]);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj->pages[i]);

		page_cache_release(obj->pages[i]);
	}
	obj->dirty = 0;

	drm_free_large(obj->pages);
	obj->pages = NULL;
}

static uint32_t
i915_gem_next_request_seqno(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	return ring->outstanding_lazy_request = dev_priv->next_seqno;
}

static void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t seqno = i915_gem_next_request_seqno(dev, ring);

	BUG_ON(ring == NULL);
	obj->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
	}

	/* Move from whatever list we were on to the tail of execution. */
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);
	obj->last_rendering_seqno = seqno;
}

static void
i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	BUG_ON(!obj->active);
	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
	list_del_init(&obj->ring_list);
	obj->last_rendering_seqno = 0;
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	struct inode *inode;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*. Here we mirror the actions taken
	 * by shmem_delete_inode() to release the backing store.
	 */
	inode = obj->base.filp->f_path.dentry->d_inode;
	truncate_inode_pages(inode->i_mapping, 0);
	if (inode->i_op->truncate_range)
		inode->i_op->truncate_range(inode, 0, (loff_t)-1);

	obj->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (obj->pin_count != 0)
		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
	else
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
	list_del_init(&obj->ring_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));

	obj->last_rendering_seqno = 0;
	obj->ring = NULL;
	if (obj->active) {
		obj->active = 0;
		drm_gem_object_unreference(&obj->base);
	}
	WARN_ON(i915_verify_lists(dev));
}

static void
i915_gem_process_flushing_list(struct drm_device *dev,
			       uint32_t flush_domains,
			       struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next,
				 &ring->gpu_write_list,
				 gpu_write_list) {
		if (obj->base.write_domain & flush_domains) {
			uint32_t old_write_domain = obj->base.write_domain;

			obj->base.write_domain = 0;
			list_del_init(&obj->gpu_write_list);
			i915_gem_object_move_to_active(obj, ring);

			/* update the fence lru list */
			if (obj->fence_reg != I915_FENCE_REG_NONE) {
				struct drm_i915_fence_reg *reg =
					&dev_priv->fence_regs[obj->fence_reg];
				list_move_tail(&reg->lru_list,
					       &dev_priv->mm.fence_list);
			}

			trace_i915_gem_object_change_domain(obj,
							    obj->base.read_domains,
							    old_write_domain);
		}
	}
}

int
i915_add_request(struct drm_device *dev,
		 struct drm_file *file,
		 struct drm_i915_gem_request *request,
		 struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = NULL;
	uint32_t seqno;
	int was_empty;
	int ret;

	BUG_ON(request == NULL);

	if (file != NULL)
		file_priv = file->driver_priv;

	ret = ring->add_request(ring, &seqno);
	if (ret)
		return ret;

	ring->outstanding_lazy_request = false;

	request->seqno = seqno;
	request->ring = ring;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

	if (file_priv) {
		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);
	}

	if (!dev_priv->mm.suspended) {
		mod_timer(&dev_priv->hangcheck_timer,
			  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
		if (was_empty)
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work, HZ);
	}
	return 0;
}

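/*
 * Note (added commentary): seqnos increase monotonically but eventually
 * wrap, so completion tests go through i915_seqno_passed() (i915_drv.h),
 * which is essentially a signed distance check along the lines of
 *	(int32_t)(seq1 - seq2) >= 0
 * rather than a plain comparison.
 */
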
/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
static void
i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
{
	uint32_t flush_domains = 0;

	/* The sampler always gets flushed on i965 (sigh) */
	if (INTEL_INFO(dev)->gen >= 4)
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;

	ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
{
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}
}

void i915_gem_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int i;

	i915_gem_reset_ring_lists(dev_priv, &dev_priv->render_ring);
	i915_gem_reset_ring_lists(dev_priv, &dev_priv->bsd_ring);
	i915_gem_reset_ring_lists(dev_priv, &dev_priv->blt_ring);

	/* Remove anything from the flushing lists. The GPU cache is likely
	 * to be lost on reset along with the data, so simply move the
	 * lost bo to the inactive list.
	 */
	while (!list_empty(&dev_priv->mm.flushing_list)) {
		obj = list_first_entry(&dev_priv->mm.flushing_list,
				       struct drm_i915_gem_object,
				       mm_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
	list_for_each_entry(obj,
			    &dev_priv->mm.inactive_list,
			    mm_list)
	{
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	}

	/* The fence registers are invalidated so clear them out */
	for (i = 0; i < 16; i++) {
		struct drm_i915_fence_reg *reg;

		reg = &dev_priv->fence_regs[i];
		if (!reg->obj)
			continue;

		i915_gem_clear_fence_reg(reg->obj);
	}
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
static void
i915_gem_retire_requests_ring(struct drm_device *dev,
			      struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno;

	if (!ring->status_page.page_addr ||
	    list_empty(&ring->request_list))
		return;

	WARN_ON(i915_verify_lists(dev));

	seqno = ring->get_seqno(ring);
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		if (!i915_seqno_passed(seqno, request->seqno))
			break;

		trace_i915_gem_request_retire(dev, request->seqno);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
			break;

		if (obj->base.write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else
			i915_gem_object_move_to_inactive(obj);
	}

	if (unlikely(dev_priv->trace_irq_seqno &&
		     i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
		ring->user_irq_put(ring);
		dev_priv->trace_irq_seqno = 0;
	}

	WARN_ON(i915_verify_lists(dev));
}

void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
		struct drm_i915_gem_object *obj, *next;

		/* We must be careful that during unbind() we do not
		 * accidentally infinitely recurse into retire requests.
		 * Currently:
		 *   retire -> free -> unbind -> wait -> retire_ring
		 */
		list_for_each_entry_safe(obj, next,
					 &dev_priv->mm.deferred_free_list,
					 mm_list)
			i915_gem_free_object_tail(obj);
	}

	i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
	i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
	i915_gem_retire_requests_ring(dev, &dev_priv->blt_ring);
}

static void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	/* Come back later if the device is busy... */
	if (!mutex_trylock(&dev->struct_mutex)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
		return;
	}

	i915_gem_retire_requests(dev);

	if (!dev_priv->mm.suspended &&
	    (!list_empty(&dev_priv->render_ring.request_list) ||
	     !list_empty(&dev_priv->bsd_ring.request_list) ||
	     !list_empty(&dev_priv->blt_ring.request_list)))
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
	mutex_unlock(&dev->struct_mutex);
}

int
i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
		     bool interruptible, struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 ier;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (atomic_read(&dev_priv->mm.wedged))
		return -EAGAIN;

	if (seqno == ring->outstanding_lazy_request) {
		struct drm_i915_gem_request *request;

		request = kzalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)
			return -ENOMEM;

		ret = i915_add_request(dev, NULL, request, ring);
		if (ret) {
			kfree(request);
			return ret;
		}

		seqno = request->seqno;
	}

	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
		if (HAS_PCH_SPLIT(dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			i915_driver_irq_preinstall(dev);
			i915_driver_irq_postinstall(dev);
		}

		trace_i915_gem_request_wait_begin(dev, seqno);

		ring->waiting_seqno = seqno;
		ring->user_irq_get(ring);
		if (interruptible)
			ret = wait_event_interruptible(ring->irq_queue,
						       i915_seqno_passed(ring->get_seqno(ring), seqno)
						       || atomic_read(&dev_priv->mm.wedged));
		else
			wait_event(ring->irq_queue,
				   i915_seqno_passed(ring->get_seqno(ring), seqno)
				   || atomic_read(&dev_priv->mm.wedged));

		ring->user_irq_put(ring);
		ring->waiting_seqno = 0;

		trace_i915_gem_request_wait_end(dev, seqno);
	}
	if (atomic_read(&dev_priv->mm.wedged))
		ret = -EAGAIN;

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
			  __func__, ret, seqno, ring->get_seqno(ring),
			  dev_priv->next_seqno);

	/* Directly dispatch request retiring.  While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
		i915_gem_retire_requests_ring(dev, ring);

	return ret;
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
static int
i915_wait_request(struct drm_device *dev, uint32_t seqno,
		  struct intel_ring_buffer *ring)
{
	return i915_do_wait_request(dev, seqno, 1, ring);
}

static void
i915_gem_flush_ring(struct drm_device *dev,
		    struct intel_ring_buffer *ring,
		    uint32_t invalidate_domains,
		    uint32_t flush_domains)
{
	ring->flush(ring, invalidate_domains, flush_domains);
	i915_gem_process_flushing_list(dev, flush_domains, ring);
}

static void
i915_gem_flush(struct drm_device *dev,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains,
	       uint32_t flush_rings)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		if (flush_rings & RING_RENDER)
			i915_gem_flush_ring(dev, &dev_priv->render_ring,
					    invalidate_domains, flush_domains);
		if (flush_rings & RING_BSD)
			i915_gem_flush_ring(dev, &dev_priv->bsd_ring,
					    invalidate_domains, flush_domains);
		if (flush_rings & RING_BLT)
			i915_gem_flush_ring(dev, &dev_priv->blt_ring,
					    invalidate_domains, flush_domains);
	}
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool interruptible)
{
	struct drm_device *dev = obj->base.dev;
	int ret;

	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
	 */
	BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
	if (obj->active) {
		ret = i915_do_wait_request(dev,
					   obj->last_rendering_seqno,
					   interruptible,
					   obj->ring);
		if (ret)
			return ret;
	}

	return 0;
}

/**
 * Unbinds an object from the GTT aperture.
 */
int
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	if (obj->gtt_space == NULL)
		return 0;

	if (obj->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");
		return -EINVAL;
	}

	/* blow away mappings if mapped through GTT */
	i915_gem_release_mmap(obj);

	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it. This will
	 * also ensure that all pending GPU writes are finished
	 * before we unbind.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret == -ERESTARTSYS)
		return ret;
	/* Continue on if we fail due to EIO, the GPU is hung so we
	 * should be safe and we need to cleanup or else we might
	 * cause memory corruption through use-after-free.
	 */
	if (ret) {
		i915_gem_clflush_object(obj);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	/* release the fence reg _after_ flushing */
	if (obj->fence_reg != I915_FENCE_REG_NONE)
		i915_gem_clear_fence_reg(obj);

	i915_gem_gtt_unbind_object(obj);

	i915_gem_object_put_pages_gtt(obj);

	i915_gem_info_remove_gtt(dev_priv, obj);
	list_del_init(&obj->mm_list);
	/* Avoid an unnecessary call to unbind on rebind. */
	obj->map_and_fenceable = true;

	drm_mm_put_block(obj->gtt_space);
	obj->gtt_space = NULL;
	obj->gtt_offset = 0;

	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);

	trace_i915_gem_object_unbind(obj);

	return ret;
}

static int i915_ring_idle(struct drm_device *dev,
			  struct intel_ring_buffer *ring)
{
	if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
		return 0;

	i915_gem_flush_ring(dev, ring,
			    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	return i915_wait_request(dev,
				 i915_gem_next_request_seqno(dev, ring),
				 ring);
}

int
i915_gpu_idle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	bool lists_empty;
	int ret;

	lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
		       list_empty(&dev_priv->mm.active_list));
	if (lists_empty)
		return 0;

	/* Flush everything onto the inactive list. */
	ret = i915_ring_idle(dev, &dev_priv->render_ring);
	if (ret)
		return ret;

	ret = i915_ring_idle(dev, &dev_priv->bsd_ring);
	if (ret)
		return ret;

	ret = i915_ring_idle(dev, &dev_priv->blt_ring);
	if (ret)
		return ret;

	return 0;
}

static void sandybridge_write_fence_reg(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;
	uint64_t val;

	val = (uint64_t)((obj->gtt_offset + size - 4096) &
			 0xfffff000) << 32;
	val |= obj->gtt_offset & 0xfffff000;
	val |= (uint64_t)((obj->stride / 128) - 1) <<
		SANDYBRIDGE_FENCE_PITCH_SHIFT;

	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
}

static void i965_write_fence_reg(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;
	uint64_t val;

	/* The upper dword encodes the end of the fenced region, the lower
	 * dword its start; pitch is stored in 128-byte units minus one.
	 */
	val = (uint64_t)((obj->gtt_offset + size - 4096) &
			 0xfffff000) << 32;
	val |= obj->gtt_offset & 0xfffff000;
	val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

	I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
}

static void i915_write_fence_reg(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	uint32_t fence_reg, val, pitch_val;
	int tile_width;

	if ((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
	    (obj->gtt_offset & (size - 1))) {
		WARN(1, "%s: object 0x%08x [fenceable? %d] not 1M or size (0x%08x) aligned [gtt_space offset=%lx, size=%lx]\n",
		     __func__, obj->gtt_offset, obj->map_and_fenceable, size,
		     obj->gtt_space->start, obj->gtt_space->size);
		return;
	}

	if (obj->tiling_mode == I915_TILING_Y &&
	    HAS_128_BYTE_Y_TILING(dev))
		tile_width = 128;
	else
		tile_width = 512;

	/* Note: pitch better be a power of two tile widths */
	pitch_val = obj->stride / tile_width;
	pitch_val = ffs(pitch_val) - 1;

	if (obj->tiling_mode == I915_TILING_Y &&
	    HAS_128_BYTE_Y_TILING(dev))
		WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
	else
		WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);

	val = obj->gtt_offset;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
	val |= I915_FENCE_SIZE_BITS(size);
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	val |= I830_FENCE_REG_VALID;

	fence_reg = obj->fence_reg;
	if (fence_reg < 8)
		fence_reg = FENCE_REG_830_0 + fence_reg * 4;
	else
		fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
	I915_WRITE(fence_reg, val);
}

static void i830_write_fence_reg(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;
	uint32_t val;
	uint32_t pitch_val;
	uint32_t fence_size_bits;

	if ((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
	    (obj->gtt_offset & (obj->base.size - 1))) {
		WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
		     __func__, obj->gtt_offset);
		return;
	}

	pitch_val = obj->stride / 128;
	pitch_val = ffs(pitch_val) - 1;
	WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);

	val = obj->gtt_offset;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
	fence_size_bits = I830_FENCE_SIZE_BITS(size);
	WARN_ON(fence_size_bits & ~0x00000f00);
	val |= fence_size_bits;
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	val |= I830_FENCE_REG_VALID;

	I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
}

static int i915_find_fence_reg(struct drm_device *dev,
			       bool interruptible)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_fence_reg *reg;
	struct drm_i915_gem_object *obj = NULL;
	int i, avail, ret;

	/* First try to find a free reg */
	avail = 0;
	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
		reg = &dev_priv->fence_regs[i];
		if (!reg->obj)
			return i;

		if (!reg->obj->pin_count)
			avail++;
	}

	if (avail == 0)
		return -ENOSPC;

	/* None available, try to steal one or wait for a user to finish */
	avail = I915_FENCE_REG_NONE;
	list_for_each_entry(reg, &dev_priv->mm.fence_list,
			    lru_list) {
		obj = reg->obj;
		if (obj->pin_count)
			continue;

		/* found one! */
		avail = obj->fence_reg;
		break;
	}

	BUG_ON(avail == I915_FENCE_REG_NONE);

	/* We only have a reference on obj from the active list. put_fence_reg
	 * might drop that one, causing a use-after-free in it. So hold a
	 * private reference to obj like the other callers of put_fence_reg
	 * (set_tiling ioctl) do. */
	drm_gem_object_reference(&obj->base);
	ret = i915_gem_object_put_fence_reg(obj, interruptible);
	drm_gem_object_unreference(&obj->base);
	if (ret != 0)
		return ret;

	return avail;
}

/**
 * i915_gem_object_get_fence_reg - set up a fence reg for an object
 * @obj: object to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 *
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 */
int
i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj,
			      bool interruptible)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_fence_reg *reg = NULL;
	int ret;

	/* Just update our place in the LRU if our fence is getting used. */
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		reg = &dev_priv->fence_regs[obj->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
		return 0;
	}

	switch (obj->tiling_mode) {
	case I915_TILING_NONE:
		WARN(1, "allocating a fence for non-tiled object?\n");
		break;
	case I915_TILING_X:
		if (!obj->stride)
			return -EINVAL;
		WARN((obj->stride & (512 - 1)),
		     "object 0x%08x is X tiled but has non-512B pitch\n",
		     obj->gtt_offset);
		break;
	case I915_TILING_Y:
		if (!obj->stride)
			return -EINVAL;
		WARN((obj->stride & (128 - 1)),
		     "object 0x%08x is Y tiled but has non-128B pitch\n",
		     obj->gtt_offset);
		break;
	}

	ret = i915_find_fence_reg(dev, interruptible);
	if (ret < 0)
		return ret;

	obj->fence_reg = ret;
	reg = &dev_priv->fence_regs[obj->fence_reg];
	list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);

	reg->obj = obj;

	switch (INTEL_INFO(dev)->gen) {
	case 6:
		sandybridge_write_fence_reg(obj);
		break;
	case 5:
	case 4:
		i965_write_fence_reg(obj);
		break;
	case 3:
		i915_write_fence_reg(obj);
		break;
	case 2:
		i830_write_fence_reg(obj);
		break;
	}

	trace_i915_gem_object_get_fence(obj,
					obj->fence_reg,
					obj->tiling_mode);

	return 0;
}

/**
 * i915_gem_clear_fence_reg - clear out fence register info
 * @obj: object to clear
 *
 * Zeroes out the fence register itself and clears out the associated
 * data structures in dev_priv and obj.
 */
static void
i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[obj->fence_reg];
	uint32_t fence_reg;

	switch (INTEL_INFO(dev)->gen) {
	case 6:
		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
			     (obj->fence_reg * 8), 0);
		break;
	case 5:
	case 4:
		I915_WRITE64(FENCE_REG_965_0 + (obj->fence_reg * 8), 0);
		break;
	case 3:
		if (obj->fence_reg >= 8)
			fence_reg = FENCE_REG_945_8 + (obj->fence_reg - 8) * 4;
		else
	case 2:
			fence_reg = FENCE_REG_830_0 + obj->fence_reg * 4;

		I915_WRITE(fence_reg, 0);
		break;
	}

	reg->obj = NULL;
	obj->fence_reg = I915_FENCE_REG_NONE;
	list_del_init(&reg->lru_list);
}

/**
 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
 * to the buffer to finish, and then resets the fence register.
 * @obj: tiled object holding a fence register.
 * @interruptible: whether the wait upon the fence is interruptible
 *
 * Zeroes out the fence register itself and clears out the associated
 * data structures in dev_priv and obj.
 */
int
i915_gem_object_put_fence_reg(struct drm_i915_gem_object *obj,
			      bool interruptible)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_fence_reg *reg;

	if (obj->fence_reg == I915_FENCE_REG_NONE)
		return 0;

	/* If we've changed tiling, GTT-mappings of the object
	 * need to re-fault to ensure that the correct fence register
	 * setup is in place.
	 */
	i915_gem_release_mmap(obj);

	/* On the i915, GPU access to tiled buffers is via a fence,
	 * therefore we must wait for any outstanding access to complete
	 * before clearing the fence.
	 */
	reg = &dev_priv->fence_regs[obj->fence_reg];
	if (reg->gpu) {
		int ret;

		ret = i915_gem_object_flush_gpu_write_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_wait_rendering(obj, interruptible);
		if (ret)
			return ret;

		reg->gpu = false;
	}

	i915_gem_object_flush_gtt_write_domain(obj);
	i915_gem_clear_fence_reg(obj);

	return 0;
}

/**
 * Finds free space in the GTT aperture and binds the object there.
 */
static int
i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
			    unsigned alignment,
			    bool map_and_fenceable)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_mm_node *free_space;
	gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
	u32 size, fence_size, fence_alignment, unfenced_alignment;
	bool mappable, fenceable;
	int ret;

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to bind a purgeable object\n");
		return -EINVAL;
	}

	fence_size = i915_gem_get_gtt_size(obj);
	fence_alignment = i915_gem_get_gtt_alignment(obj);
	unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);

	if (alignment == 0)
		alignment = map_and_fenceable ? fence_alignment :
						unfenced_alignment;
	if (map_and_fenceable && alignment & (fence_alignment - 1)) {
		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
		return -EINVAL;
	}

	size = map_and_fenceable ? fence_size : obj->base.size;

	/* If the object is bigger than the entire aperture, reject it early
	 * before evicting everything in a vain attempt to find space.
	 */
	if (obj->base.size >
	    (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
		DRM_ERROR("Attempting to bind an object larger than the aperture\n");
		return -E2BIG;
	}

 search_free:
	if (map_and_fenceable)
		free_space =
			drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
						    size, alignment, 0,
						    dev_priv->mm.gtt_mappable_end,
						    0);
	else
		free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
						size, alignment, 0);

	if (free_space != NULL) {
		if (map_and_fenceable)
			obj->gtt_space =
				drm_mm_get_block_range_generic(free_space,
							       size, alignment, 0,
							       dev_priv->mm.gtt_mappable_end,
							       0);
		else
			obj->gtt_space =
				drm_mm_get_block(free_space, size, alignment);
	}
	if (obj->gtt_space == NULL) {
		/* If the gtt is empty and we're still having trouble
		 * fitting our object in, we're out of memory.
		 */
		ret = i915_gem_evict_something(dev, size, alignment,
					       map_and_fenceable);
		if (ret)
			return ret;

		goto search_free;
	}

	ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
	if (ret) {
		drm_mm_put_block(obj->gtt_space);
		obj->gtt_space = NULL;

		if (ret == -ENOMEM) {
			/* first try to clear up some space from the GTT */
			ret = i915_gem_evict_something(dev, size,
						       alignment,
						       map_and_fenceable);
			if (ret) {
				/* now try to shrink everyone else */
				if (gfpmask) {
					gfpmask = 0;
					goto search_free;
				}

				return ret;
			}

			goto search_free;
		}

		return ret;
	}

	ret = i915_gem_gtt_bind_object(obj);
	if (ret) {
		i915_gem_object_put_pages_gtt(obj);
		drm_mm_put_block(obj->gtt_space);
		obj->gtt_space = NULL;

		ret = i915_gem_evict_something(dev, size,
					       alignment, map_and_fenceable);
		if (ret)
			return ret;

		goto search_free;
	}

	obj->gtt_offset = obj->gtt_space->start;

	/* keep track of the bound object by adding it to the inactive list */
	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
	i915_gem_info_add_gtt(dev_priv, obj);

	/* Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache.
	 */
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);

	trace_i915_gem_object_bind(obj, obj->gtt_offset, map_and_fenceable);

	fenceable =
		obj->gtt_space->size == fence_size &&
		(obj->gtt_space->start & (fence_alignment - 1)) == 0;

	mappable =
		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;

	obj->map_and_fenceable = mappable && fenceable;

	return 0;
}

void
i915_gem_clflush_object(struct drm_i915_gem_object *obj)
{
	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */
	if (obj->pages == NULL)
		return;

	trace_i915_gem_object_clflush(obj);

	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
}

/** Flushes any GPU write domain for the object if it's dirty. */
static int
i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
				       bool pipelined)
{
	struct drm_device *dev = obj->base.dev;

	if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
		return 0;

	/* Queue the GPU write cache flushing we need. */
	i915_gem_flush_ring(dev, obj->ring, 0, obj->base.write_domain);
	BUG_ON(obj->base.write_domain);

	if (pipelined)
		return 0;

	return i915_gem_object_wait_rendering(obj, true);
}

/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
		return;

	/* No actual flushing is required for the GTT write domain. Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush. It also doesn't land in render cache.
	 */
	i915_gem_release_mmap(obj);

	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}

/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
		return;

	i915_gem_clflush_object(obj);
	intel_gtt_chipset_flush();
	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, int write)
{
	uint32_t old_write_domain, old_read_domains;
	int ret;

	/* Not valid to be called on unbound objects. */
	if (obj->gtt_space == NULL)
		return -EINVAL;

	ret = i915_gem_object_flush_gpu_write_domain(obj, false);
	if (ret != 0)
		return ret;

	i915_gem_object_flush_cpu_write_domain(obj);

	if (write) {
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
		obj->dirty = 1;
	}

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;
}

/*
 * Prepare buffer for display plane. Use uninterruptible for possible flush
 * wait, as in the modesetting process we're not supposed to be interrupted.
 */
int
i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
				     bool pipelined)
{
	uint32_t old_read_domains;
	int ret;

	/* Not valid to be called on unbound objects. */
	if (obj->gtt_space == NULL)
		return -EINVAL;

	ret = i915_gem_object_flush_gpu_write_domain(obj, true);
	if (ret)
		return ret;

	/* Currently, we are always called from a non-interruptible context. */
	if (!pipelined) {
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}

	i915_gem_object_flush_cpu_write_domain(obj);

	old_read_domains = obj->base.read_domains;
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    obj->base.write_domain);

	return 0;
}

int
i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj,
			  bool interruptible)
{
	if (!obj->active)
		return 0;

	if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
		i915_gem_flush_ring(obj->base.dev, obj->ring,
				    0, obj->base.write_domain);

	return i915_gem_object_wait_rendering(obj, interruptible);
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
static int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, int write)
{
	uint32_t old_write_domain, old_read_domains;
	int ret;

	ret = i915_gem_object_flush_gpu_write_domain(obj, false);
	if (ret != 0)
		return ret;

	i915_gem_object_flush_gtt_write_domain(obj);

	/* If we have a partially-valid cache of the object in the CPU,
	 * finish invalidating it and free the per-page flags.
	 */
	i915_gem_object_set_to_full_cpu_read_domain(obj);

	if (write) {
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj);

		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;
}

/**
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations.
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped by GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
				  struct intel_ring_buffer *ring,
				  struct change_domains *cd)
{
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (obj->base.pending_write_domain == 0)
		obj->base.pending_read_domains |= obj->base.read_domains;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->base.write_domain &&
	    (obj->base.write_domain != obj->base.pending_read_domains ||
	     obj->ring != ring)) {
		flush_domains |= obj->base.write_domain;
		invalidate_domains |=
			obj->base.pending_read_domains & ~obj->base.write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
		i915_gem_clflush_object(obj);

	/* blow away mappings if mapped through GTT */
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
		i915_gem_release_mmap(obj);

	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains). So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
		obj->base.pending_write_domain = obj->base.write_domain;

	cd->invalidate_domains |= invalidate_domains;
	cd->flush_domains |= flush_domains;
	if (flush_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= obj->ring->id;
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= ring->id;
}

/**
 * Moves the object from a partially CPU read to a full one.
 *
 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
 */
static void
i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
{
	if (!obj->page_cpu_valid)
		return;

	/* If we're partially in the CPU read domain, finish moving it in.
	 */
	if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
		int i;

		for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
			if (obj->page_cpu_valid[i])
				continue;
			drm_clflush_pages(obj->pages + i, 1);
		}
	}

	/* Free the page_cpu_valid mappings which are now stale, whether
	 * or not we've got I915_GEM_DOMAIN_CPU.
	 */
	kfree(obj->page_cpu_valid);
	obj->page_cpu_valid = NULL;
}

/**
 * Set the CPU read domain on a range of the object.
 *
 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
 * not entirely valid. The page_cpu_valid member of the object flags which
 * pages have been flushed, and will be respected by
 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
 * of the whole object.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
static int
i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
					  uint64_t offset, uint64_t size)
{
	uint32_t old_read_domains;
	int i, ret;

	if (offset == 0 && size == obj->base.size)
		return i915_gem_object_set_to_cpu_domain(obj, 0);

	ret = i915_gem_object_flush_gpu_write_domain(obj, false);
	if (ret != 0)
		return ret;
	i915_gem_object_flush_gtt_write_domain(obj);

	/* If we're already fully in the CPU read domain, we're done. */
	if (obj->page_cpu_valid == NULL &&
	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
		return 0;

	/* Otherwise, create/clear the per-page CPU read domain flag if we're
	 * newly adding I915_GEM_DOMAIN_CPU
	 */
	if (obj->page_cpu_valid == NULL) {
		obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
					      GFP_KERNEL);
		if (obj->page_cpu_valid == NULL)
			return -ENOMEM;
	} else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);

	/* Flush the cache on any pages that are still invalid from the CPU's
	 * perspective.
	 */
	for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
	     i++) {
		if (obj->page_cpu_valid[i])
			continue;

		drm_clflush_pages(obj->pages + i, 1);

		obj->page_cpu_valid[i] = 1;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);

	old_read_domains = obj->base.read_domains;
	obj->base.read_domains |= I915_GEM_DOMAIN_CPU;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    obj->base.write_domain);

	return 0;
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct drm_file *file_priv,
				   struct drm_i915_gem_exec_object2 *entry,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	target_obj = drm_gem_object_lookup(dev, file_priv,
					   reloc->target_handle);
	if (target_obj == NULL)
		return -ENOENT;

	target_offset = to_intel_bo(target_obj)->gtt_offset;

#if WATCH_RELOC
	DRM_INFO("%s: obj %p offset %08x target %d "
		 "read %08x write %08x gtt %08x "
		 "presumed %08x delta %08x\n",
		 __func__,
		 obj,
		 (int) reloc->offset,
		 (int) reloc->target_handle,
		 (int) reloc->read_domains,
		 (int) reloc->write_domain,
		 (int) target_offset,
		 (int) reloc->presumed_offset,
		 reloc->delta);
#endif

	/* The target buffer should have appeared before us in the
	 * exec_object list, so it should have a GTT space bound by now.
	 */
	if (target_offset == 0) {
		DRM_ERROR("No GTT space found for object %d\n",
			  reloc->target_handle);
		goto err;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (reloc->write_domain & (reloc->write_domain - 1)) {
		DRM_ERROR("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		goto err;
	}
	if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
	    reloc->read_domains & I915_GEM_DOMAIN_CPU) {
		DRM_ERROR("reloc with read/write CPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		goto err;
	}
	if (reloc->write_domain && target_obj->pending_write_domain &&
	    reloc->write_domain != target_obj->pending_write_domain) {
		DRM_ERROR("Write domain conflict: "
			  "obj %p target %d offset %d "
			  "new %08x old %08x\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->write_domain,
			  target_obj->pending_write_domain);
		goto err;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		goto out;

	/* Check that the relocation address is valid... */
	if (reloc->offset > obj->base.size - 4) {
		DRM_ERROR("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		goto err;
	}
	if (reloc->offset & 3) {
		DRM_ERROR("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		goto err;
	}

	/* and points to somewhere within the target object. */
	if (reloc->delta >= target_obj->size) {
		DRM_ERROR("Relocation beyond target object bounds: "
			  "obj %p target %d delta %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->delta,
			  (int) target_obj->size);
		goto err;
	}

	reloc->delta += target_offset;
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
		char *vaddr;

		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		kunmap_atomic(vaddr);
	} else {
		struct drm_i915_private *dev_priv = dev->dev_private;
		uint32_t __iomem *reloc_entry;
		void __iomem *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
		if (ret)
			goto err;

		/* Map the page containing the relocation we're going to perform. */
		reloc->offset += obj->gtt_offset;
		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
						      reloc->offset & PAGE_MASK);
		reloc_entry = (uint32_t __iomem *)
			(reloc_page + (reloc->offset & ~PAGE_MASK));
		iowrite32(reloc->delta, reloc_entry);
		io_mapping_unmap_atomic(reloc_page);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

out:
	ret = 0;
err:
	drm_gem_object_unreference(target_obj);
	return ret;
}

static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct drm_file *file_priv,
				    struct drm_i915_gem_exec_object2 *entry)
{
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	int i, ret;

	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
	for (i = 0; i < entry->relocation_count; i++) {
		struct drm_i915_gem_relocation_entry reloc;

		if (__copy_from_user_inatomic(&reloc,
					      user_relocs + i,
					      sizeof(reloc)))
			return -EFAULT;

		ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &reloc);
		if (ret)
			return ret;

		if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
					    &reloc.presumed_offset,
					    sizeof(reloc.presumed_offset)))
			return -EFAULT;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
					 struct drm_file *file_priv,
					 struct drm_i915_gem_exec_object2 *entry,
					 struct drm_i915_gem_relocation_entry *relocs)
{
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct drm_file *file,
			     struct drm_i915_gem_object **object_list,
			     struct drm_i915_gem_exec_object2 *exec_list,
			     int count)
{
	int i, ret;

	for (i = 0; i < count; i++) {
		struct drm_i915_gem_object *obj = object_list[i];
		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
		ret = i915_gem_execbuffer_relocate_object(obj, file,
							  &exec_list[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_reserve(struct drm_device *dev,
			    struct drm_file *file,
			    struct drm_i915_gem_object **object_list,
			    struct drm_i915_gem_exec_object2 *exec_list,
			    int count)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret, i, retry;

	/* attempt to pin all of the buffers into the GTT */
	retry = 0;
	do {
		ret = 0;
		for (i = 0; i < count; i++) {
			struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
			struct drm_i915_gem_object *obj = object_list[i];
			bool need_fence =
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;

			/* g33/pnv can't fence buffers in the unmappable part */
			bool need_mappable =
				entry->relocation_count ? true : need_fence;

			/* Check fence reg constraints and rebind if necessary */
			if (need_mappable && !obj->map_and_fenceable) {
				ret = i915_gem_object_unbind(obj);
				if (ret)
					break;
			}

			ret = i915_gem_object_pin(obj,
						  entry->alignment,
						  need_mappable);
			if (ret)
				break;

			/*
			 * Pre-965 chips need a fence register set up in order
			 * to properly handle blits to/from tiled surfaces.
			 */
			if (need_fence) {
				ret = i915_gem_object_get_fence_reg(obj, true);
				if (ret) {
					i915_gem_object_unpin(obj);
					break;
				}

				dev_priv->fence_regs[obj->fence_reg].gpu = true;
			}

			entry->offset = obj->gtt_offset;
		}

		while (i--)
			i915_gem_object_unpin(object_list[i]);

		if (ret != -ENOSPC || retry > 1)
			return ret;

		/* First attempt, just clear anything that is purgeable.
		 * Second attempt, clear the entire GTT.
		 */
		ret = i915_gem_evict_everything(dev, retry == 0);
		if (ret)
			return ret;

		retry++;
	} while (1);
}

static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_file *file,
				  struct drm_i915_gem_object **object_list,
				  struct drm_i915_gem_exec_object2 *exec_list,
				  int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	int i, total, ret;

	for (i = 0; i < count; i++)
		object_list[i]->in_execbuffer = false;

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec_list[i].relocation_count;

	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL) {
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;

		user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;

		if (copy_from_user(reloc+total, user_relocs,
				   exec_list[i].relocation_count *
				   sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		total += exec_list[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	ret = i915_gem_execbuffer_reserve(dev, file,
					  object_list, exec_list,
					  count);
	if (ret)
		goto err;

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_object *obj = object_list[i];
		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, file,
							       &exec_list[i],
							       reloc + total);
		if (ret)
			goto err;

		total += exec_list[i].relocation_count;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	return ret;
}

static int
i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
				struct drm_file *file,
				struct intel_ring_buffer *ring,
				struct drm_i915_gem_object **objects,
				int count)
{
	struct change_domains cd;
	int ret, i;

	cd.invalidate_domains = 0;
	cd.flush_domains = 0;
	cd.flush_rings = 0;
	for (i = 0; i < count; i++)
		i915_gem_object_set_to_gpu_domain(objects[i], ring, &cd);

	if (cd.invalidate_domains | cd.flush_domains) {
#if WATCH_EXEC
		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
			 __func__,
			 cd.invalidate_domains,
			 cd.flush_domains);
#endif
		i915_gem_flush(dev, file,
			       cd.invalidate_domains,
			       cd.flush_domains,
			       cd.flush_rings);
	}

	for (i = 0; i < count; i++) {
		struct drm_i915_gem_object *obj = objects[i];
		/* XXX replace with semaphores */
		if (obj->ring && ring != obj->ring) {
			ret = i915_gem_object_wait_rendering(obj, true);
			if (ret)
				return ret;
		}
	}

	return 0;
}

/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
 * Note that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
static int
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
	struct drm_i915_gem_request *request;
	struct intel_ring_buffer *ring = NULL;
	u32 seqno = 0;
	int ret;

	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
		if (time_after_eq(request->emitted_jiffies, recent_enough))
			break;

		ring = request->ring;
		seqno = request->seqno;
	}
	spin_unlock(&file_priv->mm.lock);

	if (seqno == 0)
		return 0;

	ret = 0;
	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
		/* And wait for the seqno passing without holding any locks and
		 * causing extra latency for others. This is safe as the irq
		 * generation is designed to be run atomically and so is
		 * lockless.
		 */
		ring->user_irq_get(ring);
		ret = wait_event_interruptible(ring->irq_queue,
					       i915_seqno_passed(ring->get_seqno(ring), seqno)
					       || atomic_read(&dev_priv->mm.wedged));
		ring->user_irq_put(ring);

		if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
			ret = -EIO;
	}

	if (ret == 0)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);

	return ret;
}

static int
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
			  uint64_t exec_offset)
{
	uint32_t exec_start, exec_len;

	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
	exec_len = (uint32_t) exec->batch_len;

	if ((exec_start | exec_len) & 0x7)
		return -EINVAL;

	if (!exec_start)
		return -EINVAL;

	return 0;
}

static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	int i;

	for (i = 0; i < count; i++) {
		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
		int length; /* limited by fault_in_pages_readable() */

		/* First check for malicious input causing overflow */
		if (exec[i].relocation_count >
		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
			return -EINVAL;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		if (!access_ok(VERIFY_READ, ptr, length))
			return -EFAULT;

		/* we may also need to update the presumed offsets */
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;

		if (fault_in_pages_readable(ptr, length))
			return -EFAULT;
	}

	return 0;
}

static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec_list)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object **object_list = NULL;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct drm_i915_gem_request *request = NULL;
	int ret, i, flips;
	uint64_t exec_offset;

	struct intel_ring_buffer *ring = NULL;

	ret = i915_gem_check_is_wedged(dev);
	if (ret)
		return ret;

	ret = validate_exec_list(exec_list, args->buffer_count);
	if (ret)
		return ret;

#if WATCH_EXEC
	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
		 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
#endif
	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->render_ring;
		break;
	case I915_EXEC_BSD:
		if (!HAS_BSD(dev)) {
			DRM_ERROR("execbuf with invalid ring (BSD)\n");
			return -EINVAL;
		}
		ring = &dev_priv->bsd_ring;
		break;
	case I915_EXEC_BLT:
		if (!HAS_BLT(dev)) {
			DRM_ERROR("execbuf with invalid ring (BLT)\n");
			return -EINVAL;
		}
		ring = &dev_priv->blt_ring;
		break;
	default:
		DRM_ERROR("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}
	object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
	if (object_list == NULL) {
		DRM_ERROR("Failed to allocate object list for %d buffers\n",
			  args->buffer_count);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	if (args->num_cliprects != 0) {
		cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
				    GFP_KERNEL);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		ret = copy_from_user(cliprects,
				     (struct drm_clip_rect __user *)
				     (uintptr_t) args->cliprects_ptr,
				     sizeof(*cliprects) * args->num_cliprects);
		if (ret != 0) {
			DRM_ERROR("copy %d cliprects failed: %d\n",
				  args->num_cliprects, ret);
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	request = kzalloc(sizeof(*request), GFP_KERNEL);
	if (request == NULL) {
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->mm.suspended) {
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo (drm_gem_object_lookup(dev, file,
							 exec_list[i].handle));
		if (obj == NULL) {
			DRM_ERROR("Invalid object handle %d at index %d\n",
				  exec_list[i].handle, i);
			/* prevent error path from reading uninitialized data */
			args->buffer_count = i;
			ret = -ENOENT;
			goto err;
		}
		object_list[i] = obj;

		if (obj->in_execbuffer) {
			DRM_ERROR("Object %p appears more than once in object list\n",
				  obj);
			/* prevent error path from reading uninitialized data */
			args->buffer_count = i + 1;
			ret = -EINVAL;
			goto err;
		}
		obj->in_execbuffer = true;
	}

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(dev, file,
					  object_list, exec_list,
					  args->buffer_count);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, file,
					   object_list, exec_list,
					   args->buffer_count);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file,
								object_list,
								exec_list,
								args->buffer_count);
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	batch_obj = object_list[args->buffer_count-1];
	if (batch_obj->base.pending_write_domain) {
		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* Sanity check the batch buffer */
	exec_offset = batch_obj->gtt_offset;
	ret = i915_gem_check_execbuffer(args, exec_offset);
	if (ret != 0) {
		DRM_ERROR("execbuf with invalid offset/length\n");
		goto err;
	}

	ret = i915_gem_execbuffer_move_to_gpu(dev, file, ring,
					      object_list, args->buffer_count);
	if (ret)
		goto err;

#if WATCH_COHERENCY
	for (i = 0; i < args->buffer_count; i++) {
		i915_gem_object_check_coherency(object_list[i],
						exec_list[i].handle);
	}
#endif

#if WATCH_EXEC
	i915_gem_dump_object(batch_obj,
			     args->batch_len,
			     __func__,
			     ~0);
#endif

	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */
	flips = 0;
	for (i = 0; i < args->buffer_count; i++) {
		if (object_list[i]->base.write_domain)
			flips |= atomic_read(&object_list[i]->pending_flip);
	}
	if (flips) {
		int plane, flip_mask;

		for (plane = 0; flips >> plane; plane++) {
			if (((flips >> plane) & 1) == 0)
				continue;

			if (plane)
				flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
			else
				flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

			ret = intel_ring_begin(ring, 2);
			if (ret)
				goto err;

			intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
			intel_ring_emit(ring, MI_NOOP);
			intel_ring_advance(ring);
		}
	}

	/* Exec the batchbuffer */
	ret = ring->dispatch_execbuffer(ring, args, cliprects, exec_offset);
	if (ret) {
		DRM_ERROR("dispatch failed %d\n", ret);
		goto err;
	}

	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj = object_list[i];

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;

		i915_gem_object_move_to_active(obj, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			list_move_tail(&obj->gpu_write_list,
				       &ring->gpu_write_list);
			intel_mark_busy(dev, obj);
		}

		trace_i915_gem_object_change_domain(obj,
						    obj->base.read_domains,
						    obj->base.write_domain);
	}

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires
	 */
	i915_retire_commands(dev, ring);

	if (i915_add_request(dev, file, request, ring))
		i915_gem_next_request_seqno(dev, ring);
	else
		request = NULL;

err:
	for (i = 0; i < args->buffer_count; i++) {
		object_list[i]->in_execbuffer = false;
		drm_gem_object_unreference(&object_list[i]->base);
	}

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	drm_free_large(object_list);
	kfree(cliprects);
	kfree(request);

	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

#if WATCH_EXEC
	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
		 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
#endif

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_ERROR("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
				   (uintptr_t) args->buffers_ptr,
				   exec_list,
				   sizeof(*exec_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_ERROR("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

#if WATCH_EXEC
	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
		 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
#endif

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec2_list == NULL) {
		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_ERROR("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
				   (uintptr_t) args->buffers_ptr,
				   exec2_list,
				   sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_ERROR("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_object_pin(struct drm_i915_gem_object *obj,
		    uint32_t alignment,
		    bool map_and_fenceable)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
	BUG_ON(map_and_fenceable && !map_and_fenceable);
	WARN_ON(i915_verify_lists(dev));

	if (obj->gtt_space != NULL) {
		if ((alignment && obj->gtt_offset & (alignment - 1)) ||
		    (map_and_fenceable && !obj->map_and_fenceable)) {
			WARN(obj->pin_count,
			     "bo is already pinned with incorrect alignment:"
			     " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
			     " obj->map_and_fenceable=%d\n",
			     obj->gtt_offset, alignment,
			     map_and_fenceable,
			     obj->map_and_fenceable);
			ret = i915_gem_object_unbind(obj);
			if (ret)
				return ret;
		}
	}

	if (obj->gtt_space == NULL) {
		ret = i915_gem_object_bind_to_gtt(obj, alignment,
						  map_and_fenceable);
		if (ret)
			return ret;
	}

	if (obj->pin_count++ == 0) {
		i915_gem_info_add_pin(dev_priv, obj, map_and_fenceable);
		if (!obj->active)
			list_move_tail(&obj->mm_list,
				       &dev_priv->mm.pinned_list);
	}
	BUG_ON(!obj->pin_mappable && map_and_fenceable);

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

void
i915_gem_object_unpin(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	WARN_ON(i915_verify_lists(dev));
	BUG_ON(obj->pin_count == 0);
	BUG_ON(obj->gtt_space == NULL);

	if (--obj->pin_count == 0) {
		if (!obj->active)
			list_move_tail(&obj->mm_list,
				       &dev_priv->mm.inactive_list);
		i915_gem_info_remove_pin(dev_priv, obj);
	}
	WARN_ON(i915_verify_lists(dev));
}

int
i915_gem_pin_ioctl(struct drm_device *dev, void *data,
		   struct drm_file *file)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to pin a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	if (obj->pin_filp != NULL && obj->pin_filp != file) {
		DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
		ret = -EINVAL;
		goto out;
	}

	obj->user_pin_count++;
	obj->pin_filp = file;
	if (obj->user_pin_count == 1) {
		ret = i915_gem_object_pin(obj, args->alignment, true);
		if (ret)
			goto out;
	}

	/* XXX - flush the CPU caches for pinned objects
	 * as the X server doesn't manage domains yet
	 */
	i915_gem_object_flush_cpu_write_domain(obj);
	args->offset = obj->gtt_offset;
out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->pin_filp != file) {
		DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
		ret = -EINVAL;
		goto out;
	}

	obj->user_pin_count--;
	if (obj->user_pin_count == 0) {
		obj->pin_filp = NULL;
		i915_gem_object_unpin(obj);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Count all active objects as busy, even if they are currently not used
	 * by the gpu. Users of this interface expect objects to eventually
	 * become non-busy without any further actions, therefore emit any
	 * necessary flushes here.
	 */
	args->busy = obj->active;
	if (args->busy) {
		/* Unconditionally flush objects, even when the gpu still uses this
		 * object. Userspace calling this function indicates that it wants to
		 * use this buffer rather sooner than later, so issuing the required
		 * flush earlier is beneficial.
		 */
		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
			i915_gem_flush_ring(dev, obj->ring,
					    0, obj->base.write_domain);

		/* Update the active list for the hardware's current position.
		 * Otherwise this only updates on a delayed timer or when irqs
		 * are actually unmasked, and our working set ends up being
		 * larger than required.
		 */
		i915_gem_retire_requests_ring(dev, obj->ring);

		args->busy = obj->active;
	}

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	return i915_gem_ring_throttle(dev, file_priv);
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->pin_count) {
		ret = -EINVAL;
		goto out;
	}

	if (obj->madv != __I915_MADV_PURGED)
		obj->madv = args->madv;

	/* if the object is no longer bound, discard its backing storage */
	if (i915_gem_object_is_purgeable(obj) &&
	    obj->gtt_space == NULL)
		i915_gem_object_truncate(obj);

	args->retained = obj->madv != __I915_MADV_PURGED;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
						  size_t size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (obj == NULL)
		return NULL;

	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
		kfree(obj);
		return NULL;
	}

	i915_gem_info_add_obj(dev_priv, size);

	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;

	obj->agp_type = AGP_USER_MEMORY;
	obj->base.driver_private = NULL;
	obj->fence_reg = I915_FENCE_REG_NONE;
	INIT_LIST_HEAD(&obj->mm_list);
	INIT_LIST_HEAD(&obj->gtt_list);
	INIT_LIST_HEAD(&obj->ring_list);
	INIT_LIST_HEAD(&obj->gpu_write_list);
	obj->madv = I915_MADV_WILLNEED;
	/* Avoid an unnecessary call to unbind on the first bind. */
	obj->map_and_fenceable = true;

	return obj;
}

int i915_gem_init_object(struct drm_gem_object *obj)
{
	BUG();

	return 0;
}

static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_object_unbind(obj);
	if (ret == -ERESTARTSYS) {
		list_move(&obj->mm_list,
			  &dev_priv->mm.deferred_free_list);
		return;
	}

	if (obj->base.map_list.map)
		i915_gem_free_mmap_offset(obj);

	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);

	kfree(obj->page_cpu_valid);
	kfree(obj->bit_17);
	kfree(obj);
}

void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;

	trace_i915_gem_object_destroy(obj);

	while (obj->pin_count > 0)
		i915_gem_object_unpin(obj);

	if (obj->phys_obj)
		i915_gem_detach_phys_object(dev, obj);

	i915_gem_free_object_tail(obj);
}

int
i915_gem_idle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	mutex_lock(&dev->struct_mutex);

	if (dev_priv->mm.suspended) {
		mutex_unlock(&dev->struct_mutex);
		return 0;
	}

	ret = i915_gpu_idle(dev);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	/* Under UMS, be paranoid and evict. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
		ret = i915_gem_evict_inactive(dev, false);
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	/* Hack! Don't let anybody do execbuf while we don't control the chip.
	 * We need to replace this with a semaphore, or something.
	 * And not confound mm.suspended!
	 */
	dev_priv->mm.suspended = 1;
	del_timer_sync(&dev_priv->hangcheck_timer);

	i915_kernel_lost_context(dev);
	i915_gem_cleanup_ringbuffer(dev);

	mutex_unlock(&dev->struct_mutex);

	/* Cancel the retire work handler, which should be idle now. */
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);

	return 0;
}

int
i915_gem_init_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	ret = intel_init_render_ring_buffer(dev);
	if (ret)
		return ret;

	if (HAS_BSD(dev)) {
		ret = intel_init_bsd_ring_buffer(dev);
		if (ret)
			goto cleanup_render_ring;
	}

	if (HAS_BLT(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
		if (ret)
			goto cleanup_bsd_ring;
	}

	dev_priv->next_seqno = 1;

	return 0;

cleanup_bsd_ring:
	intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
cleanup_render_ring:
	intel_cleanup_ring_buffer(&dev_priv->render_ring);
	return ret;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	intel_cleanup_ring_buffer(&dev_priv->render_ring);
	intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
	intel_cleanup_ring_buffer(&dev_priv->blt_ring);
}

int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (atomic_read(&dev_priv->mm.wedged)) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		atomic_set(&dev_priv->mm.wedged, 0);
	}

	mutex_lock(&dev->struct_mutex);
	dev_priv->mm.suspended = 0;

	ret = i915_gem_init_ringbuffer(dev);
	if (ret != 0) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
	BUG_ON(!list_empty(&dev_priv->bsd_ring.active_list));
	BUG_ON(!list_empty(&dev_priv->blt_ring.active_list));
	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
	BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
	BUG_ON(!list_empty(&dev_priv->bsd_ring.request_list));
	BUG_ON(!list_empty(&dev_priv->blt_ring.request_list));
	mutex_unlock(&dev->struct_mutex);

	ret = drm_irq_install(dev);
	if (ret)
		goto cleanup_ringbuffer;

	return 0;

cleanup_ringbuffer:
	mutex_lock(&dev->struct_mutex);
	i915_gem_cleanup_ringbuffer(dev);
	dev_priv->mm.suspended = 1;
	mutex_unlock(&dev->struct_mutex);

	return ret;
}


int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
                       struct drm_file *file_priv)
{
        if (drm_core_check_feature(dev, DRIVER_MODESET))
                return 0;

        drm_irq_uninstall(dev);
        return i915_gem_idle(dev);
}

void
i915_gem_lastclose(struct drm_device *dev)
{
        int ret;

        if (drm_core_check_feature(dev, DRIVER_MODESET))
                return;

        ret = i915_gem_idle(dev);
        if (ret)
                DRM_ERROR("failed to idle hardware: %d\n", ret);
}

static void
init_ring_lists(struct intel_ring_buffer *ring)
{
        INIT_LIST_HEAD(&ring->active_list);
        INIT_LIST_HEAD(&ring->request_list);
        INIT_LIST_HEAD(&ring->gpu_write_list);
}
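
/*
 * One-time GEM initialization at driver load: set up the object and
 * request lists, the retire worker, fence registers, and the memory
 * shrinker, before userspace can create any objects.
 */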
void
i915_gem_load(struct drm_device *dev)
{
        int i;
        drm_i915_private_t *dev_priv = dev->dev_private;

        INIT_LIST_HEAD(&dev_priv->mm.active_list);
        INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
        INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
        INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
        INIT_LIST_HEAD(&dev_priv->mm.fence_list);
        INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
        INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
        init_ring_lists(&dev_priv->render_ring);
        init_ring_lists(&dev_priv->bsd_ring);
        init_ring_lists(&dev_priv->blt_ring);
        for (i = 0; i < 16; i++)
                INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
        INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
                          i915_gem_retire_work_handler);
        init_completion(&dev_priv->error_completion);

        /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
        if (IS_GEN3(dev)) {
                u32 tmp = I915_READ(MI_ARB_STATE);
                if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
                        /* arb state is a masked write, so set bit + bit in mask */
                        tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
                        I915_WRITE(MI_ARB_STATE, tmp);
                }
        }

        /* Old X drivers will take 0-2 for front, back, depth buffers */
        if (!drm_core_check_feature(dev, DRIVER_MODESET))
                dev_priv->fence_reg_start = 3;

        if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
                dev_priv->num_fence_regs = 16;
        else
                dev_priv->num_fence_regs = 8;

        /* Initialize fence registers to zero */
        switch (INTEL_INFO(dev)->gen) {
        case 6:
                for (i = 0; i < 16; i++)
                        I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
                break;
        case 5:
        case 4:
                for (i = 0; i < 16; i++)
                        I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
                break;
        case 3:
                if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
                        for (i = 0; i < 8; i++)
                                I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
        case 2:
                for (i = 0; i < 8; i++)
                        I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
                break;
        }
        i915_gem_detect_bit_6_swizzle(dev);
        init_waitqueue_head(&dev_priv->pending_flip_queue);

        dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
        dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
        register_shrinker(&dev_priv->mm.inactive_shrinker);
}

/*
 * Create a physically contiguous memory object for this object
 * e.g. for cursor + overlay regs
 */
static int i915_gem_init_phys_object(struct drm_device *dev,
                                     int id, int size, int align)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_i915_gem_phys_object *phys_obj;
        int ret;

        if (dev_priv->mm.phys_objs[id - 1] || !size)
                return 0;

        phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
        if (!phys_obj)
                return -ENOMEM;

        phys_obj->id = id;

        phys_obj->handle = drm_pci_alloc(dev, size, align);
        if (!phys_obj->handle) {
                ret = -ENOMEM;
                goto kfree_obj;
        }
#ifdef CONFIG_X86
        set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif

        dev_priv->mm.phys_objs[id - 1] = phys_obj;

        return 0;

kfree_obj:
        kfree(phys_obj);
        return ret;
}
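
/*
 * Tear-down counterpart of the above: detach whatever object is still
 * bound to the slot, restore the pages to write-back caching, and release
 * the coherent PCI allocation.
 */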
static void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_i915_gem_phys_object *phys_obj;

        if (!dev_priv->mm.phys_objs[id - 1])
                return;

        phys_obj = dev_priv->mm.phys_objs[id - 1];
        if (phys_obj->cur_obj) {
                i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
        }

#ifdef CONFIG_X86
        set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif
        drm_pci_free(dev, phys_obj->handle);
        kfree(phys_obj);
        dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
        int i;

        for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
                i915_gem_free_phys_object(dev, i);
}
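
/*
 * Detaching copies the contents of the contiguous buffer back into the
 * object's shmem pages, clflushing each page so the CPU writes are pushed
 * out of cache and the data is usable via the normal (non-phys) path again.
 */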
void i915_gem_detach_phys_object(struct drm_device *dev,
                                 struct drm_i915_gem_object *obj)
{
        struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
        char *vaddr;
        int i;
        int page_count;

        if (!obj->phys_obj)
                return;
        vaddr = obj->phys_obj->handle->vaddr;

        page_count = obj->base.size / PAGE_SIZE;
        for (i = 0; i < page_count; i++) {
                struct page *page = read_cache_page_gfp(mapping, i,
                                                        GFP_HIGHUSER | __GFP_RECLAIMABLE);
                if (!IS_ERR(page)) {
                        char *dst = kmap_atomic(page);
                        memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
                        kunmap_atomic(dst);

                        drm_clflush_pages(&page, 1);

                        set_page_dirty(page);
                        mark_page_accessed(page);
                        page_cache_release(page);
                }
        }
        intel_gtt_chipset_flush();

        obj->phys_obj->cur_obj = NULL;
        obj->phys_obj = NULL;
}
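
/*
 * Attach is the mirror image: allocate (or reuse) the contiguous slot for
 * this id, then copy every shmem page of the object into it; the hardware
 * is subsequently pointed at the contiguous copy.
 */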
int
i915_gem_attach_phys_object(struct drm_device *dev,
                            struct drm_i915_gem_object *obj,
                            int id,
                            int align)
{
        struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
        drm_i915_private_t *dev_priv = dev->dev_private;
        int ret = 0;
        int page_count;
        int i;

        if (id > I915_MAX_PHYS_OBJECT)
                return -EINVAL;

        if (obj->phys_obj) {
                if (obj->phys_obj->id == id)
                        return 0;
                i915_gem_detach_phys_object(dev, obj);
        }

        /* create a new object */
        if (!dev_priv->mm.phys_objs[id - 1]) {
                ret = i915_gem_init_phys_object(dev, id,
                                                obj->base.size, align);
                if (ret) {
                        DRM_ERROR("failed to init phys object %d size: %zu\n",
                                  id, obj->base.size);
                        return ret;
                }
        }

        /* bind to the object */
        obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
        obj->phys_obj->cur_obj = obj;

        page_count = obj->base.size / PAGE_SIZE;

        for (i = 0; i < page_count; i++) {
                struct page *page;
                char *dst, *src;

                page = read_cache_page_gfp(mapping, i,
                                           GFP_HIGHUSER | __GFP_RECLAIMABLE);
                if (IS_ERR(page))
                        return PTR_ERR(page);

                src = kmap_atomic(page);
                dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
                memcpy(dst, src, PAGE_SIZE);
                kunmap_atomic(src);

                mark_page_accessed(page);
                page_cache_release(page);
        }

        return 0;
}
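
/*
 * pwrite fast path for phys objects: first try a non-caching copy while
 * holding struct_mutex; if that faults because the user pages are not
 * resident, drop the lock and retry with plain copy_from_user(), which
 * may sleep. Dropping the lock is safe because the phys binding is fixed
 * for the lifetime of the object.
 */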
static int
i915_gem_phys_pwrite(struct drm_device *dev,
                     struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pwrite *args,
                     struct drm_file *file_priv)
{
        void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
        char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;

        if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
                unsigned long unwritten;

                /* The physical object once assigned is fixed for the lifetime
                 * of the obj, so we can safely drop the lock and continue
                 * to access vaddr.
                 */
                mutex_unlock(&dev->struct_mutex);
                unwritten = copy_from_user(vaddr, user_data, args->size);
                mutex_lock(&dev->struct_mutex);
                if (unwritten)
                        return -EFAULT;
        }

        intel_gtt_chipset_flush();
        return 0;
}
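
/*
 * Called when a client's file handle is closed. Each request carries a
 * back-pointer to its submitting client for throttling; sever those links
 * so that later request retirement never touches the freed file_priv.
 */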
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
        struct drm_i915_file_private *file_priv = file->driver_priv;

        /* Clean up our request list when the client is going away, so that
         * later retire_requests won't dereference our soon-to-be-gone
         * file_priv.
         */
        spin_lock(&file_priv->mm.lock);
        while (!list_empty(&file_priv->mm.request_list)) {
                struct drm_i915_gem_request *request;

                request = list_first_entry(&file_priv->mm.request_list,
                                           struct drm_i915_gem_request,
                                           client_list);
                list_del(&request->client_list);
                request->file_priv = NULL;
        }
        spin_unlock(&file_priv->mm.lock);
}

static int
i915_gpu_is_active(struct drm_device *dev)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        int lists_empty;

        lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
                      list_empty(&dev_priv->mm.active_list);

        return !lists_empty;
}
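
/*
 * Shrinker callback. With the shrinker API of this kernel generation,
 * ->shrink() is called with nr_to_scan == 0 to ask how many objects could
 * be freed, and with nr_to_scan > 0 to actually free up to that many; both
 * modes return the remaining object count scaled by vfs_cache_pressure.
 */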
static int
i915_gem_inactive_shrink(struct shrinker *shrinker,
                         int nr_to_scan,
                         gfp_t gfp_mask)
{
        struct drm_i915_private *dev_priv =
                container_of(shrinker,
                             struct drm_i915_private,
                             mm.inactive_shrinker);
        struct drm_device *dev = dev_priv->dev;
        struct drm_i915_gem_object *obj, *next;
        int cnt;

        if (!mutex_trylock(&dev->struct_mutex))
                return 0;

        /* "fast-path" to count number of available objects */
        if (nr_to_scan == 0) {
                cnt = 0;
                list_for_each_entry(obj,
                                    &dev_priv->mm.inactive_list,
                                    mm_list)
                        cnt++;
                mutex_unlock(&dev->struct_mutex);
                return cnt / 100 * sysctl_vfs_cache_pressure;
        }

rescan:
        /* first scan for clean buffers */
        i915_gem_retire_requests(dev);

        list_for_each_entry_safe(obj, next,
                                 &dev_priv->mm.inactive_list,
                                 mm_list) {
                if (i915_gem_object_is_purgeable(obj)) {
                        i915_gem_object_unbind(obj);
                        if (--nr_to_scan == 0)
                                break;
                }
        }

        /* second pass, evict/count anything still on the inactive list */
        cnt = 0;
        list_for_each_entry_safe(obj, next,
                                 &dev_priv->mm.inactive_list,
                                 mm_list) {
                if (nr_to_scan) {
                        i915_gem_object_unbind(obj);
                        nr_to_scan--;
                } else
                        cnt++;
        }

        if (nr_to_scan && i915_gpu_is_active(dev)) {
                /*
                 * We are desperate for pages, so as a last resort, wait
                 * for the GPU to finish and discard whatever we can.
                 * This has a dramatic impact to reduce the number of
                 * OOM-killer events whilst running the GPU aggressively.
                 */
                if (i915_gpu_idle(dev) == 0)
                        goto rescan;
        }

        mutex_unlock(&dev->struct_mutex);
        return cnt / 100 * sysctl_vfs_cache_pressure;
}