/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 */

#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;

static int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
						  struct intel_ring_buffer *pipelined);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
static int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
					bool map_and_fenceable);
static void i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
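/* Check for a pending GPU reset before touching GEM state: if the GPU is
 * wedged, wait on the error completion, and if it is still wedged afterwards
 * put the consumed completion token back so later waiters don't block on a
 * count of zero.
 */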
i915_gem_check_is_wedged(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;

	if (!atomic_read(&dev_priv->mm.wedged))

	ret = wait_for_completion_interruptible(x);

	/* Success, we reset the GPU! */
	if (!atomic_read(&dev_priv->mm.wedged))

	/* GPU is hung, bump the completion count to account for
	 * the token we just consumed so that we never hit zero and
	 * end up waiting upon a subsequent completion event that
	 */
	spin_lock_irqsave(&x->wait.lock, flags);
	spin_unlock_irqrestore(&x->wait.lock, flags);

static int i915_mutex_lock_interruptible(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;

	ret = i915_gem_check_is_wedged(dev);

	ret = mutex_lock_interruptible(&dev->struct_mutex);

	if (atomic_read(&dev_priv->mm.wedged)) {
		mutex_unlock(&dev->struct_mutex);

	WARN_ON(i915_verify_lists(dev));
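/* An object is "inactive" when it is bound into the GTT but neither active
 * on the GPU nor pinned.
 */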
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
	return obj->gtt_space && !obj->active && obj->pin_count == 0;

void i915_gem_do_init(struct drm_device *dev,
		      unsigned long mappable_end,
	drm_i915_private_t *dev_priv = dev->dev_private;

	drm_mm_init(&dev_priv->mm.gtt_space, start,

	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
	dev_priv->mm.gtt_mappable_end = mappable_end;

i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
	struct drm_i915_gem_init *args = data;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))

	mutex_lock(&dev->struct_mutex);
	i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
		pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

/**
 * Creates a new mm object and returns a handle to it.
 */
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
	struct drm_i915_gem_create *args = data;
	struct drm_i915_gem_object *obj;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, args->size);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	args->handle = handle;
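/* Objects need the manual bit-17 swizzling helpers below when the X-tiling
 * swizzle mode is I915_BIT_6_SWIZZLE_9_10_17 and the object is tiled; the
 * pread/pwrite paths then use slow_shmem_bit17_copy() instead of the plain
 * copy.
 */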
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;

slow_shmem_copy(struct page *dst_page,
		struct page *src_page,
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

slow_shmem_bit17_copy(struct page *gpu_page,
		      struct page *cpu_page,
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		return slow_shmem_copy(cpu_page, cpu_offset,
				       gpu_page, gpu_offset, length);
		return slow_shmem_copy(gpu_page, gpu_offset,
				       cpu_page, cpu_offset, length);

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space.  On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
i915_gem_shmem_pread_fast(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	offset = args->offset;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_to_user_inatomic(user_data,
		kunmap_atomic(vaddr);

		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
/**
 * This is the fallback shmem pread path, which allocates temporary storage
 * in kernel space to copy_to_user into outside of the struct_mutex, so we
 * can copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
i915_gem_shmem_pread_slow(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {

	ret = i915_gem_object_set_cpu_read_domain_range(obj,

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
			return PTR_ERR(page);

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      user_pages[data_page_index],
			slow_shmem_copy(user_pages[data_page_index],

		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;

	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		mark_page_accessed(user_pages[i]);
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {

	ret = i915_gem_object_set_cpu_read_domain_range(obj,

	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file);

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
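/* GTT pwrite helpers: the fast path copies through an atomic write-combining
 * mapping and therefore must not fault, while the slow path copies from
 * already-pinned user pages via io_mapping_map_wc()/kmap() and so may sleep.
 */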
/* This is the fast write path which cannot handle
 * page faults in the source data
 */
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
	io_mapping_unmap_atomic(vaddr_atomic);

/* Here's the write path which can sleep for
 */
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
	char __iomem *dst_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,

	io_mapping_unmap(dst_vaddr);
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
	drm_i915_private_t *dev_priv = dev->dev_private;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;

	offset = obj->gtt_offset + args->offset;

		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length))

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
i915_gem_gtt_pwrite_slow(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
	drm_i915_private_t *dev_priv = dev->dev_private;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	uint64_t data_ptr = args->data_ptr;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		goto out_unpin_pages;

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
		goto out_unpin_pages;

	offset = obj->gtt_offset + args->offset;

		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;

	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);
/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
i915_gem_shmem_pwrite_fast(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	offset = args->offset;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
			return PTR_ERR(page);

		vaddr = kmap_atomic(page, KM_USER0);
		ret = __copy_from_user_inatomic(vaddr + page_offset,
		kunmap_atomic(vaddr, KM_USER0);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
i915_gem_shmem_pwrite_slow(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      user_pages[data_page_index],
			slow_shmem_copy(page,
					user_pages[data_page_index],

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;

	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
	else if (obj->tiling_mode == I915_TILING_NONE &&
		 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_object_pin(obj, 0, true);

		ret = i915_gem_object_set_to_gtt_domain(obj, 1);

		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);

		i915_gem_object_unpin(obj);
		ret = i915_gem_object_set_to_cpu_domain(obj, 1);

		if (!i915_gem_object_needs_bit17_swizzle(obj))
			ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
	if (read_domains & I915_GEM_GPU_DOMAINS)

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));

	intel_mark_busy(dev, obj);

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Update the LRU on the fence for the CPU access that's
		 */
		if (obj->fence_reg != I915_FENCE_REG_NONE) {
			struct drm_i915_fence_reg *reg =
				&dev_priv->fence_regs[obj->fence_reg];
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	/* Maintain LRU order of "inactive" objects */
	if (ret == 0 && i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
/**
 * Called when user space has done writes to this buffer
 */
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));

	/* Pinned buffers may be scanout, so flush the cache */
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	obj = drm_gem_object_lookup(dev, file, args->handle);

	if (obj->size > dev_priv->mm.gtt_mappable_end) {
		drm_gem_object_unreference_unlocked(obj);

	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
	up_write(&current->mm->mmap_sem);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))

	args->addr_ptr = (uint64_t) addr;
/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>

	/* Now bind it into the GTT if needed */
	mutex_lock(&dev->struct_mutex);

	if (!obj->map_and_fenceable) {
		ret = i915_gem_object_unbind(obj);

	if (!obj->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0, true);

	ret = i915_gem_object_set_to_gtt_domain(obj, write);

	/* Need a new fence register? */
	if (obj->tiling_mode != I915_TILING_NONE) {
		ret = i915_gem_object_get_fence_reg(obj, true);

	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	obj->fault_mappable = true;

	pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);

	mutex_unlock(&dev->struct_mutex);

		return VM_FAULT_NOPAGE;
		return VM_FAULT_OOM;
		return VM_FAULT_SIGBUS;
/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call.  The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;
	struct drm_local_map *map;

	/* Set the object up for mmap'ing */
	list = &obj->base.map_list;
	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);

	map->type = _DRM_GEM;
	map->size = obj->base.size;

	/* Get a DRM GEM mmap offset allocated... */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->base.size / PAGE_SIZE,
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n",

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->base.size / PAGE_SIZE,
	if (!list->file_offset_node) {

	list->hash.key = list->file_offset_node->start;
	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
		DRM_ERROR("failed to add to map hash\n");

	drm_mm_put_block(list->file_offset_node);
/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure.  Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked.  Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
	if (!obj->fault_mappable)

	unmap_mapping_range(obj->base.dev->dev_mapping,
			    (loff_t)obj->base.map_list.hash.key << PAGE_SHIFT,

	obj->fault_mappable = false;

i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list = &obj->base.map_list;

	drm_ht_remove_item(&mm->offset_hash, &list->hash);
	drm_mm_put_block(list->file_offset_node);
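/* On gen4+ any object size is acceptable for fencing; older chips need a
 * power-of-two fence region, so the size is rounded up below for tiled
 * objects.
 */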
i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    obj->tiling_mode == I915_TILING_NONE)
		return obj->base.size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)

	while (size < obj->base.size)
/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    obj->tiling_mode == I915_TILING_NONE)

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(obj);

/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;

	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    obj->tiling_mode == I915_TILING_NONE)

	/*
	 * Older chips need unfenced tiled buffers to be aligned to the left
	 * edge of an even tile row (where tile rows are counted as if the bo is
	 * placed in a fenced gtt region).
	 */
	    (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))

	return tile_height * obj->stride * 2;
/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_mmap_gtt *args = data;
	struct drm_i915_gem_object *obj;

	if (!(dev->driver->driver_features & DRIVER_GEM))

	ret = i915_mutex_lock_interruptible(dev);

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");

	if (!obj->base.map_list.map) {
		ret = i915_gem_create_mmap_offset(obj);

	args->offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;

	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
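/* The object's backing pages live in its shmem file; get_pages looks each one
 * up (and thereby pins it) with read_cache_page_gfp(), and put_pages releases
 * them again, marking them dirty/accessed as appropriate.
 */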
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
	struct address_space *mapping;
	struct inode *inode;

	/* Get the list of pages out of our struct file.  They'll be pinned
	 * at this point until we release them.
	 */
	page_count = obj->base.size / PAGE_SIZE;
	BUG_ON(obj->pages != NULL);
	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
	if (obj->pages == NULL)

	inode = obj->base.filp->f_path.dentry->d_inode;
	mapping = inode->i_mapping;
	for (i = 0; i < page_count; i++) {
		page = read_cache_page_gfp(mapping, i,

		obj->pages[i] = page;

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_do_bit_17_swizzle(obj);

		page_cache_release(obj->pages[i]);

	drm_free_large(obj->pages);

	return PTR_ERR(page);

i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
	int page_count = obj->base.size / PAGE_SIZE;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)

	for (i = 0; i < page_count; i++) {
			set_page_dirty(obj->pages[i]);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj->pages[i]);

		page_cache_release(obj->pages[i]);

	drm_free_large(obj->pages);
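/* Active/flushing/inactive bookkeeping: an object moved to the active list
 * records the seqno of its last rendering request; when that request retires
 * the object moves to the flushing list if it still has a dirty write domain,
 * otherwise to the inactive list.
 */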
i915_gem_next_request_seqno(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
	drm_i915_private_t *dev_priv = dev->dev_private;
	return ring->outstanding_lazy_request = dev_priv->next_seqno;

i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring)
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t seqno = i915_gem_next_request_seqno(dev, ring);

	BUG_ON(ring == NULL);

	/* Add a reference if we're newly entering the active list. */
		drm_gem_object_reference(&obj->base);

	/* Move from whatever list we were on to the tail of execution. */
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_rendering_seqno = seqno;
	if (obj->fenced_gpu_access) {
		struct drm_i915_fence_reg *reg;

		BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);

		obj->last_fenced_seqno = seqno;
		obj->last_fenced_ring = ring;

		reg = &dev_priv->fence_regs[obj->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
	list_del_init(&obj->ring_list);
	obj->last_rendering_seqno = 0;
	obj->last_fenced_seqno = 0;

i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	BUG_ON(!obj->active);
	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);

	i915_gem_object_move_off_active(obj);

i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (obj->pin_count != 0)
		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));
	BUG_ON(!obj->active);

	i915_gem_object_move_off_active(obj);
	obj->fenced_gpu_access = false;
	obj->last_fenced_ring = NULL;

	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
/* Immediately discard the backing storage */
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
	struct inode *inode;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.  Here we mirror the actions taken
	 * by shmem_delete_inode() to release the backing store.
	 */
	inode = obj->base.filp->f_path.dentry->d_inode;
	truncate_inode_pages(inode->i_mapping, 0);
	if (inode->i_op->truncate_range)
		inode->i_op->truncate_range(inode, 0, (loff_t)-1);

	obj->madv = __I915_MADV_PURGED;

i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
	return obj->madv == I915_MADV_DONTNEED;
i915_gem_process_flushing_list(struct drm_device *dev,
			       uint32_t flush_domains,
			       struct intel_ring_buffer *ring)
	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next,
				 &ring->gpu_write_list,
		if (obj->base.write_domain & flush_domains) {
			uint32_t old_write_domain = obj->base.write_domain;

			obj->base.write_domain = 0;
			list_del_init(&obj->gpu_write_list);
			i915_gem_object_move_to_active(obj, ring);

			trace_i915_gem_object_change_domain(obj,
							    obj->base.read_domains,
i915_add_request(struct drm_device *dev,
		 struct drm_file *file,
		 struct drm_i915_gem_request *request,
		 struct intel_ring_buffer *ring)
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = NULL;

	BUG_ON(request == NULL);

		file_priv = file->driver_priv;

	ret = ring->add_request(ring, &seqno);

	ring->outstanding_lazy_request = false;

	request->seqno = seqno;
	request->ring = ring;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);

	if (!dev_priv->mm.suspended) {
		mod_timer(&dev_priv->hangcheck_timer,
			  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work, HZ);
/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
	uint32_t flush_domains = 0;

	/* The sampler always gets flushed on i965 (sigh) */
	if (INTEL_INFO(dev)->gen >= 4)
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;

	ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);

i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
	struct drm_i915_file_private *file_priv = request->file_priv;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);

static void i915_gem_reset_fences(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;

	for (i = 0; i < 16; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
			i915_gem_clear_fence_reg(reg->obj);
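/* After a GPU reset: drop every outstanding request, move objects off the
 * per-ring active lists and the global flushing list onto the inactive list,
 * strip GPU read domains, and clear the fence registers.
 */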
void i915_gem_reset(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	i915_gem_reset_ring_lists(dev_priv, &dev_priv->render_ring);
	i915_gem_reset_ring_lists(dev_priv, &dev_priv->bsd_ring);
	i915_gem_reset_ring_lists(dev_priv, &dev_priv->blt_ring);

	/* Remove anything from the flushing lists. The GPU cache is likely
	 * to be lost on reset along with the data, so simply move the
	 * lost bo to the inactive list.
	 */
	while (!list_empty(&dev_priv->mm.flushing_list)) {
		obj = list_first_entry(&dev_priv->mm.flushing_list,
				       struct drm_i915_gem_object,

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
	list_for_each_entry(obj,
			    &dev_priv->mm.inactive_list,
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;

	/* The fence registers are invalidated so clear them out */
	i915_gem_reset_fences(dev);
/**
 * This function clears the request list as sequence numbers are passed.
 */
i915_gem_retire_requests_ring(struct drm_device *dev,
			      struct intel_ring_buffer *ring)
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!ring->status_page.page_addr ||
	    list_empty(&ring->request_list))

	WARN_ON(i915_verify_lists(dev));

	seqno = ring->get_seqno(ring);
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,

		if (!i915_seqno_passed(seqno, request->seqno))

		trace_i915_gem_request_retire(dev, request->seqno);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,

		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))

		if (obj->base.write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
			i915_gem_object_move_to_inactive(obj);

	if (unlikely(dev_priv->trace_irq_seqno &&
		     i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
		ring->user_irq_put(ring);
		dev_priv->trace_irq_seqno = 0;

	WARN_ON(i915_verify_lists(dev));
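/* The global retire path first drains the deferred_free_list (taking care not
 * to recurse back into request retiring while freeing), then retires each of
 * the three rings in turn.
 */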
i915_gem_retire_requests(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
		struct drm_i915_gem_object *obj, *next;

		/* We must be careful that during unbind() we do not
		 * accidentally infinitely recurse into retire requests.
		 *
		 *   retire -> free -> unbind -> wait -> retire_ring
		 */
		list_for_each_entry_safe(obj, next,
					 &dev_priv->mm.deferred_free_list,
			i915_gem_free_object_tail(obj);

	i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
	i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
	i915_gem_retire_requests_ring(dev, &dev_priv->blt_ring);
i915_gem_retire_work_handler(struct work_struct *work)
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	/* Come back later if the device is busy... */
	if (!mutex_trylock(&dev->struct_mutex)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);

	i915_gem_retire_requests(dev);

	if (!dev_priv->mm.suspended &&
	    (!list_empty(&dev_priv->render_ring.request_list) ||
	     !list_empty(&dev_priv->bsd_ring.request_list) ||
	     !list_empty(&dev_priv->blt_ring.request_list)))
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
	mutex_unlock(&dev->struct_mutex);
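/* Wait for a seqno on a ring.  If the seqno is still only the ring's
 * outstanding lazy request, a real request is emitted first; if something
 * (e.g. vbetool) has disabled interrupts they are re-installed, and the wait
 * itself is interruptible or not depending on the caller.
 */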
i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
		     bool interruptible, struct intel_ring_buffer *ring)
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (atomic_read(&dev_priv->mm.wedged))

	if (seqno == ring->outstanding_lazy_request) {
		struct drm_i915_gem_request *request;

		request = kzalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)

		ret = i915_add_request(dev, NULL, request, ring);

		seqno = request->seqno;

	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
		if (HAS_PCH_SPLIT(dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
			ier = I915_READ(IER);

			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			i915_driver_irq_preinstall(dev);
			i915_driver_irq_postinstall(dev);

		trace_i915_gem_request_wait_begin(dev, seqno);

		ring->waiting_seqno = seqno;
		ring->user_irq_get(ring);
			ret = wait_event_interruptible(ring->irq_queue,
						       i915_seqno_passed(ring->get_seqno(ring), seqno)
						       || atomic_read(&dev_priv->mm.wedged));
			wait_event(ring->irq_queue,
				   i915_seqno_passed(ring->get_seqno(ring), seqno)
				   || atomic_read(&dev_priv->mm.wedged));
		ring->user_irq_put(ring);
		ring->waiting_seqno = 0;

		trace_i915_gem_request_wait_end(dev, seqno);

	if (atomic_read(&dev_priv->mm.wedged))

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
			  __func__, ret, seqno, ring->get_seqno(ring),
			  dev_priv->next_seqno);

	/* Directly dispatch request retiring.  While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
		i915_gem_retire_requests_ring(dev, ring);
/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
i915_wait_request(struct drm_device *dev, uint32_t seqno,
		  struct intel_ring_buffer *ring)
	return i915_do_wait_request(dev, seqno, 1, ring);

i915_gem_flush_ring(struct drm_device *dev,
		    struct intel_ring_buffer *ring,
		    uint32_t invalidate_domains,
		    uint32_t flush_domains)
	ring->flush(ring, invalidate_domains, flush_domains);
	i915_gem_process_flushing_list(dev, flush_domains, ring);
i915_gem_flush(struct drm_device *dev,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains,
	       uint32_t flush_rings)
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		if (flush_rings & RING_RENDER)
			i915_gem_flush_ring(dev, &dev_priv->render_ring,
					    invalidate_domains, flush_domains);
		if (flush_rings & RING_BSD)
			i915_gem_flush_ring(dev, &dev_priv->bsd_ring,
					    invalidate_domains, flush_domains);
		if (flush_rings & RING_BLT)
			i915_gem_flush_ring(dev, &dev_priv->blt_ring,
					    invalidate_domains, flush_domains);
/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
	struct drm_device *dev = obj->base.dev;

	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
	 */
	BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
		ret = i915_do_wait_request(dev,
					   obj->last_rendering_seqno,
/**
 * Unbinds an object from the GTT aperture.
 */
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
	if (obj->gtt_space == NULL)

	if (obj->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");

	/* blow away mappings if mapped through GTT */
	i915_gem_release_mmap(obj);

	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it. This will
	 * also ensure that all pending GPU writes are finished
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret == -ERESTARTSYS)
	/* Continue on if we fail due to EIO, the GPU is hung so we
	 * should be safe and we need to cleanup or else we might
	 * cause memory corruption through use-after-free.
	 */
		i915_gem_clflush_object(obj);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;

	/* release the fence reg _after_ flushing */
	if (obj->fence_reg != I915_FENCE_REG_NONE)
		i915_gem_clear_fence_reg(obj);

	i915_gem_gtt_unbind_object(obj);
	i915_gem_object_put_pages_gtt(obj);

	list_del_init(&obj->gtt_list);
	list_del_init(&obj->mm_list);
	/* Avoid an unnecessary call to unbind on rebind. */
	obj->map_and_fenceable = true;

	drm_mm_put_block(obj->gtt_space);
	obj->gtt_space = NULL;
	obj->gtt_offset = 0;

	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);

	trace_i915_gem_object_unbind(obj);
static int i915_ring_idle(struct drm_device *dev,
			  struct intel_ring_buffer *ring)
	if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))

	i915_gem_flush_ring(dev, ring,
			    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	return i915_wait_request(dev,
				 i915_gem_next_request_seqno(dev, ring),

i915_gpu_idle(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;

	lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
		       list_empty(&dev_priv->mm.active_list));

	/* Flush everything onto the inactive list. */
	ret = i915_ring_idle(dev, &dev_priv->render_ring);

	ret = i915_ring_idle(dev, &dev_priv->bsd_ring);

	ret = i915_ring_idle(dev, &dev_priv->blt_ring);
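/* Fence register programming is per generation (Sandy Bridge, i965, i915 and
 * i830 variants below).  Each helper either emits MI_LOAD_REGISTER_IMM on the
 * supplied pipelined ring or, with no ring, writes the register directly via
 * I915_WRITE/I915_WRITE64.
 */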
static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
				       struct intel_ring_buffer *pipelined)
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;

	val = (uint64_t)((obj->gtt_offset + size - 4096) &
	val |= obj->gtt_offset & 0xfffff000;
	val |= (uint64_t)((obj->stride / 128) - 1) <<
		SANDYBRIDGE_FENCE_PITCH_SHIFT;

	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

		int ret = intel_ring_begin(pipelined, 6);

		intel_ring_emit(pipelined, MI_NOOP);
		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
		intel_ring_emit(pipelined, (u32)val);
		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
		intel_ring_emit(pipelined, (u32)(val >> 32));
		intel_ring_advance(pipelined);

		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
				struct intel_ring_buffer *pipelined)
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;

	val = (uint64_t)((obj->gtt_offset + size - 4096) &
	val |= obj->gtt_offset & 0xfffff000;
	val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

		int ret = intel_ring_begin(pipelined, 6);

		intel_ring_emit(pipelined, MI_NOOP);
		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
		intel_ring_emit(pipelined, (u32)val);
		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
		intel_ring_emit(pipelined, (u32)(val >> 32));
		intel_ring_advance(pipelined);

		I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
				struct intel_ring_buffer *pipelined)
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	u32 fence_reg, val, pitch_val;

	if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
		 (size & -size) != size ||
		 (obj->gtt_offset & (size - 1)),
		 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
		 obj->gtt_offset, obj->map_and_fenceable, size))

	if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))

	/* Note: pitch better be a power of two tile widths */
	pitch_val = obj->stride / tile_width;
	pitch_val = ffs(pitch_val) - 1;

	val = obj->gtt_offset;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
	val |= I915_FENCE_SIZE_BITS(size);
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	val |= I830_FENCE_REG_VALID;

	fence_reg = obj->fence_reg;
		fence_reg = FENCE_REG_830_0 + fence_reg * 4;
		fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;

		int ret = intel_ring_begin(pipelined, 4);

		intel_ring_emit(pipelined, MI_NOOP);
		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(pipelined, fence_reg);
		intel_ring_emit(pipelined, val);
		intel_ring_advance(pipelined);

		I915_WRITE(fence_reg, val);
2368 static int i830_write_fence_reg(struct drm_i915_gem_object
*obj
,
2369 struct intel_ring_buffer
*pipelined
)
2371 struct drm_device
*dev
= obj
->base
.dev
;
2372 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
2373 u32 size
= obj
->gtt_space
->size
;
2374 int regnum
= obj
->fence_reg
;
2378 if (WARN((obj
->gtt_offset
& ~I830_FENCE_START_MASK
) ||
2379 (size
& -size
) != size
||
2380 (obj
->gtt_offset
& (size
- 1)),
2381 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2382 obj
->gtt_offset
, size
))
2385 pitch_val
= obj
->stride
/ 128;
2386 pitch_val
= ffs(pitch_val
) - 1;
2388 val
= obj
->gtt_offset
;
2389 if (obj
->tiling_mode
== I915_TILING_Y
)
2390 val
|= 1 << I830_FENCE_TILING_Y_SHIFT
;
2391 val
|= I830_FENCE_SIZE_BITS(size
);
2392 val
|= pitch_val
<< I830_FENCE_PITCH_SHIFT
;
2393 val
|= I830_FENCE_REG_VALID
;
2396 int ret
= intel_ring_begin(pipelined
, 4);
2400 intel_ring_emit(pipelined
, MI_NOOP
);
2401 intel_ring_emit(pipelined
, MI_LOAD_REGISTER_IMM(1));
2402 intel_ring_emit(pipelined
, FENCE_REG_830_0
+ regnum
*4);
2403 intel_ring_emit(pipelined
, val
);
2404 intel_ring_advance(pipelined
);
2406 I915_WRITE(FENCE_REG_830_0
+ regnum
* 4, val
);
2411 static int i915_find_fence_reg(struct drm_device
*dev
,
2414 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
2415 struct drm_i915_fence_reg
*reg
;
2416 struct drm_i915_gem_object
*obj
= NULL
;
2419 /* First try to find a free reg */
2421 for (i
= dev_priv
->fence_reg_start
; i
< dev_priv
->num_fence_regs
; i
++) {
2422 reg
= &dev_priv
->fence_regs
[i
];
2426 if (!reg
->obj
->pin_count
)
2433 /* None available, try to steal one or wait for a user to finish */
2434 avail
= I915_FENCE_REG_NONE
;
2435 list_for_each_entry(reg
, &dev_priv
->mm
.fence_list
,
2442 avail
= obj
->fence_reg
;
2446 BUG_ON(avail
== I915_FENCE_REG_NONE
);
2448 /* We only have a reference on obj from the active list. put_fence_reg
2449 * might drop that one, causing a use-after-free in it. So hold a
2450 * private reference to obj like the other callers of put_fence_reg
2451 * (set_tiling ioctl) do. */
2452 drm_gem_object_reference(&obj
->base
);
2453 ret
= i915_gem_object_put_fence_reg(obj
, interruptible
);
2454 drm_gem_object_unreference(&obj
->base
);
2462 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2463 * @obj: object to map through a fence reg
2465 * When mapping objects through the GTT, userspace wants to be able to write
2466 * to them without having to worry about swizzling if the object is tiled.
2468 * This function walks the fence regs looking for a free one for @obj,
2469 * stealing one if it can't find any.
2471 * It then sets up the reg based on the object's properties: address, pitch
2472 * and tiling format.
2475 i915_gem_object_get_fence_reg(struct drm_i915_gem_object
*obj
,
2478 struct drm_device
*dev
= obj
->base
.dev
;
2479 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
2480 struct drm_i915_fence_reg
*reg
= NULL
;
2481 struct intel_ring_buffer
*pipelined
= NULL
;
2484 /* Just update our place in the LRU if our fence is getting used. */
2485 if (obj
->fence_reg
!= I915_FENCE_REG_NONE
) {
2486 reg
= &dev_priv
->fence_regs
[obj
->fence_reg
];
2487 list_move_tail(®
->lru_list
, &dev_priv
->mm
.fence_list
);
2491 switch (obj
->tiling_mode
) {
2492 case I915_TILING_NONE
:
2493 WARN(1, "allocating a fence for non-tiled object?\n");
2498 WARN((obj
->stride
& (512 - 1)),
2499 "object 0x%08x is X tiled but has non-512B pitch\n",
2505 WARN((obj
->stride
& (128 - 1)),
2506 "object 0x%08x is Y tiled but has non-128B pitch\n",
2511 ret
= i915_find_fence_reg(dev
, interruptible
);
2515 obj
->fence_reg
= ret
;
2516 reg
= &dev_priv
->fence_regs
[obj
->fence_reg
];
2517 list_add_tail(®
->lru_list
, &dev_priv
->mm
.fence_list
);
2521 switch (INTEL_INFO(dev
)->gen
) {
2523 ret
= sandybridge_write_fence_reg(obj
, pipelined
);
2527 ret
= i965_write_fence_reg(obj
, pipelined
);
2530 ret
= i915_write_fence_reg(obj
, pipelined
);
2533 ret
= i830_write_fence_reg(obj
, pipelined
);
2537 trace_i915_gem_object_get_fence(obj
,
2544 * i915_gem_clear_fence_reg - clear out fence register info
2545 * @obj: object to clear
2547 * Zeroes out the fence register itself and clears out the associated
2548 * data structures in dev_priv and obj.
2551 i915_gem_clear_fence_reg(struct drm_i915_gem_object
*obj
)
2553 struct drm_device
*dev
= obj
->base
.dev
;
2554 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
2555 struct drm_i915_fence_reg
*reg
= &dev_priv
->fence_regs
[obj
->fence_reg
];
2558 switch (INTEL_INFO(dev
)->gen
) {
2560 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0
+
2561 (obj
->fence_reg
* 8), 0);
2565 I915_WRITE64(FENCE_REG_965_0
+ (obj
->fence_reg
* 8), 0);
2568 if (obj
->fence_reg
>= 8)
2569 fence_reg
= FENCE_REG_945_8
+ (obj
->fence_reg
- 8) * 4;
2572 fence_reg
= FENCE_REG_830_0
+ obj
->fence_reg
* 4;
2574 I915_WRITE(fence_reg
, 0);
2579 obj
->fence_reg
= I915_FENCE_REG_NONE
;
2580 list_del_init(®
->lru_list
);
2584 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2585 * to the buffer to finish, and then resets the fence register.
2586 * @obj: tiled object holding a fence register.
2587 * @bool: whether the wait upon the fence is interruptible
2589 * Zeroes out the fence register itself and clears out the associated
2590 * data structures in dev_priv and obj.
2593 i915_gem_object_put_fence_reg(struct drm_i915_gem_object
*obj
,
2596 struct drm_device
*dev
= obj
->base
.dev
;
2599 if (obj
->fence_reg
== I915_FENCE_REG_NONE
)
2602 /* If we've changed tiling, GTT-mappings of the object
2603 * need to re-fault to ensure that the correct fence register
2604 * setup is in place.
2606 i915_gem_release_mmap(obj
);
2608 /* On the i915, GPU access to tiled buffers is via a fence,
2609 * therefore we must wait for any outstanding access to complete
2610 * before clearing the fence.
2612 if (obj
->fenced_gpu_access
) {
2613 ret
= i915_gem_object_flush_gpu_write_domain(obj
, NULL
);
2617 obj
->fenced_gpu_access
= false;
2620 if (obj
->last_fenced_seqno
) {
2621 ret
= i915_do_wait_request(dev
,
2622 obj
->last_fenced_seqno
,
2624 obj
->last_fenced_ring
);
2628 obj
->last_fenced_seqno
= false;
2631 i915_gem_object_flush_gtt_write_domain(obj
);
2632 i915_gem_clear_fence_reg(obj
);
2638 * Finds free space in the GTT aperture and binds the object there.
2641 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object
*obj
,
2643 bool map_and_fenceable
)
2645 struct drm_device
*dev
= obj
->base
.dev
;
2646 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
2647 struct drm_mm_node
*free_space
;
2648 gfp_t gfpmask
= __GFP_NORETRY
| __GFP_NOWARN
;
2649 u32 size
, fence_size
, fence_alignment
, unfenced_alignment
;
2650 bool mappable
, fenceable
;
2653 if (obj
->madv
!= I915_MADV_WILLNEED
) {
2654 DRM_ERROR("Attempting to bind a purgeable object\n");
2658 fence_size
= i915_gem_get_gtt_size(obj
);
2659 fence_alignment
= i915_gem_get_gtt_alignment(obj
);
2660 unfenced_alignment
= i915_gem_get_unfenced_gtt_alignment(obj
);
2663 alignment
= map_and_fenceable
? fence_alignment
:
2665 if (map_and_fenceable
&& alignment
& (fence_alignment
- 1)) {
2666 DRM_ERROR("Invalid object alignment requested %u\n", alignment
);
2670 size
= map_and_fenceable
? fence_size
: obj
->base
.size
;
2672 /* If the object is bigger than the entire aperture, reject it early
2673 * before evicting everything in a vain attempt to find space.
2675 if (obj
->base
.size
>
2676 (map_and_fenceable
? dev_priv
->mm
.gtt_mappable_end
: dev_priv
->mm
.gtt_total
)) {
2677 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2682 if (map_and_fenceable
)
2684 drm_mm_search_free_in_range(&dev_priv
->mm
.gtt_space
,
2686 dev_priv
->mm
.gtt_mappable_end
,
2689 free_space
= drm_mm_search_free(&dev_priv
->mm
.gtt_space
,
2690 size
, alignment
, 0);
2692 if (free_space
!= NULL
) {
2693 if (map_and_fenceable
)
2695 drm_mm_get_block_range_generic(free_space
,
2697 dev_priv
->mm
.gtt_mappable_end
,
2701 drm_mm_get_block(free_space
, size
, alignment
);
2703 if (obj
->gtt_space
== NULL
) {
2704 /* If the gtt is empty and we're still having trouble
2705 * fitting our object in, we're out of memory.
2707 ret
= i915_gem_evict_something(dev
, size
, alignment
,
2715 ret
= i915_gem_object_get_pages_gtt(obj
, gfpmask
);
2717 drm_mm_put_block(obj
->gtt_space
);
2718 obj
->gtt_space
= NULL
;
2720 if (ret
== -ENOMEM
) {
2721 /* first try to clear up some space from the GTT */
2722 ret
= i915_gem_evict_something(dev
, size
,
2726 /* now try to shrink everyone else */
2741 ret
= i915_gem_gtt_bind_object(obj
);
2743 i915_gem_object_put_pages_gtt(obj
);
2744 drm_mm_put_block(obj
->gtt_space
);
2745 obj
->gtt_space
= NULL
;
2747 ret
= i915_gem_evict_something(dev
, size
,
2748 alignment
, map_and_fenceable
);
2755 list_add_tail(&obj
->gtt_list
, &dev_priv
->mm
.gtt_list
);
2756 list_add_tail(&obj
->mm_list
, &dev_priv
->mm
.inactive_list
);
2758 /* Assert that the object is not currently in any GPU domain. As it
2759 * wasn't in the GTT, there shouldn't be any way it could have been in
2762 BUG_ON(obj
->base
.read_domains
& I915_GEM_GPU_DOMAINS
);
2763 BUG_ON(obj
->base
.write_domain
& I915_GEM_GPU_DOMAINS
);
2765 obj
->gtt_offset
= obj
->gtt_space
->start
;
2768 obj
->gtt_space
->size
== fence_size
&&
2769 (obj
->gtt_space
->start
& (fence_alignment
-1)) == 0;
2772 obj
->gtt_offset
+ obj
->base
.size
<= dev_priv
->mm
.gtt_mappable_end
;
2774 obj
->map_and_fenceable
= mappable
&& fenceable
;
2776 trace_i915_gem_object_bind(obj
, obj
->gtt_offset
, map_and_fenceable
);
2781 i915_gem_clflush_object(struct drm_i915_gem_object
*obj
)
2783 /* If we don't have a page list set up, then we're not pinned
2784 * to GPU, and we can ignore the cache flush because it'll happen
2785 * again at bind time.
2787 if (obj
->pages
== NULL
)
2790 trace_i915_gem_object_clflush(obj
);
2792 drm_clflush_pages(obj
->pages
, obj
->base
.size
/ PAGE_SIZE
);
2795 /** Flushes any GPU write domain for the object if it's dirty. */
2797 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object
*obj
,
2798 struct intel_ring_buffer
*pipelined
)
2800 struct drm_device
*dev
= obj
->base
.dev
;
2802 if ((obj
->base
.write_domain
& I915_GEM_GPU_DOMAINS
) == 0)
2805 /* Queue the GPU write cache flushing we need. */
2806 i915_gem_flush_ring(dev
, obj
->ring
, 0, obj
->base
.write_domain
);
2807 BUG_ON(obj
->base
.write_domain
);
2809 if (pipelined
&& pipelined
== obj
->ring
)
2812 return i915_gem_object_wait_rendering(obj
, true);
2815 /** Flushes the GTT write domain for the object if it's dirty. */
2817 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object
*obj
)
2819 uint32_t old_write_domain
;
2821 if (obj
->base
.write_domain
!= I915_GEM_DOMAIN_GTT
)
2824 /* No actual flushing is required for the GTT write domain. Writes
2825 * to it immediately go to main memory as far as we know, so there's
2826 * no chipset flush. It also doesn't land in render cache.
2828 i915_gem_release_mmap(obj
);
2830 old_write_domain
= obj
->base
.write_domain
;
2831 obj
->base
.write_domain
= 0;
2833 trace_i915_gem_object_change_domain(obj
,
2834 obj
->base
.read_domains
,
2838 /** Flushes the CPU write domain for the object if it's dirty. */
2840 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object
*obj
)
2842 uint32_t old_write_domain
;
2844 if (obj
->base
.write_domain
!= I915_GEM_DOMAIN_CPU
)
2847 i915_gem_clflush_object(obj
);
2848 intel_gtt_chipset_flush();
2849 old_write_domain
= obj
->base
.write_domain
;
2850 obj
->base
.write_domain
= 0;
2852 trace_i915_gem_object_change_domain(obj
,
2853 obj
->base
.read_domains
,
2858 * Moves a single object to the GTT read, and possibly write domain.
2860 * This function returns when the move is complete, including waiting on
2864 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object
*obj
, bool write
)
2866 uint32_t old_write_domain
, old_read_domains
;
2869 /* Not valid to be called on unbound objects. */
2870 if (obj
->gtt_space
== NULL
)
2873 ret
= i915_gem_object_flush_gpu_write_domain(obj
, NULL
);
2877 i915_gem_object_flush_cpu_write_domain(obj
);
2880 ret
= i915_gem_object_wait_rendering(obj
, true);
2885 old_write_domain
= obj
->base
.write_domain
;
2886 old_read_domains
= obj
->base
.read_domains
;
2888 /* It should now be out of any other write domains, and we can update
2889 * the domain values for our changes.
2891 BUG_ON((obj
->base
.write_domain
& ~I915_GEM_DOMAIN_GTT
) != 0);
2892 obj
->base
.read_domains
|= I915_GEM_DOMAIN_GTT
;
2894 obj
->base
.read_domains
= I915_GEM_DOMAIN_GTT
;
2895 obj
->base
.write_domain
= I915_GEM_DOMAIN_GTT
;
2899 trace_i915_gem_object_change_domain(obj
,
2907 * Prepare buffer for display plane. Use uninterruptible for possible flush
2908 * wait, as in modesetting process we're not supposed to be interrupted.
2911 i915_gem_object_set_to_display_plane(struct drm_i915_gem_object
*obj
,
2912 struct intel_ring_buffer
*pipelined
)
2914 uint32_t old_read_domains
;
2917 /* Not valid to be called on unbound objects. */
2918 if (obj
->gtt_space
== NULL
)
2921 ret
= i915_gem_object_flush_gpu_write_domain(obj
, pipelined
);
2925 /* Currently, we are always called from an non-interruptible context. */
2927 ret
= i915_gem_object_wait_rendering(obj
, false);
2932 i915_gem_object_flush_cpu_write_domain(obj
);
2934 old_read_domains
= obj
->base
.read_domains
;
2935 obj
->base
.read_domains
|= I915_GEM_DOMAIN_GTT
;
2937 trace_i915_gem_object_change_domain(obj
,
2939 obj
->base
.write_domain
);
2945 i915_gem_object_flush_gpu(struct drm_i915_gem_object
*obj
,
2951 if (obj
->base
.write_domain
& I915_GEM_GPU_DOMAINS
)
2952 i915_gem_flush_ring(obj
->base
.dev
, obj
->ring
,
2953 0, obj
->base
.write_domain
);
2955 return i915_gem_object_wait_rendering(obj
, interruptible
);
2959 * Moves a single object to the CPU read, and possibly write domain.
2961 * This function returns when the move is complete, including waiting on
2965 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object
*obj
, bool write
)
2967 uint32_t old_write_domain
, old_read_domains
;
2970 ret
= i915_gem_object_flush_gpu_write_domain(obj
, false);
2974 i915_gem_object_flush_gtt_write_domain(obj
);
2976 /* If we have a partially-valid cache of the object in the CPU,
2977 * finish invalidating it and free the per-page flags.
2979 i915_gem_object_set_to_full_cpu_read_domain(obj
);
2982 ret
= i915_gem_object_wait_rendering(obj
, true);
2987 old_write_domain
= obj
->base
.write_domain
;
2988 old_read_domains
= obj
->base
.read_domains
;
2990 /* Flush the CPU cache if it's still invalid. */
2991 if ((obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) == 0) {
2992 i915_gem_clflush_object(obj
);
2994 obj
->base
.read_domains
|= I915_GEM_DOMAIN_CPU
;
2997 /* It should now be out of any other write domains, and we can update
2998 * the domain values for our changes.
3000 BUG_ON((obj
->base
.write_domain
& ~I915_GEM_DOMAIN_CPU
) != 0);
3002 /* If we're writing through the CPU, then the GPU read domains will
3003 * need to be invalidated at next use.
3006 obj
->base
.read_domains
= I915_GEM_DOMAIN_CPU
;
3007 obj
->base
.write_domain
= I915_GEM_DOMAIN_CPU
;
3010 trace_i915_gem_object_change_domain(obj
,
3018 * Set the next domain for the specified object. This
3019 * may not actually perform the necessary flushing/invaliding though,
3020 * as that may want to be batched with other set_domain operations
3022 * This is (we hope) the only really tricky part of gem. The goal
3023 * is fairly simple -- track which caches hold bits of the object
3024 * and make sure they remain coherent. A few concrete examples may
3025 * help to explain how it works. For shorthand, we use the notation
3026 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
3027 * a pair of read and write domain masks.
3029 * Case 1: the batch buffer
3035 * 5. Unmapped from GTT
3038 * Let's take these a step at a time
3041 * Pages allocated from the kernel may still have
3042 * cache contents, so we set them to (CPU, CPU) always.
3043 * 2. Written by CPU (using pwrite)
3044 * The pwrite function calls set_domain (CPU, CPU) and
3045 * this function does nothing (as nothing changes)
3047 * This function asserts that the object is not
3048 * currently in any GPU-based read or write domains
3050 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
3051 * As write_domain is zero, this function adds in the
3052 * current read domains (CPU+COMMAND, 0).
3053 * flush_domains is set to CPU.
3054 * invalidate_domains is set to COMMAND
3055 * clflush is run to get data out of the CPU caches
3056 * then i915_dev_set_domain calls i915_gem_flush to
3057 * emit an MI_FLUSH and drm_agp_chipset_flush
3058 * 5. Unmapped from GTT
3059 * i915_gem_object_unbind calls set_domain (CPU, CPU)
3060 * flush_domains and invalidate_domains end up both zero
3061 * so no flushing/invalidating happens
3065 * Case 2: The shared render buffer
3069 * 3. Read/written by GPU
3070 * 4. set_domain to (CPU,CPU)
3071 * 5. Read/written by CPU
3072 * 6. Read/written by GPU
3075 * Same as last example, (CPU, CPU)
3077 * Nothing changes (assertions find that it is not in the GPU)
3078 * 3. Read/written by GPU
3079 * execbuffer calls set_domain (RENDER, RENDER)
3080 * flush_domains gets CPU
3081 * invalidate_domains gets GPU
3083 * MI_FLUSH and drm_agp_chipset_flush
3084 * 4. set_domain (CPU, CPU)
3085 * flush_domains gets GPU
3086 * invalidate_domains gets CPU
3087 * wait_rendering (obj) to make sure all drawing is complete.
3088 * This will include an MI_FLUSH to get the data from GPU
3090 * clflush (obj) to invalidate the CPU cache
3091 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
3092 * 5. Read/written by CPU
3093 * cache lines are loaded and dirtied
3094 * 6. Read written by GPU
3095 * Same as last GPU access
3097 * Case 3: The constant buffer
3102 * 4. Updated (written) by CPU again
3111 * flush_domains = CPU
3112 * invalidate_domains = RENDER
3115 * drm_agp_chipset_flush
3116 * 4. Updated (written) by CPU again
3118 * flush_domains = 0 (no previous write domain)
3119 * invalidate_domains = 0 (no new read domains)
3122 * flush_domains = CPU
3123 * invalidate_domains = RENDER
3126 * drm_agp_chipset_flush
3129 i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object
*obj
,
3130 struct intel_ring_buffer
*ring
,
3131 struct change_domains
*cd
)
3133 uint32_t invalidate_domains
= 0, flush_domains
= 0;
3136 * If the object isn't moving to a new write domain,
3137 * let the object stay in multiple read domains
3139 if (obj
->base
.pending_write_domain
== 0)
3140 obj
->base
.pending_read_domains
|= obj
->base
.read_domains
;
3143 * Flush the current write domain if
3144 * the new read domains don't match. Invalidate
3145 * any read domains which differ from the old
3148 if (obj
->base
.write_domain
&&
3149 (((obj
->base
.write_domain
!= obj
->base
.pending_read_domains
||
3150 obj
->ring
!= ring
)) ||
3151 (obj
->fenced_gpu_access
&& !obj
->pending_fenced_gpu_access
))) {
3152 flush_domains
|= obj
->base
.write_domain
;
3153 invalidate_domains
|=
3154 obj
->base
.pending_read_domains
& ~obj
->base
.write_domain
;
3157 * Invalidate any read caches which may have
3158 * stale data. That is, any new read domains.
3160 invalidate_domains
|= obj
->base
.pending_read_domains
& ~obj
->base
.read_domains
;
3161 if ((flush_domains
| invalidate_domains
) & I915_GEM_DOMAIN_CPU
)
3162 i915_gem_clflush_object(obj
);
3164 /* blow away mappings if mapped through GTT */
3165 if ((flush_domains
| invalidate_domains
) & I915_GEM_DOMAIN_GTT
)
3166 i915_gem_release_mmap(obj
);
3168 /* The actual obj->write_domain will be updated with
3169 * pending_write_domain after we emit the accumulated flush for all
3170 * of our domain changes in execbuffers (which clears objects'
3171 * write_domains). So if we have a current write domain that we
3172 * aren't changing, set pending_write_domain to that.
3174 if (flush_domains
== 0 && obj
->base
.pending_write_domain
== 0)
3175 obj
->base
.pending_write_domain
= obj
->base
.write_domain
;
3177 cd
->invalidate_domains
|= invalidate_domains
;
3178 cd
->flush_domains
|= flush_domains
;
3179 if (flush_domains
& I915_GEM_GPU_DOMAINS
)
3180 cd
->flush_rings
|= obj
->ring
->id
;
3181 if (invalidate_domains
& I915_GEM_GPU_DOMAINS
)
3182 cd
->flush_rings
|= ring
->id
;
3186 * Moves the object from a partially CPU read to a full one.
3188 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3189 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3192 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object
*obj
)
3194 if (!obj
->page_cpu_valid
)
3197 /* If we're partially in the CPU read domain, finish moving it in.
3199 if (obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) {
3202 for (i
= 0; i
<= (obj
->base
.size
- 1) / PAGE_SIZE
; i
++) {
3203 if (obj
->page_cpu_valid
[i
])
3205 drm_clflush_pages(obj
->pages
+ i
, 1);
3209 /* Free the page_cpu_valid mappings which are now stale, whether
3210 * or not we've got I915_GEM_DOMAIN_CPU.
3212 kfree(obj
->page_cpu_valid
);
3213 obj
->page_cpu_valid
= NULL
;
3217 * Set the CPU read domain on a range of the object.
3219 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3220 * not entirely valid. The page_cpu_valid member of the object flags which
3221 * pages have been flushed, and will be respected by
3222 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3223 * of the whole object.
3225 * This function returns when the move is complete, including waiting on
3229 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object
*obj
,
3230 uint64_t offset
, uint64_t size
)
3232 uint32_t old_read_domains
;
3235 if (offset
== 0 && size
== obj
->base
.size
)
3236 return i915_gem_object_set_to_cpu_domain(obj
, 0);
3238 ret
= i915_gem_object_flush_gpu_write_domain(obj
, false);
3241 i915_gem_object_flush_gtt_write_domain(obj
);
3243 /* If we're already fully in the CPU read domain, we're done. */
3244 if (obj
->page_cpu_valid
== NULL
&&
3245 (obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) != 0)
3248 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3249 * newly adding I915_GEM_DOMAIN_CPU
3251 if (obj
->page_cpu_valid
== NULL
) {
3252 obj
->page_cpu_valid
= kzalloc(obj
->base
.size
/ PAGE_SIZE
,
3254 if (obj
->page_cpu_valid
== NULL
)
3256 } else if ((obj
->base
.read_domains
& I915_GEM_DOMAIN_CPU
) == 0)
3257 memset(obj
->page_cpu_valid
, 0, obj
->base
.size
/ PAGE_SIZE
);
3259 /* Flush the cache on any pages that are still invalid from the CPU's
3262 for (i
= offset
/ PAGE_SIZE
; i
<= (offset
+ size
- 1) / PAGE_SIZE
;
3264 if (obj
->page_cpu_valid
[i
])
3267 drm_clflush_pages(obj
->pages
+ i
, 1);
3269 obj
->page_cpu_valid
[i
] = 1;
3272 /* It should now be out of any other write domains, and we can update
3273 * the domain values for our changes.
3275 BUG_ON((obj
->base
.write_domain
& ~I915_GEM_DOMAIN_CPU
) != 0);
3277 old_read_domains
= obj
->base
.read_domains
;
3278 obj
->base
.read_domains
|= I915_GEM_DOMAIN_CPU
;
3280 trace_i915_gem_object_change_domain(obj
,
3282 obj
->base
.write_domain
);
3288 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object
*obj
,
3289 struct drm_file
*file_priv
,
3290 struct drm_i915_gem_exec_object2
*entry
,
3291 struct drm_i915_gem_relocation_entry
*reloc
)
3293 struct drm_device
*dev
= obj
->base
.dev
;
3294 struct drm_gem_object
*target_obj
;
3295 uint32_t target_offset
;
3298 target_obj
= drm_gem_object_lookup(dev
, file_priv
,
3299 reloc
->target_handle
);
3300 if (target_obj
== NULL
)
3303 target_offset
= to_intel_bo(target_obj
)->gtt_offset
;
3306 DRM_INFO("%s: obj %p offset %08x target %d "
3307 "read %08x write %08x gtt %08x "
3308 "presumed %08x delta %08x\n",
3311 (int) reloc
->offset
,
3312 (int) reloc
->target_handle
,
3313 (int) reloc
->read_domains
,
3314 (int) reloc
->write_domain
,
3315 (int) target_offset
,
3316 (int) reloc
->presumed_offset
,
3320 /* The target buffer should have appeared before us in the
3321 * exec_object list, so it should have a GTT space bound by now.
3323 if (target_offset
== 0) {
3324 DRM_ERROR("No GTT space found for object %d\n",
3325 reloc
->target_handle
);
3329 /* Validate that the target is in a valid r/w GPU domain */
3330 if (reloc
->write_domain
& (reloc
->write_domain
- 1)) {
3331 DRM_ERROR("reloc with multiple write domains: "
3332 "obj %p target %d offset %d "
3333 "read %08x write %08x",
3334 obj
, reloc
->target_handle
,
3335 (int) reloc
->offset
,
3336 reloc
->read_domains
,
3337 reloc
->write_domain
);
3340 if (reloc
->write_domain
& I915_GEM_DOMAIN_CPU
||
3341 reloc
->read_domains
& I915_GEM_DOMAIN_CPU
) {
3342 DRM_ERROR("reloc with read/write CPU domains: "
3343 "obj %p target %d offset %d "
3344 "read %08x write %08x",
3345 obj
, reloc
->target_handle
,
3346 (int) reloc
->offset
,
3347 reloc
->read_domains
,
3348 reloc
->write_domain
);
3351 if (reloc
->write_domain
&& target_obj
->pending_write_domain
&&
3352 reloc
->write_domain
!= target_obj
->pending_write_domain
) {
3353 DRM_ERROR("Write domain conflict: "
3354 "obj %p target %d offset %d "
3355 "new %08x old %08x\n",
3356 obj
, reloc
->target_handle
,
3357 (int) reloc
->offset
,
3358 reloc
->write_domain
,
3359 target_obj
->pending_write_domain
);
3363 target_obj
->pending_read_domains
|= reloc
->read_domains
;
3364 target_obj
->pending_write_domain
|= reloc
->write_domain
;
3366 /* If the relocation already has the right value in it, no
3367 * more work needs to be done.
3369 if (target_offset
== reloc
->presumed_offset
)
3372 /* Check that the relocation address is valid... */
3373 if (reloc
->offset
> obj
->base
.size
- 4) {
3374 DRM_ERROR("Relocation beyond object bounds: "
3375 "obj %p target %d offset %d size %d.\n",
3376 obj
, reloc
->target_handle
,
3377 (int) reloc
->offset
,
3378 (int) obj
->base
.size
);
3381 if (reloc
->offset
& 3) {
3382 DRM_ERROR("Relocation not 4-byte aligned: "
3383 "obj %p target %d offset %d.\n",
3384 obj
, reloc
->target_handle
,
3385 (int) reloc
->offset
);
3389 /* and points to somewhere within the target object. */
3390 if (reloc
->delta
>= target_obj
->size
) {
3391 DRM_ERROR("Relocation beyond target object bounds: "
3392 "obj %p target %d delta %d size %d.\n",
3393 obj
, reloc
->target_handle
,
3395 (int) target_obj
->size
);
3399 reloc
->delta
+= target_offset
;
3400 if (obj
->base
.write_domain
== I915_GEM_DOMAIN_CPU
) {
3401 uint32_t page_offset
= reloc
->offset
& ~PAGE_MASK
;
3404 vaddr
= kmap_atomic(obj
->pages
[reloc
->offset
>> PAGE_SHIFT
]);
3405 *(uint32_t *)(vaddr
+ page_offset
) = reloc
->delta
;
3406 kunmap_atomic(vaddr
);
3408 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3409 uint32_t __iomem
*reloc_entry
;
3410 void __iomem
*reloc_page
;
3412 ret
= i915_gem_object_set_to_gtt_domain(obj
, 1);
3416 /* Map the page containing the relocation we're going to perform. */
3417 reloc
->offset
+= obj
->gtt_offset
;
3418 reloc_page
= io_mapping_map_atomic_wc(dev_priv
->mm
.gtt_mapping
,
3419 reloc
->offset
& PAGE_MASK
);
3420 reloc_entry
= (uint32_t __iomem
*)
3421 (reloc_page
+ (reloc
->offset
& ~PAGE_MASK
));
3422 iowrite32(reloc
->delta
, reloc_entry
);
3423 io_mapping_unmap_atomic(reloc_page
);
3426 /* and update the user's relocation entry */
3427 reloc
->presumed_offset
= target_offset
;
3432 drm_gem_object_unreference(target_obj
);
3437 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object
*obj
,
3438 struct drm_file
*file_priv
,
3439 struct drm_i915_gem_exec_object2
*entry
)
3441 struct drm_i915_gem_relocation_entry __user
*user_relocs
;
3444 user_relocs
= (void __user
*)(uintptr_t)entry
->relocs_ptr
;
3445 for (i
= 0; i
< entry
->relocation_count
; i
++) {
3446 struct drm_i915_gem_relocation_entry reloc
;
3448 if (__copy_from_user_inatomic(&reloc
,
3453 ret
= i915_gem_execbuffer_relocate_entry(obj
, file_priv
, entry
, &reloc
);
3457 if (__copy_to_user_inatomic(&user_relocs
[i
].presumed_offset
,
3458 &reloc
.presumed_offset
,
3459 sizeof(reloc
.presumed_offset
)))
3467 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object
*obj
,
3468 struct drm_file
*file_priv
,
3469 struct drm_i915_gem_exec_object2
*entry
,
3470 struct drm_i915_gem_relocation_entry
*relocs
)
3474 for (i
= 0; i
< entry
->relocation_count
; i
++) {
3475 ret
= i915_gem_execbuffer_relocate_entry(obj
, file_priv
, entry
, &relocs
[i
]);
3484 i915_gem_execbuffer_relocate(struct drm_device
*dev
,
3485 struct drm_file
*file
,
3486 struct drm_i915_gem_object
**object_list
,
3487 struct drm_i915_gem_exec_object2
*exec_list
,
3492 for (i
= 0; i
< count
; i
++) {
3493 struct drm_i915_gem_object
*obj
= object_list
[i
];
3494 obj
->base
.pending_read_domains
= 0;
3495 obj
->base
.pending_write_domain
= 0;
3496 ret
= i915_gem_execbuffer_relocate_object(obj
, file
,
3506 i915_gem_execbuffer_reserve(struct drm_device
*dev
,
3507 struct drm_file
*file
,
3508 struct drm_i915_gem_object
**object_list
,
3509 struct drm_i915_gem_exec_object2
*exec_list
,
3514 /* Attempt to pin all of the buffers into the GTT.
3515 * This is done in 3 phases:
3517 * 1a. Unbind all objects that do not match the GTT constraints for
3518 * the execbuffer (fenceable, mappable, alignment etc).
3519 * 1b. Increment pin count for already bound objects.
3520 * 2. Bind new objects.
3521 * 3. Decrement pin count.
3523 * This avoid unnecessary unbinding of later objects in order to makr
3524 * room for the earlier objects *unless* we need to defragment.
3530 /* Unbind any ill-fitting objects or pin. */
3531 for (i
= 0; i
< count
; i
++) {
3532 struct drm_i915_gem_object
*obj
= object_list
[i
];
3533 struct drm_i915_gem_exec_object2
*entry
= &exec_list
[i
];
3534 bool need_fence
, need_mappable
;
3536 if (!obj
->gtt_space
)
3540 entry
->flags
& EXEC_OBJECT_NEEDS_FENCE
&&
3541 obj
->tiling_mode
!= I915_TILING_NONE
;
3543 entry
->relocation_count
? true : need_fence
;
3545 if ((entry
->alignment
&& obj
->gtt_offset
& (entry
->alignment
- 1)) ||
3546 (need_mappable
&& !obj
->map_and_fenceable
))
3547 ret
= i915_gem_object_unbind(obj
);
3549 ret
= i915_gem_object_pin(obj
,
3558 /* Bind fresh objects */
3559 for (i
= 0; i
< count
; i
++) {
3560 struct drm_i915_gem_exec_object2
*entry
= &exec_list
[i
];
3561 struct drm_i915_gem_object
*obj
= object_list
[i
];
3565 entry
->flags
& EXEC_OBJECT_NEEDS_FENCE
&&
3566 obj
->tiling_mode
!= I915_TILING_NONE
;
3568 if (!obj
->gtt_space
) {
3569 bool need_mappable
=
3570 entry
->relocation_count
? true : need_fence
;
3572 ret
= i915_gem_object_pin(obj
,
3580 ret
= i915_gem_object_get_fence_reg(obj
, true);
3584 obj
->pending_fenced_gpu_access
= true;
3587 entry
->offset
= obj
->gtt_offset
;
3590 err
: /* Decrement pin count for bound objects */
3591 for (i
= 0; i
< count
; i
++) {
3592 struct drm_i915_gem_object
*obj
= object_list
[i
];
3594 i915_gem_object_unpin(obj
);
3597 if (ret
!= -ENOSPC
|| retry
> 1)
3600 /* First attempt, just clear anything that is purgeable.
3601 * Second attempt, clear the entire GTT.
3603 ret
= i915_gem_evict_everything(dev
, retry
== 0);
3612 i915_gem_execbuffer_relocate_slow(struct drm_device
*dev
,
3613 struct drm_file
*file
,
3614 struct drm_i915_gem_object
**object_list
,
3615 struct drm_i915_gem_exec_object2
*exec_list
,
3618 struct drm_i915_gem_relocation_entry
*reloc
;
3621 for (i
= 0; i
< count
; i
++)
3622 object_list
[i
]->in_execbuffer
= false;
3624 mutex_unlock(&dev
->struct_mutex
);
3627 for (i
= 0; i
< count
; i
++)
3628 total
+= exec_list
[i
].relocation_count
;
3630 reloc
= drm_malloc_ab(total
, sizeof(*reloc
));
3631 if (reloc
== NULL
) {
3632 mutex_lock(&dev
->struct_mutex
);
3637 for (i
= 0; i
< count
; i
++) {
3638 struct drm_i915_gem_relocation_entry __user
*user_relocs
;
3640 user_relocs
= (void __user
*)(uintptr_t)exec_list
[i
].relocs_ptr
;
3642 if (copy_from_user(reloc
+total
, user_relocs
,
3643 exec_list
[i
].relocation_count
*
3646 mutex_lock(&dev
->struct_mutex
);
3650 total
+= exec_list
[i
].relocation_count
;
3653 ret
= i915_mutex_lock_interruptible(dev
);
3655 mutex_lock(&dev
->struct_mutex
);
3659 ret
= i915_gem_execbuffer_reserve(dev
, file
,
3660 object_list
, exec_list
,
3666 for (i
= 0; i
< count
; i
++) {
3667 struct drm_i915_gem_object
*obj
= object_list
[i
];
3668 obj
->base
.pending_read_domains
= 0;
3669 obj
->base
.pending_write_domain
= 0;
3670 ret
= i915_gem_execbuffer_relocate_object_slow(obj
, file
,
3676 total
+= exec_list
[i
].relocation_count
;
3679 /* Leave the user relocations as are, this is the painfully slow path,
3680 * and we want to avoid the complication of dropping the lock whilst
3681 * having buffers reserved in the aperture and so causing spurious
3682 * ENOSPC for random operations.
3686 drm_free_large(reloc
);
3691 i915_gem_execbuffer_move_to_gpu(struct drm_device
*dev
,
3692 struct drm_file
*file
,
3693 struct intel_ring_buffer
*ring
,
3694 struct drm_i915_gem_object
**objects
,
3697 struct change_domains cd
;
3700 cd
.invalidate_domains
= 0;
3701 cd
.flush_domains
= 0;
3703 for (i
= 0; i
< count
; i
++)
3704 i915_gem_object_set_to_gpu_domain(objects
[i
], ring
, &cd
);
3706 if (cd
.invalidate_domains
| cd
.flush_domains
) {
3708 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3710 cd
.invalidate_domains
,
3714 cd
.invalidate_domains
,
3719 for (i
= 0; i
< count
; i
++) {
3720 struct drm_i915_gem_object
*obj
= objects
[i
];
3721 /* XXX replace with semaphores */
3722 if (obj
->ring
&& ring
!= obj
->ring
) {
3723 ret
= i915_gem_object_wait_rendering(obj
, true);
3732 /* Throttle our rendering by waiting until the ring has completed our requests
3733 * emitted over 20 msec ago.
3735 * Note that if we were to use the current jiffies each time around the loop,
3736 * we wouldn't escape the function with any frames outstanding if the time to
3737 * render a frame was over 20ms.
3739 * This should get us reasonable parallelism between CPU and GPU but also
3740 * relatively low latency when blocking on a particular request to finish.
3743 i915_gem_ring_throttle(struct drm_device
*dev
, struct drm_file
*file
)
3745 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
3746 struct drm_i915_file_private
*file_priv
= file
->driver_priv
;
3747 unsigned long recent_enough
= jiffies
- msecs_to_jiffies(20);
3748 struct drm_i915_gem_request
*request
;
3749 struct intel_ring_buffer
*ring
= NULL
;
3753 spin_lock(&file_priv
->mm
.lock
);
3754 list_for_each_entry(request
, &file_priv
->mm
.request_list
, client_list
) {
3755 if (time_after_eq(request
->emitted_jiffies
, recent_enough
))
3758 ring
= request
->ring
;
3759 seqno
= request
->seqno
;
3761 spin_unlock(&file_priv
->mm
.lock
);
3767 if (!i915_seqno_passed(ring
->get_seqno(ring
), seqno
)) {
3768 /* And wait for the seqno passing without holding any locks and
3769 * causing extra latency for others. This is safe as the irq
3770 * generation is designed to be run atomically and so is
3773 ring
->user_irq_get(ring
);
3774 ret
= wait_event_interruptible(ring
->irq_queue
,
3775 i915_seqno_passed(ring
->get_seqno(ring
), seqno
)
3776 || atomic_read(&dev_priv
->mm
.wedged
));
3777 ring
->user_irq_put(ring
);
3779 if (ret
== 0 && atomic_read(&dev_priv
->mm
.wedged
))
3784 queue_delayed_work(dev_priv
->wq
, &dev_priv
->mm
.retire_work
, 0);
3790 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2
*exec
,
3791 uint64_t exec_offset
)
3793 uint32_t exec_start
, exec_len
;
3795 exec_start
= (uint32_t) exec_offset
+ exec
->batch_start_offset
;
3796 exec_len
= (uint32_t) exec
->batch_len
;
3798 if ((exec_start
| exec_len
) & 0x7)
3808 validate_exec_list(struct drm_i915_gem_exec_object2
*exec
,
3813 for (i
= 0; i
< count
; i
++) {
3814 char __user
*ptr
= (char __user
*)(uintptr_t)exec
[i
].relocs_ptr
;
3815 int length
; /* limited by fault_in_pages_readable() */
3817 /* First check for malicious input causing overflow */
3818 if (exec
[i
].relocation_count
>
3819 INT_MAX
/ sizeof(struct drm_i915_gem_relocation_entry
))
3822 length
= exec
[i
].relocation_count
*
3823 sizeof(struct drm_i915_gem_relocation_entry
);
3824 if (!access_ok(VERIFY_READ
, ptr
, length
))
3827 /* we may also need to update the presumed offsets */
3828 if (!access_ok(VERIFY_WRITE
, ptr
, length
))
3831 if (fault_in_pages_readable(ptr
, length
))
3839 i915_gem_do_execbuffer(struct drm_device
*dev
, void *data
,
3840 struct drm_file
*file
,
3841 struct drm_i915_gem_execbuffer2
*args
,
3842 struct drm_i915_gem_exec_object2
*exec_list
)
3844 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
3845 struct drm_i915_gem_object
**object_list
= NULL
;
3846 struct drm_i915_gem_object
*batch_obj
;
3847 struct drm_clip_rect
*cliprects
= NULL
;
3848 struct drm_i915_gem_request
*request
= NULL
;
3850 uint64_t exec_offset
;
3852 struct intel_ring_buffer
*ring
= NULL
;
3854 ret
= i915_gem_check_is_wedged(dev
);
3858 ret
= validate_exec_list(exec_list
, args
->buffer_count
);
3863 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3864 (int) args
->buffers_ptr
, args
->buffer_count
, args
->batch_len
);
3866 switch (args
->flags
& I915_EXEC_RING_MASK
) {
3867 case I915_EXEC_DEFAULT
:
3868 case I915_EXEC_RENDER
:
3869 ring
= &dev_priv
->render_ring
;
3872 if (!HAS_BSD(dev
)) {
3873 DRM_ERROR("execbuf with invalid ring (BSD)\n");
3876 ring
= &dev_priv
->bsd_ring
;
3879 if (!HAS_BLT(dev
)) {
3880 DRM_ERROR("execbuf with invalid ring (BLT)\n");
3883 ring
= &dev_priv
->blt_ring
;
3886 DRM_ERROR("execbuf with unknown ring: %d\n",
3887 (int)(args
->flags
& I915_EXEC_RING_MASK
));
3891 if (args
->buffer_count
< 1) {
3892 DRM_ERROR("execbuf with %d buffers\n", args
->buffer_count
);
3895 object_list
= drm_malloc_ab(sizeof(*object_list
), args
->buffer_count
);
3896 if (object_list
== NULL
) {
3897 DRM_ERROR("Failed to allocate object list for %d buffers\n",
3898 args
->buffer_count
);
3903 if (args
->num_cliprects
!= 0) {
3904 cliprects
= kcalloc(args
->num_cliprects
, sizeof(*cliprects
),
3906 if (cliprects
== NULL
) {
3911 ret
= copy_from_user(cliprects
,
3912 (struct drm_clip_rect __user
*)
3913 (uintptr_t) args
->cliprects_ptr
,
3914 sizeof(*cliprects
) * args
->num_cliprects
);
3916 DRM_ERROR("copy %d cliprects failed: %d\n",
3917 args
->num_cliprects
, ret
);
3923 request
= kzalloc(sizeof(*request
), GFP_KERNEL
);
3924 if (request
== NULL
) {
3929 ret
= i915_mutex_lock_interruptible(dev
);
3933 if (dev_priv
->mm
.suspended
) {
3934 mutex_unlock(&dev
->struct_mutex
);
3939 /* Look up object handles */
3940 for (i
= 0; i
< args
->buffer_count
; i
++) {
3941 struct drm_i915_gem_object
*obj
;
3943 obj
= to_intel_bo (drm_gem_object_lookup(dev
, file
,
3944 exec_list
[i
].handle
));
3946 DRM_ERROR("Invalid object handle %d at index %d\n",
3947 exec_list
[i
].handle
, i
);
3948 /* prevent error path from reading uninitialized data */
3949 args
->buffer_count
= i
;
3953 object_list
[i
] = obj
;
3955 if (obj
->in_execbuffer
) {
3956 DRM_ERROR("Object %p appears more than once in object list\n",
3958 /* prevent error path from reading uninitialized data */
3959 args
->buffer_count
= i
+ 1;
3963 obj
->in_execbuffer
= true;
3964 obj
->pending_fenced_gpu_access
= false;
3967 /* Move the objects en-masse into the GTT, evicting if necessary. */
3968 ret
= i915_gem_execbuffer_reserve(dev
, file
,
3969 object_list
, exec_list
,
3970 args
->buffer_count
);
3974 /* The objects are in their final locations, apply the relocations. */
3975 ret
= i915_gem_execbuffer_relocate(dev
, file
,
3976 object_list
, exec_list
,
3977 args
->buffer_count
);
3979 if (ret
== -EFAULT
) {
3980 ret
= i915_gem_execbuffer_relocate_slow(dev
, file
,
3983 args
->buffer_count
);
3984 BUG_ON(!mutex_is_locked(&dev
->struct_mutex
));
3990 /* Set the pending read domains for the batch buffer to COMMAND */
3991 batch_obj
= object_list
[args
->buffer_count
-1];
3992 if (batch_obj
->base
.pending_write_domain
) {
3993 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3997 batch_obj
->base
.pending_read_domains
|= I915_GEM_DOMAIN_COMMAND
;
3999 /* Sanity check the batch buffer */
4000 exec_offset
= batch_obj
->gtt_offset
;
4001 ret
= i915_gem_check_execbuffer(args
, exec_offset
);
4003 DRM_ERROR("execbuf with invalid offset/length\n");
4007 ret
= i915_gem_execbuffer_move_to_gpu(dev
, file
, ring
,
4008 object_list
, args
->buffer_count
);
4013 for (i
= 0; i
< args
->buffer_count
; i
++) {
4014 i915_gem_object_check_coherency(object_list
[i
],
4015 exec_list
[i
].handle
);
4020 i915_gem_dump_object(batch_obj
,
4026 /* Check for any pending flips. As we only maintain a flip queue depth
4027 * of 1, we can simply insert a WAIT for the next display flip prior
4028 * to executing the batch and avoid stalling the CPU.
4031 for (i
= 0; i
< args
->buffer_count
; i
++) {
4032 if (object_list
[i
]->base
.write_domain
)
4033 flips
|= atomic_read(&object_list
[i
]->pending_flip
);
4036 int plane
, flip_mask
;
4038 for (plane
= 0; flips
>> plane
; plane
++) {
4039 if (((flips
>> plane
) & 1) == 0)
4043 flip_mask
= MI_WAIT_FOR_PLANE_B_FLIP
;
4045 flip_mask
= MI_WAIT_FOR_PLANE_A_FLIP
;
4047 ret
= intel_ring_begin(ring
, 2);
4051 intel_ring_emit(ring
, MI_WAIT_FOR_EVENT
| flip_mask
);
4052 intel_ring_emit(ring
, MI_NOOP
);
4053 intel_ring_advance(ring
);
4057 /* Exec the batchbuffer */
4058 ret
= ring
->dispatch_execbuffer(ring
, args
, cliprects
, exec_offset
);
4060 DRM_ERROR("dispatch failed %d\n", ret
);
4064 for (i
= 0; i
< args
->buffer_count
; i
++) {
4065 struct drm_i915_gem_object
*obj
= object_list
[i
];
4067 obj
->base
.read_domains
= obj
->base
.pending_read_domains
;
4068 obj
->base
.write_domain
= obj
->base
.pending_write_domain
;
4069 obj
->fenced_gpu_access
= obj
->pending_fenced_gpu_access
;
4071 i915_gem_object_move_to_active(obj
, ring
);
4072 if (obj
->base
.write_domain
) {
4074 list_move_tail(&obj
->gpu_write_list
,
4075 &ring
->gpu_write_list
);
4076 intel_mark_busy(dev
, obj
);
4079 trace_i915_gem_object_change_domain(obj
,
4080 obj
->base
.read_domains
,
4081 obj
->base
.write_domain
);
4085 * Ensure that the commands in the batch buffer are
4086 * finished before the interrupt fires
4088 i915_retire_commands(dev
, ring
);
4090 if (i915_add_request(dev
, file
, request
, ring
))
4091 i915_gem_next_request_seqno(dev
, ring
);
4096 for (i
= 0; i
< args
->buffer_count
; i
++) {
4097 object_list
[i
]->in_execbuffer
= false;
4098 drm_gem_object_unreference(&object_list
[i
]->base
);
4101 mutex_unlock(&dev
->struct_mutex
);
4104 drm_free_large(object_list
);
4112 * Legacy execbuffer just creates an exec2 list from the original exec object
4113 * list array and passes it to the real function.
4116 i915_gem_execbuffer(struct drm_device
*dev
, void *data
,
4117 struct drm_file
*file
)
4119 struct drm_i915_gem_execbuffer
*args
= data
;
4120 struct drm_i915_gem_execbuffer2 exec2
;
4121 struct drm_i915_gem_exec_object
*exec_list
= NULL
;
4122 struct drm_i915_gem_exec_object2
*exec2_list
= NULL
;
4126 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4127 (int) args
->buffers_ptr
, args
->buffer_count
, args
->batch_len
);
4130 if (args
->buffer_count
< 1) {
4131 DRM_ERROR("execbuf with %d buffers\n", args
->buffer_count
);
4135 /* Copy in the exec list from userland */
4136 exec_list
= drm_malloc_ab(sizeof(*exec_list
), args
->buffer_count
);
4137 exec2_list
= drm_malloc_ab(sizeof(*exec2_list
), args
->buffer_count
);
4138 if (exec_list
== NULL
|| exec2_list
== NULL
) {
4139 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4140 args
->buffer_count
);
4141 drm_free_large(exec_list
);
4142 drm_free_large(exec2_list
);
4145 ret
= copy_from_user(exec_list
,
4146 (struct drm_i915_relocation_entry __user
*)
4147 (uintptr_t) args
->buffers_ptr
,
4148 sizeof(*exec_list
) * args
->buffer_count
);
4150 DRM_ERROR("copy %d exec entries failed %d\n",
4151 args
->buffer_count
, ret
);
4152 drm_free_large(exec_list
);
4153 drm_free_large(exec2_list
);
4157 for (i
= 0; i
< args
->buffer_count
; i
++) {
4158 exec2_list
[i
].handle
= exec_list
[i
].handle
;
4159 exec2_list
[i
].relocation_count
= exec_list
[i
].relocation_count
;
4160 exec2_list
[i
].relocs_ptr
= exec_list
[i
].relocs_ptr
;
4161 exec2_list
[i
].alignment
= exec_list
[i
].alignment
;
4162 exec2_list
[i
].offset
= exec_list
[i
].offset
;
4163 if (INTEL_INFO(dev
)->gen
< 4)
4164 exec2_list
[i
].flags
= EXEC_OBJECT_NEEDS_FENCE
;
4166 exec2_list
[i
].flags
= 0;
4169 exec2
.buffers_ptr
= args
->buffers_ptr
;
4170 exec2
.buffer_count
= args
->buffer_count
;
4171 exec2
.batch_start_offset
= args
->batch_start_offset
;
4172 exec2
.batch_len
= args
->batch_len
;
4173 exec2
.DR1
= args
->DR1
;
4174 exec2
.DR4
= args
->DR4
;
4175 exec2
.num_cliprects
= args
->num_cliprects
;
4176 exec2
.cliprects_ptr
= args
->cliprects_ptr
;
4177 exec2
.flags
= I915_EXEC_RENDER
;
4179 ret
= i915_gem_do_execbuffer(dev
, data
, file
, &exec2
, exec2_list
);
4181 /* Copy the new buffer offsets back to the user's exec list. */
4182 for (i
= 0; i
< args
->buffer_count
; i
++)
4183 exec_list
[i
].offset
= exec2_list
[i
].offset
;
4184 /* ... and back out to userspace */
4185 ret
= copy_to_user((struct drm_i915_relocation_entry __user
*)
4186 (uintptr_t) args
->buffers_ptr
,
4188 sizeof(*exec_list
) * args
->buffer_count
);
4191 DRM_ERROR("failed to copy %d exec entries "
4192 "back to user (%d)\n",
4193 args
->buffer_count
, ret
);
4197 drm_free_large(exec_list
);
4198 drm_free_large(exec2_list
);
4203 i915_gem_execbuffer2(struct drm_device
*dev
, void *data
,
4204 struct drm_file
*file
)
4206 struct drm_i915_gem_execbuffer2
*args
= data
;
4207 struct drm_i915_gem_exec_object2
*exec2_list
= NULL
;
4211 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4212 (int) args
->buffers_ptr
, args
->buffer_count
, args
->batch_len
);
4215 if (args
->buffer_count
< 1) {
4216 DRM_ERROR("execbuf2 with %d buffers\n", args
->buffer_count
);
4220 exec2_list
= drm_malloc_ab(sizeof(*exec2_list
), args
->buffer_count
);
4221 if (exec2_list
== NULL
) {
4222 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4223 args
->buffer_count
);
4226 ret
= copy_from_user(exec2_list
,
4227 (struct drm_i915_relocation_entry __user
*)
4228 (uintptr_t) args
->buffers_ptr
,
4229 sizeof(*exec2_list
) * args
->buffer_count
);
4231 DRM_ERROR("copy %d exec entries failed %d\n",
4232 args
->buffer_count
, ret
);
4233 drm_free_large(exec2_list
);
4237 ret
= i915_gem_do_execbuffer(dev
, data
, file
, args
, exec2_list
);
4239 /* Copy the new buffer offsets back to the user's exec list. */
4240 ret
= copy_to_user((struct drm_i915_relocation_entry __user
*)
4241 (uintptr_t) args
->buffers_ptr
,
4243 sizeof(*exec2_list
) * args
->buffer_count
);
4246 DRM_ERROR("failed to copy %d exec entries "
4247 "back to user (%d)\n",
4248 args
->buffer_count
, ret
);
4252 drm_free_large(exec2_list
);
4257 i915_gem_object_pin(struct drm_i915_gem_object
*obj
,
4259 bool map_and_fenceable
)
4261 struct drm_device
*dev
= obj
->base
.dev
;
4262 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
4265 BUG_ON(obj
->pin_count
== DRM_I915_GEM_OBJECT_MAX_PIN_COUNT
);
4266 WARN_ON(i915_verify_lists(dev
));
4268 if (obj
->gtt_space
!= NULL
) {
4269 if ((alignment
&& obj
->gtt_offset
& (alignment
- 1)) ||
4270 (map_and_fenceable
&& !obj
->map_and_fenceable
)) {
4271 WARN(obj
->pin_count
,
4272 "bo is already pinned with incorrect alignment:"
4273 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
4274 " obj->map_and_fenceable=%d\n",
4275 obj
->gtt_offset
, alignment
,
4277 obj
->map_and_fenceable
);
4278 ret
= i915_gem_object_unbind(obj
);
4284 if (obj
->gtt_space
== NULL
) {
4285 ret
= i915_gem_object_bind_to_gtt(obj
, alignment
,
4291 if (obj
->pin_count
++ == 0) {
4293 list_move_tail(&obj
->mm_list
,
4294 &dev_priv
->mm
.pinned_list
);
4296 obj
->pin_mappable
|= map_and_fenceable
;
4298 WARN_ON(i915_verify_lists(dev
));
4303 i915_gem_object_unpin(struct drm_i915_gem_object
*obj
)
4305 struct drm_device
*dev
= obj
->base
.dev
;
4306 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
4308 WARN_ON(i915_verify_lists(dev
));
4309 BUG_ON(obj
->pin_count
== 0);
4310 BUG_ON(obj
->gtt_space
== NULL
);
4312 if (--obj
->pin_count
== 0) {
4314 list_move_tail(&obj
->mm_list
,
4315 &dev_priv
->mm
.inactive_list
);
4316 obj
->pin_mappable
= false;
4318 WARN_ON(i915_verify_lists(dev
));
4322 i915_gem_pin_ioctl(struct drm_device
*dev
, void *data
,
4323 struct drm_file
*file
)
4325 struct drm_i915_gem_pin
*args
= data
;
4326 struct drm_i915_gem_object
*obj
;
4329 ret
= i915_mutex_lock_interruptible(dev
);
4333 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
4339 if (obj
->madv
!= I915_MADV_WILLNEED
) {
4340 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4345 if (obj
->pin_filp
!= NULL
&& obj
->pin_filp
!= file
) {
4346 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4352 obj
->user_pin_count
++;
4353 obj
->pin_filp
= file
;
4354 if (obj
->user_pin_count
== 1) {
4355 ret
= i915_gem_object_pin(obj
, args
->alignment
, true);
4360 /* XXX - flush the CPU caches for pinned objects
4361 * as the X server doesn't manage domains yet
4363 i915_gem_object_flush_cpu_write_domain(obj
);
4364 args
->offset
= obj
->gtt_offset
;
4366 drm_gem_object_unreference(&obj
->base
);
4368 mutex_unlock(&dev
->struct_mutex
);
4373 i915_gem_unpin_ioctl(struct drm_device
*dev
, void *data
,
4374 struct drm_file
*file
)
4376 struct drm_i915_gem_pin
*args
= data
;
4377 struct drm_i915_gem_object
*obj
;
4380 ret
= i915_mutex_lock_interruptible(dev
);
4384 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
4390 if (obj
->pin_filp
!= file
) {
4391 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4396 obj
->user_pin_count
--;
4397 if (obj
->user_pin_count
== 0) {
4398 obj
->pin_filp
= NULL
;
4399 i915_gem_object_unpin(obj
);
4403 drm_gem_object_unreference(&obj
->base
);
4405 mutex_unlock(&dev
->struct_mutex
);
4410 i915_gem_busy_ioctl(struct drm_device
*dev
, void *data
,
4411 struct drm_file
*file
)
4413 struct drm_i915_gem_busy
*args
= data
;
4414 struct drm_i915_gem_object
*obj
;
4417 ret
= i915_mutex_lock_interruptible(dev
);
4421 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file
, args
->handle
));
4427 /* Count all active objects as busy, even if they are currently not used
4428 * by the gpu. Users of this interface expect objects to eventually
4429 * become non-busy without any further actions, therefore emit any
4430 * necessary flushes here.
4432 args
->busy
= obj
->active
;
4434 /* Unconditionally flush objects, even when the gpu still uses this
4435 * object. Userspace calling this function indicates that it wants to
4436 * use this buffer rather sooner than later, so issuing the required
4437 * flush earlier is beneficial.
4439 if (obj
->base
.write_domain
& I915_GEM_GPU_DOMAINS
)
4440 i915_gem_flush_ring(dev
, obj
->ring
,
4441 0, obj
->base
.write_domain
);
4443 /* Update the active list for the hardware's current position.
4444 * Otherwise this only updates on a delayed timer or when irqs
4445 * are actually unmasked, and our working set ends up being
4446 * larger than required.
4448 i915_gem_retire_requests_ring(dev
, obj
->ring
);
4450 args
->busy
= obj
->active
;
4453 drm_gem_object_unreference(&obj
->base
);
4455 mutex_unlock(&dev
->struct_mutex
);
4460 i915_gem_throttle_ioctl(struct drm_device
*dev
, void *data
,
4461 struct drm_file
*file_priv
)
4463 return i915_gem_ring_throttle(dev
, file_priv
);
4467 i915_gem_madvise_ioctl(struct drm_device
*dev
, void *data
,
4468 struct drm_file
*file_priv
)
4470 struct drm_i915_gem_madvise
*args
= data
;
4471 struct drm_i915_gem_object
*obj
;
4474 switch (args
->madv
) {
4475 case I915_MADV_DONTNEED
:
4476 case I915_MADV_WILLNEED
:
4482 ret
= i915_mutex_lock_interruptible(dev
);
4486 obj
= to_intel_bo(drm_gem_object_lookup(dev
, file_priv
, args
->handle
));
4492 if (obj
->pin_count
) {
4497 if (obj
->madv
!= __I915_MADV_PURGED
)
4498 obj
->madv
= args
->madv
;
4500 /* if the object is no longer bound, discard its backing storage */
4501 if (i915_gem_object_is_purgeable(obj
) &&
4502 obj
->gtt_space
== NULL
)
4503 i915_gem_object_truncate(obj
);
4505 args
->retained
= obj
->madv
!= __I915_MADV_PURGED
;
4508 drm_gem_object_unreference(&obj
->base
);
4510 mutex_unlock(&dev
->struct_mutex
);
4514 struct drm_i915_gem_object
*i915_gem_alloc_object(struct drm_device
*dev
,
4517 struct drm_i915_private
*dev_priv
= dev
->dev_private
;
4518 struct drm_i915_gem_object
*obj
;
4520 obj
= kzalloc(sizeof(*obj
), GFP_KERNEL
);
4524 if (drm_gem_object_init(dev
, &obj
->base
, size
) != 0) {
4529 i915_gem_info_add_obj(dev_priv
, size
);
4531 obj
->base
.write_domain
= I915_GEM_DOMAIN_CPU
;
4532 obj
->base
.read_domains
= I915_GEM_DOMAIN_CPU
;
4534 obj
->agp_type
= AGP_USER_MEMORY
;
4535 obj
->base
.driver_private
= NULL
;
4536 obj
->fence_reg
= I915_FENCE_REG_NONE
;
4537 INIT_LIST_HEAD(&obj
->mm_list
);
4538 INIT_LIST_HEAD(&obj
->gtt_list
);
4539 INIT_LIST_HEAD(&obj
->ring_list
);
4540 INIT_LIST_HEAD(&obj
->gpu_write_list
);
4541 obj
->madv
= I915_MADV_WILLNEED
;
4542 /* Avoid an unnecessary call to unbind on the first bind. */
4543 obj
->map_and_fenceable
= true;
4548 int i915_gem_init_object(struct drm_gem_object
*obj
)
4555 static void i915_gem_free_object_tail(struct drm_i915_gem_object
*obj
)
4557 struct drm_device
*dev
= obj
->base
.dev
;
4558 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
4561 ret
= i915_gem_object_unbind(obj
);
4562 if (ret
== -ERESTARTSYS
) {
4563 list_move(&obj
->mm_list
,
4564 &dev_priv
->mm
.deferred_free_list
);
4568 if (obj
->base
.map_list
.map
)
4569 i915_gem_free_mmap_offset(obj
);
4571 drm_gem_object_release(&obj
->base
);
4572 i915_gem_info_remove_obj(dev_priv
, obj
->base
.size
);
4574 kfree(obj
->page_cpu_valid
);
4579 void i915_gem_free_object(struct drm_gem_object
*gem_obj
)
4581 struct drm_i915_gem_object
*obj
= to_intel_bo(gem_obj
);
4582 struct drm_device
*dev
= obj
->base
.dev
;
4584 trace_i915_gem_object_destroy(obj
);
4586 while (obj
->pin_count
> 0)
4587 i915_gem_object_unpin(obj
);
4590 i915_gem_detach_phys_object(dev
, obj
);
4592 i915_gem_free_object_tail(obj
);
4596 i915_gem_idle(struct drm_device
*dev
)
4598 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
4601 mutex_lock(&dev
->struct_mutex
);
4603 if (dev_priv
->mm
.suspended
) {
4604 mutex_unlock(&dev
->struct_mutex
);
4608 ret
= i915_gpu_idle(dev
);
4610 mutex_unlock(&dev
->struct_mutex
);
4614 /* Under UMS, be paranoid and evict. */
4615 if (!drm_core_check_feature(dev
, DRIVER_MODESET
)) {
4616 ret
= i915_gem_evict_inactive(dev
, false);
4618 mutex_unlock(&dev
->struct_mutex
);
4623 i915_gem_reset_fences(dev
);
4625 /* Hack! Don't let anybody do execbuf while we don't control the chip.
4626 * We need to replace this with a semaphore, or something.
4627 * And not confound mm.suspended!
4629 dev_priv
->mm
.suspended
= 1;
4630 del_timer_sync(&dev_priv
->hangcheck_timer
);
4632 i915_kernel_lost_context(dev
);
4633 i915_gem_cleanup_ringbuffer(dev
);
4635 mutex_unlock(&dev
->struct_mutex
);
4637 /* Cancel the retire work handler, which should be idle now. */
4638 cancel_delayed_work_sync(&dev_priv
->mm
.retire_work
);
4644 i915_gem_init_ringbuffer(struct drm_device
*dev
)
4646 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
4649 ret
= intel_init_render_ring_buffer(dev
);
4654 ret
= intel_init_bsd_ring_buffer(dev
);
4656 goto cleanup_render_ring
;
4660 ret
= intel_init_blt_ring_buffer(dev
);
4662 goto cleanup_bsd_ring
;
4665 dev_priv
->next_seqno
= 1;
4670 intel_cleanup_ring_buffer(&dev_priv
->bsd_ring
);
4671 cleanup_render_ring
:
4672 intel_cleanup_ring_buffer(&dev_priv
->render_ring
);
4677 i915_gem_cleanup_ringbuffer(struct drm_device
*dev
)
4679 drm_i915_private_t
*dev_priv
= dev
->dev_private
;
4681 intel_cleanup_ring_buffer(&dev_priv
->render_ring
);
4682 intel_cleanup_ring_buffer(&dev_priv
->bsd_ring
);
4683 intel_cleanup_ring_buffer(&dev_priv
->blt_ring
);
int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (atomic_read(&dev_priv->mm.wedged)) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		atomic_set(&dev_priv->mm.wedged, 0);
	}

	mutex_lock(&dev->struct_mutex);
	dev_priv->mm.suspended = 0;

	ret = i915_gem_init_ringbuffer(dev);
	if (ret != 0) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
	BUG_ON(!list_empty(&dev_priv->bsd_ring.active_list));
	BUG_ON(!list_empty(&dev_priv->blt_ring.active_list));
	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
	BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
	BUG_ON(!list_empty(&dev_priv->bsd_ring.request_list));
	BUG_ON(!list_empty(&dev_priv->blt_ring.request_list));
	mutex_unlock(&dev->struct_mutex);

	ret = drm_irq_install(dev);
	if (ret)
		goto cleanup_ringbuffer;

	return 0;

cleanup_ringbuffer:
	mutex_lock(&dev->struct_mutex);
	i915_gem_cleanup_ringbuffer(dev);
	dev_priv->mm.suspended = 1;
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	drm_irq_uninstall(dev);
	return i915_gem_idle(dev);
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}

static void
init_ring_lists(struct intel_ring_buffer *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);
}

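/*
 * One-time GEM setup at driver load: initialize the memory-manager and
 * per-ring lists, fence register state, the retire work handler and the
 * inactive-list shrinker.
 */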
void
i915_gem_load(struct drm_device *dev)
{
	int i;
	drm_i915_private_t *dev_priv = dev->dev_private;

	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
	INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
	init_ring_lists(&dev_priv->render_ring);
	init_ring_lists(&dev_priv->bsd_ring);
	init_ring_lists(&dev_priv->blt_ring);
	for (i = 0; i < 16; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	init_completion(&dev_priv->error_completion);

	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	if (IS_GEN3(dev)) {
		u32 tmp = I915_READ(MI_ARB_STATE);
		if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
			/* arb state is a masked write, so set bit + bit in mask */
			tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
			I915_WRITE(MI_ARB_STATE, tmp);
		}
	}

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;

	if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	switch (INTEL_INFO(dev)->gen) {
	case 6:
		for (i = 0; i < 16; i++)
			I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
		break;
	case 5:
	case 4:
		for (i = 0; i < 16; i++)
			I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
		break;
	case 3:
		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
			for (i = 0; i < 8; i++)
				I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
	case 2:
		for (i = 0; i < 8; i++)
			I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
		break;
	}
	i915_gem_detect_bit_6_swizzle(dev);
	init_waitqueue_head(&dev_priv->pending_flip_queue);

	dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.inactive_shrinker);
}

/*
 * Create a physically contiguous memory object for this object,
 * e.g. for cursor + overlay regs
 */
static int i915_gem_init_phys_object(struct drm_device *dev,
				     int id, int size, int align)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

	phys_obj->handle = drm_pci_alloc(dev, size, align);
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;

kfree_obj:
	kfree(phys_obj);
	return ret;
}

static void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj) {
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	kfree(phys_obj);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}

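/*
 * Detaching copies the contents of the physically contiguous backing store
 * back into the object's shmem pages (flushing CPU caches along the way),
 * so the data is preserved once the phys object is released or reused.
 */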
void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char *vaddr;
	int i;
	int page_count;

	if (!obj->phys_obj)
		return;
	vaddr = obj->phys_obj->handle->vaddr;

	page_count = obj->base.size / PAGE_SIZE;
	for (i = 0; i < page_count; i++) {
		struct page *page = read_cache_page_gfp(mapping, i,
							GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (!IS_ERR(page)) {
			char *dst = kmap_atomic(page);
			memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
			kunmap_atomic(dst);

			drm_clflush_pages(&page, 1);

			set_page_dirty(page);
			mark_page_accessed(page);
			page_cache_release(page);
		}
	}
	intel_gtt_chipset_flush();

	obj->phys_obj->cur_obj = NULL;
	obj->phys_obj = NULL;
}

int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_i915_gem_object *obj,
			    int id,
			    int align)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	if (obj->phys_obj) {
		if (obj->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
						obj->base.size, align);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->base.size);
			return ret;
		}
	}

	/* bind to the object */
	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj->phys_obj->cur_obj = obj;

	page_count = obj->base.size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
		struct page *page;
		char *dst, *src;

		page = read_cache_page_gfp(mapping, i,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
		page_cache_release(page);
	}

	return 0;
}

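/*
 * pwrite into a phys object first tries a non-temporal copy while holding
 * struct_mutex; if that faults, it drops the lock and falls back to a plain
 * copy_from_user(), which is safe because the phys backing store is fixed
 * for the lifetime of the object.
 */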
static int
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
	char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	intel_gtt_chipset_flush();
	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static int
i915_gpu_is_active(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int lists_empty;

	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
		      list_empty(&dev_priv->mm.active_list);

	return !lists_empty;
}

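/*
 * Memory-pressure callback registered in i915_gem_load(). Following the
 * shrinker protocol, a call with nr_to_scan == 0 only reports how many
 * inactive objects could be reclaimed (scaled by vfs_cache_pressure);
 * otherwise it unbinds purgeable buffers first, then anything else on the
 * inactive list, and as a last resort idles the GPU and rescans.
 */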
static int
i915_gem_inactive_shrink(struct shrinker *shrinker,
			 int nr_to_scan,
			 gfp_t gfp_mask)
{
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj, *next;
	int cnt;

	if (!mutex_trylock(&dev->struct_mutex))
		return 0;

	/* "fast-path" to count number of available objects */
	if (nr_to_scan == 0) {
		cnt = 0;
		list_for_each_entry(obj,
				    &dev_priv->mm.inactive_list,
				    mm_list)
			cnt++;
		mutex_unlock(&dev->struct_mutex);
		return cnt / 100 * sysctl_vfs_cache_pressure;
	}

rescan:
	/* first scan for clean buffers */
	i915_gem_retire_requests(dev);

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (i915_gem_object_is_purgeable(obj)) {
			if (i915_gem_object_unbind(obj) == 0 &&
			    --nr_to_scan == 0)
				break;
		}
	}

	/* second pass, evict/count anything still on the inactive list */
	cnt = 0;
	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (nr_to_scan &&
		    i915_gem_object_unbind(obj) == 0)
			nr_to_scan--;
		else
			cnt++;
	}

	if (nr_to_scan && i915_gpu_is_active(dev)) {
		/*
		 * We are desperate for pages, so as a last resort, wait
		 * for the GPU to finish and discard whatever we can.
		 * This has a dramatic impact to reduce the number of
		 * OOM-killer events whilst running the GPU aggressively.
		 */
		if (i915_gpu_idle(dev) == 0)
			goto rescan;
	}

	mutex_unlock(&dev->struct_mutex);
	return cnt / 100 * sysctl_vfs_cache_pressure;
}