drm/i915: Defer accounting until read from debugfs
deliverable/linux.git: drivers/gpu/drm/i915/i915_gem.c
1 /*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
37
38 struct change_domains {
39 uint32_t invalidate_domains;
40 uint32_t flush_domains;
41 uint32_t flush_rings;
42 };
43
44 static int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
45 struct intel_ring_buffer *pipelined);
46 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
47 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
48 static int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
49 bool write);
50 static int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
51 uint64_t offset,
52 uint64_t size);
53 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
54 static int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
55 bool interruptible);
56 static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
57 unsigned alignment,
58 bool map_and_fenceable);
59 static void i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj);
60 static int i915_gem_phys_pwrite(struct drm_device *dev,
61 struct drm_i915_gem_object *obj,
62 struct drm_i915_gem_pwrite *args,
63 struct drm_file *file);
64 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
65
66 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
67 int nr_to_scan,
68 gfp_t gfp_mask);
69
70
71 /* some bookkeeping */
72 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
73 size_t size)
74 {
75 dev_priv->mm.object_count++;
76 dev_priv->mm.object_memory += size;
77 }
78
79 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
80 size_t size)
81 {
82 dev_priv->mm.object_count--;
83 dev_priv->mm.object_memory -= size;
84 }
85
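/* Check whether the GPU has been declared wedged. If a reset is in
 * progress, wait for it to complete; return -EIO if the GPU is still
 * wedged afterwards, 0 otherwise.
 */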
86 int
87 i915_gem_check_is_wedged(struct drm_device *dev)
88 {
89 struct drm_i915_private *dev_priv = dev->dev_private;
90 struct completion *x = &dev_priv->error_completion;
91 unsigned long flags;
92 int ret;
93
94 if (!atomic_read(&dev_priv->mm.wedged))
95 return 0;
96
97 ret = wait_for_completion_interruptible(x);
98 if (ret)
99 return ret;
100
101 /* Success, we reset the GPU! */
102 if (!atomic_read(&dev_priv->mm.wedged))
103 return 0;
104
105 /* GPU is hung, bump the completion count to account for
106 * the token we just consumed so that we never hit zero and
107 * end up waiting upon a subsequent completion event that
108 * will never happen.
109 */
110 spin_lock_irqsave(&x->wait.lock, flags);
111 x->done++;
112 spin_unlock_irqrestore(&x->wait.lock, flags);
113 return -EIO;
114 }
115
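/* Take struct_mutex interruptibly, failing early with -EIO or -EAGAIN
 * if the GPU is wedged so that blocked ioctls can be aborted.
 */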
116 static int i915_mutex_lock_interruptible(struct drm_device *dev)
117 {
118 struct drm_i915_private *dev_priv = dev->dev_private;
119 int ret;
120
121 ret = i915_gem_check_is_wedged(dev);
122 if (ret)
123 return ret;
124
125 ret = mutex_lock_interruptible(&dev->struct_mutex);
126 if (ret)
127 return ret;
128
129 if (atomic_read(&dev_priv->mm.wedged)) {
130 mutex_unlock(&dev->struct_mutex);
131 return -EAGAIN;
132 }
133
134 WARN_ON(i915_verify_lists(dev));
135 return 0;
136 }
137
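/* An object is "inactive" when it is bound into the GTT but neither in
 * use by the GPU nor pinned.
 */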
138 static inline bool
139 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
140 {
141 return obj->gtt_space && !obj->active && obj->pin_count == 0;
142 }
143
144 void i915_gem_do_init(struct drm_device *dev,
145 unsigned long start,
146 unsigned long mappable_end,
147 unsigned long end)
148 {
149 drm_i915_private_t *dev_priv = dev->dev_private;
150
151 drm_mm_init(&dev_priv->mm.gtt_space, start,
152 end - start);
153
154 dev_priv->mm.gtt_total = end - start;
155 dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
156 dev_priv->mm.gtt_mappable_end = mappable_end;
157 }
158
159 int
160 i915_gem_init_ioctl(struct drm_device *dev, void *data,
161 struct drm_file *file)
162 {
163 struct drm_i915_gem_init *args = data;
164
165 if (args->gtt_start >= args->gtt_end ||
166 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
167 return -EINVAL;
168
169 mutex_lock(&dev->struct_mutex);
170 i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
171 mutex_unlock(&dev->struct_mutex);
172
173 return 0;
174 }
175
176 int
177 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
178 struct drm_file *file)
179 {
180 struct drm_i915_private *dev_priv = dev->dev_private;
181 struct drm_i915_gem_get_aperture *args = data;
182 struct drm_i915_gem_object *obj;
183 size_t pinned;
184
185 if (!(dev->driver->driver_features & DRIVER_GEM))
186 return -ENODEV;
187
188 pinned = 0;
189 mutex_lock(&dev->struct_mutex);
190 list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
191 pinned += obj->gtt_space->size;
192 mutex_unlock(&dev->struct_mutex);
193
194 args->aper_size = dev_priv->mm.gtt_total;
195 args->aper_available_size = args->aper_size - pinned;
196
197 return 0;
198 }
199
200 /**
201 * Creates a new mm object and returns a handle to it.
202 */
203 int
204 i915_gem_create_ioctl(struct drm_device *dev, void *data,
205 struct drm_file *file)
206 {
207 struct drm_i915_gem_create *args = data;
208 struct drm_i915_gem_object *obj;
209 int ret;
210 u32 handle;
211
212 args->size = roundup(args->size, PAGE_SIZE);
213
214 /* Allocate the new object */
215 obj = i915_gem_alloc_object(dev, args->size);
216 if (obj == NULL)
217 return -ENOMEM;
218
219 ret = drm_gem_handle_create(file, &obj->base, &handle);
220 if (ret) {
221 drm_gem_object_release(&obj->base);
222 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
223 kfree(obj);
224 return ret;
225 }
226
227 /* drop reference from allocate - handle holds it now */
228 drm_gem_object_unreference(&obj->base);
229 trace_i915_gem_object_create(obj);
230
231 args->handle = handle;
232 return 0;
233 }
234
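/* Objects using bit-17 swizzling need manual swizzle fixups for CPU
 * access, because bit 17 of the physical page address feeds the swizzle.
 */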
235 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
236 {
237 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
238
239 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
240 obj->tiling_mode != I915_TILING_NONE;
241 }
242
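/* Copy a span of bytes between two pages via kmap; used by the slow,
 * sleepable pread/pwrite paths.
 */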
243 static inline void
244 slow_shmem_copy(struct page *dst_page,
245 int dst_offset,
246 struct page *src_page,
247 int src_offset,
248 int length)
249 {
250 char *dst_vaddr, *src_vaddr;
251
252 dst_vaddr = kmap(dst_page);
253 src_vaddr = kmap(src_page);
254
255 memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
256
257 kunmap(src_page);
258 kunmap(dst_page);
259 }
260
261 static inline void
262 slow_shmem_bit17_copy(struct page *gpu_page,
263 int gpu_offset,
264 struct page *cpu_page,
265 int cpu_offset,
266 int length,
267 int is_read)
268 {
269 char *gpu_vaddr, *cpu_vaddr;
270
271 /* Use the unswizzled path if this page isn't affected. */
272 if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
273 if (is_read)
274 return slow_shmem_copy(cpu_page, cpu_offset,
275 gpu_page, gpu_offset, length);
276 else
277 return slow_shmem_copy(gpu_page, gpu_offset,
278 cpu_page, cpu_offset, length);
279 }
280
281 gpu_vaddr = kmap(gpu_page);
282 cpu_vaddr = kmap(cpu_page);
283
284 /* Copy the data, XORing A6 with A17 (1). The user already knows he's
285 * XORing with the other bits (A9 for Y, A9 and A10 for X)
286 */
287 while (length > 0) {
288 int cacheline_end = ALIGN(gpu_offset + 1, 64);
289 int this_length = min(cacheline_end - gpu_offset, length);
290 int swizzled_gpu_offset = gpu_offset ^ 64;
291
292 if (is_read) {
293 memcpy(cpu_vaddr + cpu_offset,
294 gpu_vaddr + swizzled_gpu_offset,
295 this_length);
296 } else {
297 memcpy(gpu_vaddr + swizzled_gpu_offset,
298 cpu_vaddr + cpu_offset,
299 this_length);
300 }
301 cpu_offset += this_length;
302 gpu_offset += this_length;
303 length -= this_length;
304 }
305
306 kunmap(cpu_page);
307 kunmap(gpu_page);
308 }
309
310 /**
311 * This is the fast shmem pread path, which attempts to copy_to_user directly
312 * from the backing pages of the object to the user's address space. On a
313 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
314 */
315 static int
316 i915_gem_shmem_pread_fast(struct drm_device *dev,
317 struct drm_i915_gem_object *obj,
318 struct drm_i915_gem_pread *args,
319 struct drm_file *file)
320 {
321 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
322 ssize_t remain;
323 loff_t offset;
324 char __user *user_data;
325 int page_offset, page_length;
326
327 user_data = (char __user *) (uintptr_t) args->data_ptr;
328 remain = args->size;
329
330 offset = args->offset;
331
332 while (remain > 0) {
333 struct page *page;
334 char *vaddr;
335 int ret;
336
337 /* Operation in this page
338 *
339 * page_offset = offset within page
340 * page_length = bytes to copy for this page
341 */
342 page_offset = offset & (PAGE_SIZE-1);
343 page_length = remain;
344 if ((page_offset + remain) > PAGE_SIZE)
345 page_length = PAGE_SIZE - page_offset;
346
347 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
348 GFP_HIGHUSER | __GFP_RECLAIMABLE);
349 if (IS_ERR(page))
350 return PTR_ERR(page);
351
352 vaddr = kmap_atomic(page);
353 ret = __copy_to_user_inatomic(user_data,
354 vaddr + page_offset,
355 page_length);
356 kunmap_atomic(vaddr);
357
358 mark_page_accessed(page);
359 page_cache_release(page);
360 if (ret)
361 return -EFAULT;
362
363 remain -= page_length;
364 user_data += page_length;
365 offset += page_length;
366 }
367
368 return 0;
369 }
370
371 /**
372 * This is the fallback shmem pread path, which uses get_user_pages to pin
373 * the user pages ahead of time so that we can copy out of the object's
374 * backing pages while holding the struct mutex and not take page faults
375 * on the user address.
376 */
377 static int
378 i915_gem_shmem_pread_slow(struct drm_device *dev,
379 struct drm_i915_gem_object *obj,
380 struct drm_i915_gem_pread *args,
381 struct drm_file *file)
382 {
383 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
384 struct mm_struct *mm = current->mm;
385 struct page **user_pages;
386 ssize_t remain;
387 loff_t offset, pinned_pages, i;
388 loff_t first_data_page, last_data_page, num_pages;
389 int shmem_page_offset;
390 int data_page_index, data_page_offset;
391 int page_length;
392 int ret;
393 uint64_t data_ptr = args->data_ptr;
394 int do_bit17_swizzling;
395
396 remain = args->size;
397
398 /* Pin the user pages containing the data. We can't fault while
399 * holding the struct mutex, yet we want to hold it while
400 * dereferencing the user data.
401 */
402 first_data_page = data_ptr / PAGE_SIZE;
403 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
404 num_pages = last_data_page - first_data_page + 1;
405
406 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
407 if (user_pages == NULL)
408 return -ENOMEM;
409
410 mutex_unlock(&dev->struct_mutex);
411 down_read(&mm->mmap_sem);
412 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
413 num_pages, 1, 0, user_pages, NULL);
414 up_read(&mm->mmap_sem);
415 mutex_lock(&dev->struct_mutex);
416 if (pinned_pages < num_pages) {
417 ret = -EFAULT;
418 goto out;
419 }
420
421 ret = i915_gem_object_set_cpu_read_domain_range(obj,
422 args->offset,
423 args->size);
424 if (ret)
425 goto out;
426
427 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
428
429 offset = args->offset;
430
431 while (remain > 0) {
432 struct page *page;
433
434 /* Operation in this page
435 *
436 * shmem_page_offset = offset within page in shmem file
437 * data_page_index = page number in get_user_pages return
438 * data_page_offset = offset within data_page_index page.
439 * page_length = bytes to copy for this page
440 */
441 shmem_page_offset = offset & ~PAGE_MASK;
442 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
443 data_page_offset = data_ptr & ~PAGE_MASK;
444
445 page_length = remain;
446 if ((shmem_page_offset + page_length) > PAGE_SIZE)
447 page_length = PAGE_SIZE - shmem_page_offset;
448 if ((data_page_offset + page_length) > PAGE_SIZE)
449 page_length = PAGE_SIZE - data_page_offset;
450
451 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
452 GFP_HIGHUSER | __GFP_RECLAIMABLE);
453 if (IS_ERR(page)) {
454 ret = PTR_ERR(page);
goto out;
}
455
456 if (do_bit17_swizzling) {
457 slow_shmem_bit17_copy(page,
458 shmem_page_offset,
459 user_pages[data_page_index],
460 data_page_offset,
461 page_length,
462 1);
463 } else {
464 slow_shmem_copy(user_pages[data_page_index],
465 data_page_offset,
466 page,
467 shmem_page_offset,
468 page_length);
469 }
470
471 mark_page_accessed(page);
472 page_cache_release(page);
473
474 remain -= page_length;
475 data_ptr += page_length;
476 offset += page_length;
477 }
478
479 out:
480 for (i = 0; i < pinned_pages; i++) {
481 SetPageDirty(user_pages[i]);
482 mark_page_accessed(user_pages[i]);
483 page_cache_release(user_pages[i]);
484 }
485 drm_free_large(user_pages);
486
487 return ret;
488 }
489
490 /**
491 * Reads data from the object referenced by handle.
492 *
493 * On error, the contents of *data are undefined.
494 */
495 int
496 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
497 struct drm_file *file)
498 {
499 struct drm_i915_gem_pread *args = data;
500 struct drm_i915_gem_object *obj;
501 int ret = 0;
502
503 if (args->size == 0)
504 return 0;
505
506 if (!access_ok(VERIFY_WRITE,
507 (char __user *)(uintptr_t)args->data_ptr,
508 args->size))
509 return -EFAULT;
510
511 ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
512 args->size);
513 if (ret)
514 return -EFAULT;
515
516 ret = i915_mutex_lock_interruptible(dev);
517 if (ret)
518 return ret;
519
520 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
521 if (obj == NULL) {
522 ret = -ENOENT;
523 goto unlock;
524 }
525
526 /* Bounds check source. */
527 if (args->offset > obj->base.size ||
528 args->size > obj->base.size - args->offset) {
529 ret = -EINVAL;
530 goto out;
531 }
532
533 ret = i915_gem_object_set_cpu_read_domain_range(obj,
534 args->offset,
535 args->size);
536 if (ret)
537 goto out;
538
539 ret = -EFAULT;
540 if (!i915_gem_object_needs_bit17_swizzle(obj))
541 ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
542 if (ret == -EFAULT)
543 ret = i915_gem_shmem_pread_slow(dev, obj, args, file);
544
545 out:
546 drm_gem_object_unreference(&obj->base);
547 unlock:
548 mutex_unlock(&dev->struct_mutex);
549 return ret;
550 }
551
552 /* This is the fast write path which cannot handle
553 * page faults in the source data
554 */
555
556 static inline int
557 fast_user_write(struct io_mapping *mapping,
558 loff_t page_base, int page_offset,
559 char __user *user_data,
560 int length)
561 {
562 char *vaddr_atomic;
563 unsigned long unwritten;
564
565 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
566 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
567 user_data, length);
568 io_mapping_unmap_atomic(vaddr_atomic);
569 return unwritten;
570 }
571
572 /* Here's the write path which can sleep for
573 * page faults
574 */
575
576 static inline void
577 slow_kernel_write(struct io_mapping *mapping,
578 loff_t gtt_base, int gtt_offset,
579 struct page *user_page, int user_offset,
580 int length)
581 {
582 char __iomem *dst_vaddr;
583 char *src_vaddr;
584
585 dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
586 src_vaddr = kmap(user_page);
587
588 memcpy_toio(dst_vaddr + gtt_offset,
589 src_vaddr + user_offset,
590 length);
591
592 kunmap(user_page);
593 io_mapping_unmap(dst_vaddr);
594 }
595
596 /**
597 * This is the fast pwrite path, where we copy the data directly from the
598 * user into the GTT, uncached.
599 */
600 static int
601 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
602 struct drm_i915_gem_object *obj,
603 struct drm_i915_gem_pwrite *args,
604 struct drm_file *file)
605 {
606 drm_i915_private_t *dev_priv = dev->dev_private;
607 ssize_t remain;
608 loff_t offset, page_base;
609 char __user *user_data;
610 int page_offset, page_length;
611
612 user_data = (char __user *) (uintptr_t) args->data_ptr;
613 remain = args->size;
614
615 offset = obj->gtt_offset + args->offset;
616
617 while (remain > 0) {
618 /* Operation in this page
619 *
620 * page_base = page offset within aperture
621 * page_offset = offset within page
622 * page_length = bytes to copy for this page
623 */
624 page_base = (offset & ~(PAGE_SIZE-1));
625 page_offset = offset & (PAGE_SIZE-1);
626 page_length = remain;
627 if ((page_offset + remain) > PAGE_SIZE)
628 page_length = PAGE_SIZE - page_offset;
629
630 /* If we get a fault while copying data, then (presumably) our
631 * source page isn't available. Return the error and we'll
632 * retry in the slow path.
633 */
634 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
635 page_offset, user_data, page_length))
637 return -EFAULT;
638
639 remain -= page_length;
640 user_data += page_length;
641 offset += page_length;
642 }
643
644 return 0;
645 }
646
647 /**
648 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
649 * the user pages and copies them into the GTT through io_mapping_map_wc.
650 *
651 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
652 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
653 */
654 static int
655 i915_gem_gtt_pwrite_slow(struct drm_device *dev,
656 struct drm_i915_gem_object *obj,
657 struct drm_i915_gem_pwrite *args,
658 struct drm_file *file)
659 {
660 drm_i915_private_t *dev_priv = dev->dev_private;
661 ssize_t remain;
662 loff_t gtt_page_base, offset;
663 loff_t first_data_page, last_data_page, num_pages;
664 loff_t pinned_pages, i;
665 struct page **user_pages;
666 struct mm_struct *mm = current->mm;
667 int gtt_page_offset, data_page_offset, data_page_index, page_length;
668 int ret;
669 uint64_t data_ptr = args->data_ptr;
670
671 remain = args->size;
672
673 /* Pin the user pages containing the data. We can't fault while
674 * holding the struct mutex, and all of the pwrite implementations
675 * want to hold it while dereferencing the user data.
676 */
677 first_data_page = data_ptr / PAGE_SIZE;
678 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
679 num_pages = last_data_page - first_data_page + 1;
680
681 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
682 if (user_pages == NULL)
683 return -ENOMEM;
684
685 mutex_unlock(&dev->struct_mutex);
686 down_read(&mm->mmap_sem);
687 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
688 num_pages, 0, 0, user_pages, NULL);
689 up_read(&mm->mmap_sem);
690 mutex_lock(&dev->struct_mutex);
691 if (pinned_pages < num_pages) {
692 ret = -EFAULT;
693 goto out_unpin_pages;
694 }
695
696 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
697 if (ret)
698 goto out_unpin_pages;
699
700 offset = obj->gtt_offset + args->offset;
701
702 while (remain > 0) {
703 /* Operation in this page
704 *
705 * gtt_page_base = page offset within aperture
706 * gtt_page_offset = offset within page in aperture
707 * data_page_index = page number in get_user_pages return
708 * data_page_offset = offset within data_page_index page.
709 * page_length = bytes to copy for this page
710 */
711 gtt_page_base = offset & PAGE_MASK;
712 gtt_page_offset = offset & ~PAGE_MASK;
713 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
714 data_page_offset = data_ptr & ~PAGE_MASK;
715
716 page_length = remain;
717 if ((gtt_page_offset + page_length) > PAGE_SIZE)
718 page_length = PAGE_SIZE - gtt_page_offset;
719 if ((data_page_offset + page_length) > PAGE_SIZE)
720 page_length = PAGE_SIZE - data_page_offset;
721
722 slow_kernel_write(dev_priv->mm.gtt_mapping,
723 gtt_page_base, gtt_page_offset,
724 user_pages[data_page_index],
725 data_page_offset,
726 page_length);
727
728 remain -= page_length;
729 offset += page_length;
730 data_ptr += page_length;
731 }
732
733 out_unpin_pages:
734 for (i = 0; i < pinned_pages; i++)
735 page_cache_release(user_pages[i]);
736 drm_free_large(user_pages);
737
738 return ret;
739 }
740
741 /**
742 * This is the fast shmem pwrite path, which attempts to directly
743 * copy_from_user into the kmapped pages backing the object.
744 */
745 static int
746 i915_gem_shmem_pwrite_fast(struct drm_device *dev,
747 struct drm_i915_gem_object *obj,
748 struct drm_i915_gem_pwrite *args,
749 struct drm_file *file)
750 {
751 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
752 ssize_t remain;
753 loff_t offset;
754 char __user *user_data;
755 int page_offset, page_length;
756
757 user_data = (char __user *) (uintptr_t) args->data_ptr;
758 remain = args->size;
759
760 offset = args->offset;
761 obj->dirty = 1;
762
763 while (remain > 0) {
764 struct page *page;
765 char *vaddr;
766 int ret;
767
768 /* Operation in this page
769 *
770 * page_offset = offset within page
771 * page_length = bytes to copy for this page
772 */
773 page_offset = offset & (PAGE_SIZE-1);
774 page_length = remain;
775 if ((page_offset + remain) > PAGE_SIZE)
776 page_length = PAGE_SIZE - page_offset;
777
778 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
779 GFP_HIGHUSER | __GFP_RECLAIMABLE);
780 if (IS_ERR(page))
781 return PTR_ERR(page);
782
783 vaddr = kmap_atomic(page, KM_USER0);
784 ret = __copy_from_user_inatomic(vaddr + page_offset,
785 user_data,
786 page_length);
787 kunmap_atomic(vaddr, KM_USER0);
788
789 set_page_dirty(page);
790 mark_page_accessed(page);
791 page_cache_release(page);
792
793 /* If we get a fault while copying data, then (presumably) our
794 * source page isn't available. Return the error and we'll
795 * retry in the slow path.
796 */
797 if (ret)
798 return -EFAULT;
799
800 remain -= page_length;
801 user_data += page_length;
802 offset += page_length;
803 }
804
805 return 0;
806 }
807
808 /**
809 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
810 * the memory and maps it with kmap for copying.
811 *
812 * This avoids taking mmap_sem for faulting on the user's address while the
813 * struct_mutex is held.
814 */
815 static int
816 i915_gem_shmem_pwrite_slow(struct drm_device *dev,
817 struct drm_i915_gem_object *obj,
818 struct drm_i915_gem_pwrite *args,
819 struct drm_file *file)
820 {
821 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
822 struct mm_struct *mm = current->mm;
823 struct page **user_pages;
824 ssize_t remain;
825 loff_t offset, pinned_pages, i;
826 loff_t first_data_page, last_data_page, num_pages;
827 int shmem_page_offset;
828 int data_page_index, data_page_offset;
829 int page_length;
830 int ret;
831 uint64_t data_ptr = args->data_ptr;
832 int do_bit17_swizzling;
833
834 remain = args->size;
835
836 /* Pin the user pages containing the data. We can't fault while
837 * holding the struct mutex, and all of the pwrite implementations
838 * want to hold it while dereferencing the user data.
839 */
840 first_data_page = data_ptr / PAGE_SIZE;
841 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
842 num_pages = last_data_page - first_data_page + 1;
843
844 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
845 if (user_pages == NULL)
846 return -ENOMEM;
847
848 mutex_unlock(&dev->struct_mutex);
849 down_read(&mm->mmap_sem);
850 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
851 num_pages, 0, 0, user_pages, NULL);
852 up_read(&mm->mmap_sem);
853 mutex_lock(&dev->struct_mutex);
854 if (pinned_pages < num_pages) {
855 ret = -EFAULT;
856 goto out;
857 }
858
859 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
860 if (ret)
861 goto out;
862
863 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
864
865 offset = args->offset;
866 obj->dirty = 1;
867
868 while (remain > 0) {
869 struct page *page;
870
871 /* Operation in this page
872 *
873 * shmem_page_offset = offset within page in shmem file
874 * data_page_index = page number in get_user_pages return
875 * data_page_offset = offset within data_page_index page.
876 * page_length = bytes to copy for this page
877 */
878 shmem_page_offset = offset & ~PAGE_MASK;
879 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
880 data_page_offset = data_ptr & ~PAGE_MASK;
881
882 page_length = remain;
883 if ((shmem_page_offset + page_length) > PAGE_SIZE)
884 page_length = PAGE_SIZE - shmem_page_offset;
885 if ((data_page_offset + page_length) > PAGE_SIZE)
886 page_length = PAGE_SIZE - data_page_offset;
887
888 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
889 GFP_HIGHUSER | __GFP_RECLAIMABLE);
890 if (IS_ERR(page)) {
891 ret = PTR_ERR(page);
892 goto out;
893 }
894
895 if (do_bit17_swizzling) {
896 slow_shmem_bit17_copy(page,
897 shmem_page_offset,
898 user_pages[data_page_index],
899 data_page_offset,
900 page_length,
901 0);
902 } else {
903 slow_shmem_copy(page,
904 shmem_page_offset,
905 user_pages[data_page_index],
906 data_page_offset,
907 page_length);
908 }
909
910 set_page_dirty(page);
911 mark_page_accessed(page);
912 page_cache_release(page);
913
914 remain -= page_length;
915 data_ptr += page_length;
916 offset += page_length;
917 }
918
919 out:
920 for (i = 0; i < pinned_pages; i++)
921 page_cache_release(user_pages[i]);
922 drm_free_large(user_pages);
923
924 return ret;
925 }
926
927 /**
928 * Writes data to the object referenced by handle.
929 *
930 * On error, the contents of the buffer that were to be modified are undefined.
931 */
932 int
933 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
934 struct drm_file *file)
935 {
936 struct drm_i915_gem_pwrite *args = data;
937 struct drm_i915_gem_object *obj;
938 int ret;
939
940 if (args->size == 0)
941 return 0;
942
943 if (!access_ok(VERIFY_READ,
944 (char __user *)(uintptr_t)args->data_ptr,
945 args->size))
946 return -EFAULT;
947
948 ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
949 args->size);
950 if (ret)
951 return -EFAULT;
952
953 ret = i915_mutex_lock_interruptible(dev);
954 if (ret)
955 return ret;
956
957 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
958 if (obj == NULL) {
959 ret = -ENOENT;
960 goto unlock;
961 }
962
963 /* Bounds check destination. */
964 if (args->offset > obj->base.size ||
965 args->size > obj->base.size - args->offset) {
966 ret = -EINVAL;
967 goto out;
968 }
969
970 /* We can only do the GTT pwrite on untiled buffers, as otherwise
971 * it would end up going through the fenced access, and we'll get
972 * different detiling behavior between reading and writing.
973 * pread/pwrite currently are reading and writing from the CPU
974 * perspective, requiring manual detiling by the client.
975 */
976 if (obj->phys_obj)
977 ret = i915_gem_phys_pwrite(dev, obj, args, file);
978 else if (obj->tiling_mode == I915_TILING_NONE &&
979 obj->gtt_space &&
980 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
981 ret = i915_gem_object_pin(obj, 0, true);
982 if (ret)
983 goto out;
984
985 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
986 if (ret)
987 goto out_unpin;
988
989 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
990 if (ret == -EFAULT)
991 ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
992
993 out_unpin:
994 i915_gem_object_unpin(obj);
995 } else {
996 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
997 if (ret)
998 goto out;
999
1000 ret = -EFAULT;
1001 if (!i915_gem_object_needs_bit17_swizzle(obj))
1002 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
1003 if (ret == -EFAULT)
1004 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
1005 }
1006
1007 out:
1008 drm_gem_object_unreference(&obj->base);
1009 unlock:
1010 mutex_unlock(&dev->struct_mutex);
1011 return ret;
1012 }
1013
1014 /**
1015 * Called when user space prepares to use an object with the CPU, either
1016 * through the mmap ioctl's mapping or a GTT mapping.
1017 */
1018 int
1019 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1020 struct drm_file *file)
1021 {
1022 struct drm_i915_private *dev_priv = dev->dev_private;
1023 struct drm_i915_gem_set_domain *args = data;
1024 struct drm_i915_gem_object *obj;
1025 uint32_t read_domains = args->read_domains;
1026 uint32_t write_domain = args->write_domain;
1027 int ret;
1028
1029 if (!(dev->driver->driver_features & DRIVER_GEM))
1030 return -ENODEV;
1031
1032 /* Only handle setting domains to types used by the CPU. */
1033 if (write_domain & I915_GEM_GPU_DOMAINS)
1034 return -EINVAL;
1035
1036 if (read_domains & I915_GEM_GPU_DOMAINS)
1037 return -EINVAL;
1038
1039 /* Having something in the write domain implies it's in the read
1040 * domain, and only that read domain. Enforce that in the request.
1041 */
1042 if (write_domain != 0 && read_domains != write_domain)
1043 return -EINVAL;
1044
1045 ret = i915_mutex_lock_interruptible(dev);
1046 if (ret)
1047 return ret;
1048
1049 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1050 if (obj == NULL) {
1051 ret = -ENOENT;
1052 goto unlock;
1053 }
1054
1055 intel_mark_busy(dev, obj);
1056
1057 if (read_domains & I915_GEM_DOMAIN_GTT) {
1058 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1059
1060 /* Update the LRU on the fence for the CPU access that's
1061 * about to occur.
1062 */
1063 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1064 struct drm_i915_fence_reg *reg =
1065 &dev_priv->fence_regs[obj->fence_reg];
1066 list_move_tail(&reg->lru_list,
1067 &dev_priv->mm.fence_list);
1068 }
1069
1070 /* Silently promote "you're not bound, there was nothing to do"
1071 * to success, since the client was just asking us to
1072 * make sure everything was done.
1073 */
1074 if (ret == -EINVAL)
1075 ret = 0;
1076 } else {
1077 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1078 }
1079
1080 /* Maintain LRU order of "inactive" objects */
1081 if (ret == 0 && i915_gem_object_is_inactive(obj))
1082 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1083
1084 drm_gem_object_unreference(&obj->base);
1085 unlock:
1086 mutex_unlock(&dev->struct_mutex);
1087 return ret;
1088 }
1089
1090 /**
1091 * Called when user space has done writes to this buffer
1092 */
1093 int
1094 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1095 struct drm_file *file)
1096 {
1097 struct drm_i915_gem_sw_finish *args = data;
1098 struct drm_i915_gem_object *obj;
1099 int ret = 0;
1100
1101 if (!(dev->driver->driver_features & DRIVER_GEM))
1102 return -ENODEV;
1103
1104 ret = i915_mutex_lock_interruptible(dev);
1105 if (ret)
1106 return ret;
1107
1108 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1109 if (obj == NULL) {
1110 ret = -ENOENT;
1111 goto unlock;
1112 }
1113
1114 /* Pinned buffers may be scanout, so flush the cache */
1115 if (obj->pin_count)
1116 i915_gem_object_flush_cpu_write_domain(obj);
1117
1118 drm_gem_object_unreference(&obj->base);
1119 unlock:
1120 mutex_unlock(&dev->struct_mutex);
1121 return ret;
1122 }
1123
1124 /**
1125 * Maps the contents of an object, returning the address it is mapped
1126 * into.
1127 *
1128 * While the mapping holds a reference on the contents of the object, it doesn't
1129 * imply a ref on the object itself.
1130 */
1131 int
1132 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1133 struct drm_file *file)
1134 {
1135 struct drm_i915_private *dev_priv = dev->dev_private;
1136 struct drm_i915_gem_mmap *args = data;
1137 struct drm_gem_object *obj;
1138 loff_t offset;
1139 unsigned long addr;
1140
1141 if (!(dev->driver->driver_features & DRIVER_GEM))
1142 return -ENODEV;
1143
1144 obj = drm_gem_object_lookup(dev, file, args->handle);
1145 if (obj == NULL)
1146 return -ENOENT;
1147
1148 if (obj->size > dev_priv->mm.gtt_mappable_end) {
1149 drm_gem_object_unreference_unlocked(obj);
1150 return -E2BIG;
1151 }
1152
1153 offset = args->offset;
1154
1155 down_write(&current->mm->mmap_sem);
1156 addr = do_mmap(obj->filp, 0, args->size,
1157 PROT_READ | PROT_WRITE, MAP_SHARED,
1158 args->offset);
1159 up_write(&current->mm->mmap_sem);
1160 drm_gem_object_unreference_unlocked(obj);
1161 if (IS_ERR((void *)addr))
1162 return addr;
1163
1164 args->addr_ptr = (uint64_t) addr;
1165
1166 return 0;
1167 }
1168
1169 /**
1170 * i915_gem_fault - fault a page into the GTT
1171 * vma: VMA in question
1172 * vmf: fault info
1173 *
1174 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1175 * from userspace. The fault handler takes care of binding the object to
1176 * the GTT (if needed), allocating and programming a fence register (again,
1177 * only if needed based on whether the old reg is still valid or the object
1178 * is tiled) and inserting a new PTE into the faulting process.
1179 *
1180 * Note that the faulting process may involve evicting existing objects
1181 * from the GTT and/or fence registers to make room. So performance may
1182 * suffer if the GTT working set is large or there are few fence registers
1183 * left.
1184 */
1185 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1186 {
1187 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1188 struct drm_device *dev = obj->base.dev;
1189 drm_i915_private_t *dev_priv = dev->dev_private;
1190 pgoff_t page_offset;
1191 unsigned long pfn;
1192 int ret = 0;
1193 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1194
1195 /* We don't use vmf->pgoff since that has the fake offset */
1196 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1197 PAGE_SHIFT;
1198
1199 /* Now bind it into the GTT if needed */
1200 mutex_lock(&dev->struct_mutex);
1201
1202 if (!obj->map_and_fenceable) {
1203 ret = i915_gem_object_unbind(obj);
1204 if (ret)
1205 goto unlock;
1206 }
1207 if (!obj->gtt_space) {
1208 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1209 if (ret)
1210 goto unlock;
1211 }
1212
1213 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1214 if (ret)
1215 goto unlock;
1216
1217 /* Need a new fence register? */
1218 if (obj->tiling_mode != I915_TILING_NONE) {
1219 ret = i915_gem_object_get_fence_reg(obj, true);
1220 if (ret)
1221 goto unlock;
1222 }
1223
1224 if (i915_gem_object_is_inactive(obj))
1225 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1226
1227 obj->fault_mappable = true;
1228
1229 pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
1230 page_offset;
1231
1232 /* Finally, remap it using the new GTT offset */
1233 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1234 unlock:
1235 mutex_unlock(&dev->struct_mutex);
1236
1237 switch (ret) {
1238 case -EAGAIN:
1239 set_need_resched();
1240 case 0:
1241 case -ERESTARTSYS:
1242 return VM_FAULT_NOPAGE;
1243 case -ENOMEM:
1244 return VM_FAULT_OOM;
1245 default:
1246 return VM_FAULT_SIGBUS;
1247 }
1248 }
1249
1250 /**
1251 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1252 * @obj: obj in question
1253 *
1254 * GEM memory mapping works by handing back to userspace a fake mmap offset
1255 * it can use in a subsequent mmap(2) call. The DRM core code then looks
1256 * up the object based on the offset and sets up the various memory mapping
1257 * structures.
1258 *
1259 * This routine allocates and attaches a fake offset for @obj.
1260 */
1261 static int
1262 i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
1263 {
1264 struct drm_device *dev = obj->base.dev;
1265 struct drm_gem_mm *mm = dev->mm_private;
1266 struct drm_map_list *list;
1267 struct drm_local_map *map;
1268 int ret = 0;
1269
1270 /* Set the object up for mmap'ing */
1271 list = &obj->base.map_list;
1272 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1273 if (!list->map)
1274 return -ENOMEM;
1275
1276 map = list->map;
1277 map->type = _DRM_GEM;
1278 map->size = obj->base.size;
1279 map->handle = obj;
1280
1281 /* Get a DRM GEM mmap offset allocated... */
1282 list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1283 obj->base.size / PAGE_SIZE,
1284 0, 0);
1285 if (!list->file_offset_node) {
1286 DRM_ERROR("failed to allocate offset for bo %d\n",
1287 obj->base.name);
1288 ret = -ENOSPC;
1289 goto out_free_list;
1290 }
1291
1292 list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1293 obj->base.size / PAGE_SIZE,
1294 0);
1295 if (!list->file_offset_node) {
1296 ret = -ENOMEM;
1297 goto out_free_list;
1298 }
1299
1300 list->hash.key = list->file_offset_node->start;
1301 ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
1302 if (ret) {
1303 DRM_ERROR("failed to add to map hash\n");
1304 goto out_free_mm;
1305 }
1306
1307 return 0;
1308
1309 out_free_mm:
1310 drm_mm_put_block(list->file_offset_node);
1311 out_free_list:
1312 kfree(list->map);
1313 list->map = NULL;
1314
1315 return ret;
1316 }
1317
1318 /**
1319 * i915_gem_release_mmap - remove physical page mappings
1320 * @obj: obj in question
1321 *
1322 * Preserve the reservation of the mmapping with the DRM core code, but
1323 * relinquish ownership of the pages back to the system.
1324 *
1325 * It is vital that we remove the page mapping if we have mapped a tiled
1326 * object through the GTT and then lose the fence register due to
1327 * resource pressure. Similarly if the object has been moved out of the
1328 * aperture, then pages mapped into userspace must be revoked. Removing the
1329 * mapping will then trigger a page fault on the next user access, allowing
1330 * fixup by i915_gem_fault().
1331 */
1332 void
1333 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1334 {
1335 if (!obj->fault_mappable)
1336 return;
1337
1338 unmap_mapping_range(obj->base.dev->dev_mapping,
1339 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1340 obj->base.size, 1);
1341
1342 obj->fault_mappable = false;
1343 }
1344
1345 static void
1346 i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
1347 {
1348 struct drm_device *dev = obj->base.dev;
1349 struct drm_gem_mm *mm = dev->mm_private;
1350 struct drm_map_list *list = &obj->base.map_list;
1351
1352 drm_ht_remove_item(&mm->offset_hash, &list->hash);
1353 drm_mm_put_block(list->file_offset_node);
1354 kfree(list->map);
1355 list->map = NULL;
1356 }
1357
1358 static uint32_t
1359 i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
1360 {
1361 struct drm_device *dev = obj->base.dev;
1362 uint32_t size;
1363
1364 if (INTEL_INFO(dev)->gen >= 4 ||
1365 obj->tiling_mode == I915_TILING_NONE)
1366 return obj->base.size;
1367
1368 /* Previous chips need a power-of-two fence region when tiling */
1369 if (INTEL_INFO(dev)->gen == 3)
1370 size = 1024*1024;
1371 else
1372 size = 512*1024;
1373
1374 while (size < obj->base.size)
1375 size <<= 1;
1376
1377 return size;
1378 }
1379
1380 /**
1381 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1382 * @obj: object to check
1383 *
1384 * Return the required GTT alignment for an object, taking into account
1385 * potential fence register mapping.
1386 */
1387 static uint32_t
1388 i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
1389 {
1390 struct drm_device *dev = obj->base.dev;
1391
1392 /*
1393 * Minimum alignment is 4k (GTT page size), but might be greater
1394 * if a fence register is needed for the object.
1395 */
1396 if (INTEL_INFO(dev)->gen >= 4 ||
1397 obj->tiling_mode == I915_TILING_NONE)
1398 return 4096;
1399
1400 /*
1401 * Previous chips need to be aligned to the size of the smallest
1402 * fence register that can contain the object.
1403 */
1404 return i915_gem_get_gtt_size(obj);
1405 }
1406
1407 /**
1408 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1409 * unfenced object
1410 * @obj: object to check
1411 *
1412 * Return the required GTT alignment for an object, only taking into account
1413 * unfenced tiled surface requirements.
1414 */
1415 static uint32_t
1416 i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
1417 {
1418 struct drm_device *dev = obj->base.dev;
1419 int tile_height;
1420
1421 /*
1422 * Minimum alignment is 4k (GTT page size) for sane hw.
1423 */
1424 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1425 obj->tiling_mode == I915_TILING_NONE)
1426 return 4096;
1427
1428 /*
1429 * Older chips need unfenced tiled buffers to be aligned to the left
1430 * edge of an even tile row (where tile rows are counted as if the bo is
1431 * placed in a fenced gtt region).
1432 */
1433 if (IS_GEN2(dev) ||
1434 (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
1435 tile_height = 32;
1436 else
1437 tile_height = 8;
1438
1439 return tile_height * obj->stride * 2;
1440 }
1441
1442 /**
1443 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1444 * @dev: DRM device
1445 * @data: GTT mapping ioctl data
1446 * @file: GEM object info
1447 *
1448 * Simply returns the fake offset to userspace so it can mmap it.
1449 * The mmap call will end up in drm_gem_mmap(), which will set things
1450 * up so we can get faults in the handler above.
1451 *
1452 * The fault handler will take care of binding the object into the GTT
1453 * (since it may have been evicted to make room for something), allocating
1454 * a fence register, and mapping the appropriate aperture address into
1455 * userspace.
1456 */
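/* Illustrative userspace flow (a hypothetical sketch, not part of this
 * file): pass a GEM handle to the ioctl, then mmap the returned fake
 * offset on the DRM fd.
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 */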
1457 int
1458 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1459 struct drm_file *file)
1460 {
1461 struct drm_i915_private *dev_priv = dev->dev_private;
1462 struct drm_i915_gem_mmap_gtt *args = data;
1463 struct drm_i915_gem_object *obj;
1464 int ret;
1465
1466 if (!(dev->driver->driver_features & DRIVER_GEM))
1467 return -ENODEV;
1468
1469 ret = i915_mutex_lock_interruptible(dev);
1470 if (ret)
1471 return ret;
1472
1473 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1474 if (obj == NULL) {
1475 ret = -ENOENT;
1476 goto unlock;
1477 }
1478
1479 if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1480 ret = -E2BIG;
1481 goto unlock;
1482 }
1483
1484 if (obj->madv != I915_MADV_WILLNEED) {
1485 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1486 ret = -EINVAL;
1487 goto out;
1488 }
1489
1490 if (!obj->base.map_list.map) {
1491 ret = i915_gem_create_mmap_offset(obj);
1492 if (ret)
1493 goto out;
1494 }
1495
1496 args->offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1497
1498 out:
1499 drm_gem_object_unreference(&obj->base);
1500 unlock:
1501 mutex_unlock(&dev->struct_mutex);
1502 return ret;
1503 }
1504
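/* Populate obj->pages with the object's backing pages from shmemfs; each
 * page stays pinned until i915_gem_object_put_pages_gtt() releases it.
 */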
1505 static int
1506 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1507 gfp_t gfpmask)
1508 {
1509 int page_count, i;
1510 struct address_space *mapping;
1511 struct inode *inode;
1512 struct page *page;
1513
1514 /* Get the list of pages out of our struct file. They'll be pinned
1515 * at this point until we release them.
1516 */
1517 page_count = obj->base.size / PAGE_SIZE;
1518 BUG_ON(obj->pages != NULL);
1519 obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
1520 if (obj->pages == NULL)
1521 return -ENOMEM;
1522
1523 inode = obj->base.filp->f_path.dentry->d_inode;
1524 mapping = inode->i_mapping;
1525 for (i = 0; i < page_count; i++) {
1526 page = read_cache_page_gfp(mapping, i,
1527 GFP_HIGHUSER |
1528 __GFP_COLD |
1529 __GFP_RECLAIMABLE |
1530 gfpmask);
1531 if (IS_ERR(page))
1532 goto err_pages;
1533
1534 obj->pages[i] = page;
1535 }
1536
1537 if (obj->tiling_mode != I915_TILING_NONE)
1538 i915_gem_object_do_bit_17_swizzle(obj);
1539
1540 return 0;
1541
1542 err_pages:
1543 while (i--)
1544 page_cache_release(obj->pages[i]);
1545
1546 drm_free_large(obj->pages);
1547 obj->pages = NULL;
1548 return PTR_ERR(page);
1549 }
1550
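/* Release the backing pages, writing back dirty data unless the object
 * has been marked DONTNEED by userspace.
 */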
1551 static void
1552 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1553 {
1554 int page_count = obj->base.size / PAGE_SIZE;
1555 int i;
1556
1557 BUG_ON(obj->madv == __I915_MADV_PURGED);
1558
1559 if (obj->tiling_mode != I915_TILING_NONE)
1560 i915_gem_object_save_bit_17_swizzle(obj);
1561
1562 if (obj->madv == I915_MADV_DONTNEED)
1563 obj->dirty = 0;
1564
1565 for (i = 0; i < page_count; i++) {
1566 if (obj->dirty)
1567 set_page_dirty(obj->pages[i]);
1568
1569 if (obj->madv == I915_MADV_WILLNEED)
1570 mark_page_accessed(obj->pages[i]);
1571
1572 page_cache_release(obj->pages[i]);
1573 }
1574 obj->dirty = 0;
1575
1576 drm_free_large(obj->pages);
1577 obj->pages = NULL;
1578 }
1579
1580 static uint32_t
1581 i915_gem_next_request_seqno(struct drm_device *dev,
1582 struct intel_ring_buffer *ring)
1583 {
1584 drm_i915_private_t *dev_priv = dev->dev_private;
1585 return ring->outstanding_lazy_request = dev_priv->next_seqno;
1586 }
1587
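/* Mark the object as busy on the given ring: move it to the active lists
 * and record the seqno that must pass before it becomes idle again.
 */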
1588 static void
1589 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1590 struct intel_ring_buffer *ring)
1591 {
1592 struct drm_device *dev = obj->base.dev;
1593 struct drm_i915_private *dev_priv = dev->dev_private;
1594 uint32_t seqno = i915_gem_next_request_seqno(dev, ring);
1595
1596 BUG_ON(ring == NULL);
1597 obj->ring = ring;
1598
1599 /* Add a reference if we're newly entering the active list. */
1600 if (!obj->active) {
1601 drm_gem_object_reference(&obj->base);
1602 obj->active = 1;
1603 }
1604
1605 /* Move from whatever list we were on to the tail of execution. */
1606 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1607 list_move_tail(&obj->ring_list, &ring->active_list);
1608
1609 obj->last_rendering_seqno = seqno;
1610 if (obj->fenced_gpu_access) {
1611 struct drm_i915_fence_reg *reg;
1612
1613 BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);
1614
1615 obj->last_fenced_seqno = seqno;
1616 obj->last_fenced_ring = ring;
1617
1618 reg = &dev_priv->fence_regs[obj->fence_reg];
1619 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
1620 }
1621 }
1622
1623 static void
1624 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
1625 {
1626 list_del_init(&obj->ring_list);
1627 obj->last_rendering_seqno = 0;
1628 obj->last_fenced_seqno = 0;
1629 }
1630
1631 static void
1632 i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
1633 {
1634 struct drm_device *dev = obj->base.dev;
1635 drm_i915_private_t *dev_priv = dev->dev_private;
1636
1637 BUG_ON(!obj->active);
1638 list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
1639
1640 i915_gem_object_move_off_active(obj);
1641 }
1642
1643 static void
1644 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1645 {
1646 struct drm_device *dev = obj->base.dev;
1647 struct drm_i915_private *dev_priv = dev->dev_private;
1648
1649 if (obj->pin_count != 0)
1650 list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
1651 else
1652 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1653
1654 BUG_ON(!list_empty(&obj->gpu_write_list));
1655 BUG_ON(!obj->active);
1656 obj->ring = NULL;
1657
1658 i915_gem_object_move_off_active(obj);
1659 obj->fenced_gpu_access = false;
1660 obj->last_fenced_ring = NULL;
1661
1662 obj->active = 0;
1663 drm_gem_object_unreference(&obj->base);
1664
1665 WARN_ON(i915_verify_lists(dev));
1666 }
1667
1668 /* Immediately discard the backing storage */
1669 static void
1670 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1671 {
1672 struct inode *inode;
1673
1674 /* Our goal here is to return as much of the memory as
1675 * possible back to the system, as we may be called from OOM.
1676 * To do this we must instruct the shmfs to drop all of its
1677 * backing pages, *now*. Here we mirror the actions taken
1678 * by shmem_delete_inode() to release the backing store.
1679 */
1680 inode = obj->base.filp->f_path.dentry->d_inode;
1681 truncate_inode_pages(inode->i_mapping, 0);
1682 if (inode->i_op->truncate_range)
1683 inode->i_op->truncate_range(inode, 0, (loff_t)-1);
1684
1685 obj->madv = __I915_MADV_PURGED;
1686 }
1687
1688 static inline int
1689 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1690 {
1691 return obj->madv == I915_MADV_DONTNEED;
1692 }
1693
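/* After emitting a flush, move every object whose pending GPU write was
 * covered by flush_domains onto the ring's active list so the write is
 * retired along with the request.
 */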
1694 static void
1695 i915_gem_process_flushing_list(struct drm_device *dev,
1696 uint32_t flush_domains,
1697 struct intel_ring_buffer *ring)
1698 {
1699 struct drm_i915_gem_object *obj, *next;
1700
1701 list_for_each_entry_safe(obj, next,
1702 &ring->gpu_write_list,
1703 gpu_write_list) {
1704 if (obj->base.write_domain & flush_domains) {
1705 uint32_t old_write_domain = obj->base.write_domain;
1706
1707 obj->base.write_domain = 0;
1708 list_del_init(&obj->gpu_write_list);
1709 i915_gem_object_move_to_active(obj, ring);
1710
1711 trace_i915_gem_object_change_domain(obj,
1712 obj->base.read_domains,
1713 old_write_domain);
1714 }
1715 }
1716 }
1717
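/* Emit a request-complete marker on the ring and queue the request for
 * retirement; also links the request to the submitting client and arms
 * the hangcheck timer and retire work.
 */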
1718 int
1719 i915_add_request(struct drm_device *dev,
1720 struct drm_file *file,
1721 struct drm_i915_gem_request *request,
1722 struct intel_ring_buffer *ring)
1723 {
1724 drm_i915_private_t *dev_priv = dev->dev_private;
1725 struct drm_i915_file_private *file_priv = NULL;
1726 uint32_t seqno;
1727 int was_empty;
1728 int ret;
1729
1730 BUG_ON(request == NULL);
1731
1732 if (file != NULL)
1733 file_priv = file->driver_priv;
1734
1735 ret = ring->add_request(ring, &seqno);
1736 if (ret)
1737 return ret;
1738
1739 ring->outstanding_lazy_request = false;
1740
1741 request->seqno = seqno;
1742 request->ring = ring;
1743 request->emitted_jiffies = jiffies;
1744 was_empty = list_empty(&ring->request_list);
1745 list_add_tail(&request->list, &ring->request_list);
1746
1747 if (file_priv) {
1748 spin_lock(&file_priv->mm.lock);
1749 request->file_priv = file_priv;
1750 list_add_tail(&request->client_list,
1751 &file_priv->mm.request_list);
1752 spin_unlock(&file_priv->mm.lock);
1753 }
1754
1755 if (!dev_priv->mm.suspended) {
1756 mod_timer(&dev_priv->hangcheck_timer,
1757 jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1758 if (was_empty)
1759 queue_delayed_work(dev_priv->wq,
1760 &dev_priv->mm.retire_work, HZ);
1761 }
1762 return 0;
1763 }
1764
1765 /**
1766 * Command execution barrier
1767 *
1768 * Ensures that all commands in the ring are finished
1769 * before signalling the CPU
1770 */
1771 static void
1772 i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1773 {
1774 uint32_t flush_domains = 0;
1775
1776 /* The sampler always gets flushed on i965 (sigh) */
1777 if (INTEL_INFO(dev)->gen >= 4)
1778 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1779
1780 ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
1781 }
1782
1783 static inline void
1784 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1785 {
1786 struct drm_i915_file_private *file_priv = request->file_priv;
1787
1788 if (!file_priv)
1789 return;
1790
1791 spin_lock(&file_priv->mm.lock);
1792 list_del(&request->client_list);
1793 request->file_priv = NULL;
1794 spin_unlock(&file_priv->mm.lock);
1795 }
1796
1797 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1798 struct intel_ring_buffer *ring)
1799 {
1800 while (!list_empty(&ring->request_list)) {
1801 struct drm_i915_gem_request *request;
1802
1803 request = list_first_entry(&ring->request_list,
1804 struct drm_i915_gem_request,
1805 list);
1806
1807 list_del(&request->list);
1808 i915_gem_request_remove_from_client(request);
1809 kfree(request);
1810 }
1811
1812 while (!list_empty(&ring->active_list)) {
1813 struct drm_i915_gem_object *obj;
1814
1815 obj = list_first_entry(&ring->active_list,
1816 struct drm_i915_gem_object,
1817 ring_list);
1818
1819 obj->base.write_domain = 0;
1820 list_del_init(&obj->gpu_write_list);
1821 i915_gem_object_move_to_inactive(obj);
1822 }
1823 }
1824
1825 static void i915_gem_reset_fences(struct drm_device *dev)
1826 {
1827 struct drm_i915_private *dev_priv = dev->dev_private;
1828 int i;
1829
1830 for (i = 0; i < 16; i++) {
1831 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1832 if (reg->obj)
1833 i915_gem_clear_fence_reg(reg->obj);
1834 }
1835 }
1836
1837 void i915_gem_reset(struct drm_device *dev)
1838 {
1839 struct drm_i915_private *dev_priv = dev->dev_private;
1840 struct drm_i915_gem_object *obj;
1841
1842 i915_gem_reset_ring_lists(dev_priv, &dev_priv->render_ring);
1843 i915_gem_reset_ring_lists(dev_priv, &dev_priv->bsd_ring);
1844 i915_gem_reset_ring_lists(dev_priv, &dev_priv->blt_ring);
1845
1846 /* Remove anything from the flushing lists. The GPU cache is likely
1847 * to be lost on reset along with the data, so simply move the
1848 * lost bo to the inactive list.
1849 */
1850 while (!list_empty(&dev_priv->mm.flushing_list)) {
1851 obj = list_first_entry(&dev_priv->mm.flushing_list,
1852 struct drm_i915_gem_object,
1853 mm_list);
1854
1855 obj->base.write_domain = 0;
1856 list_del_init(&obj->gpu_write_list);
1857 i915_gem_object_move_to_inactive(obj);
1858 }
1859
1860 /* Move everything out of the GPU domains to ensure we do any
1861 * necessary invalidation upon reuse.
1862 */
1863 list_for_each_entry(obj,
1864 &dev_priv->mm.inactive_list,
1865 mm_list) {
1867 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1868 }
1869
1870 /* The fence registers are invalidated so clear them out */
1871 i915_gem_reset_fences(dev);
1872 }
1873
1874 /**
1875 * This function clears the request list as sequence numbers are passed.
1876 */
1877 static void
1878 i915_gem_retire_requests_ring(struct drm_device *dev,
1879 struct intel_ring_buffer *ring)
1880 {
1881 drm_i915_private_t *dev_priv = dev->dev_private;
1882 uint32_t seqno;
1883
1884 if (!ring->status_page.page_addr ||
1885 list_empty(&ring->request_list))
1886 return;
1887
1888 WARN_ON(i915_verify_lists(dev));
1889
1890 seqno = ring->get_seqno(ring);
1891 while (!list_empty(&ring->request_list)) {
1892 struct drm_i915_gem_request *request;
1893
1894 request = list_first_entry(&ring->request_list,
1895 struct drm_i915_gem_request,
1896 list);
1897
1898 if (!i915_seqno_passed(seqno, request->seqno))
1899 break;
1900
1901 trace_i915_gem_request_retire(dev, request->seqno);
1902
1903 list_del(&request->list);
1904 i915_gem_request_remove_from_client(request);
1905 kfree(request);
1906 }
1907
1908 /* Move any buffers on the active list that are no longer referenced
1909 * by the ringbuffer to the flushing/inactive lists as appropriate.
1910 */
1911 while (!list_empty(&ring->active_list)) {
1912 struct drm_i915_gem_object *obj;
1913
1914 obj = list_first_entry(&ring->active_list,
1915 struct drm_i915_gem_object,
1916 ring_list);
1917
1918 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
1919 break;
1920
1921 if (obj->base.write_domain != 0)
1922 i915_gem_object_move_to_flushing(obj);
1923 else
1924 i915_gem_object_move_to_inactive(obj);
1925 }
1926
1927 if (unlikely(dev_priv->trace_irq_seqno &&
1928 i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1929 ring->user_irq_put(ring);
1930 dev_priv->trace_irq_seqno = 0;
1931 }
1932
1933 WARN_ON(i915_verify_lists(dev));
1934 }
1935
1936 void
1937 i915_gem_retire_requests(struct drm_device *dev)
1938 {
1939 drm_i915_private_t *dev_priv = dev->dev_private;
1940
1941 if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1942 struct drm_i915_gem_object *obj, *next;
1943
1944 /* We must be careful that during unbind() we do not
1945 * accidentally infinitely recurse into retire requests.
1946 * Currently:
1947 * retire -> free -> unbind -> wait -> retire_ring
1948 */
1949 list_for_each_entry_safe(obj, next,
1950 &dev_priv->mm.deferred_free_list,
1951 mm_list)
1952 i915_gem_free_object_tail(obj);
1953 }
1954
1955 i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1956 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1957 i915_gem_retire_requests_ring(dev, &dev_priv->blt_ring);
1958 }
1959
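/* Periodic retire handler: reap completed requests and re-arm itself
 * while any ring still has outstanding work.
 */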
1960 static void
1961 i915_gem_retire_work_handler(struct work_struct *work)
1962 {
1963 drm_i915_private_t *dev_priv;
1964 struct drm_device *dev;
1965
1966 dev_priv = container_of(work, drm_i915_private_t,
1967 mm.retire_work.work);
1968 dev = dev_priv->dev;
1969
1970 /* Come back later if the device is busy... */
1971 if (!mutex_trylock(&dev->struct_mutex)) {
1972 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1973 return;
1974 }
1975
1976 i915_gem_retire_requests(dev);
1977
1978 if (!dev_priv->mm.suspended &&
1979 (!list_empty(&dev_priv->render_ring.request_list) ||
1980 !list_empty(&dev_priv->bsd_ring.request_list) ||
1981 !list_empty(&dev_priv->blt_ring.request_list)))
1982 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1983 mutex_unlock(&dev->struct_mutex);
1984 }
1985
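/* Wait for the given seqno to pass on @ring. If the seqno refers to the
 * ring's outstanding lazy request, a real request is emitted first so there
 * is something to wait upon. Returns -EAGAIN if the GPU is wedged, and on
 * success retires any requests that completed while we waited.
 */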
1986 int
1987 i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1988 bool interruptible, struct intel_ring_buffer *ring)
1989 {
1990 drm_i915_private_t *dev_priv = dev->dev_private;
1991 u32 ier;
1992 int ret = 0;
1993
1994 BUG_ON(seqno == 0);
1995
1996 if (atomic_read(&dev_priv->mm.wedged))
1997 return -EAGAIN;
1998
1999 if (seqno == ring->outstanding_lazy_request) {
2000 struct drm_i915_gem_request *request;
2001
2002 request = kzalloc(sizeof(*request), GFP_KERNEL);
2003 if (request == NULL)
2004 return -ENOMEM;
2005
2006 ret = i915_add_request(dev, NULL, request, ring);
2007 if (ret) {
2008 kfree(request);
2009 return ret;
2010 }
2011
2012 seqno = request->seqno;
2013 }
2014
2015 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
2016 if (HAS_PCH_SPLIT(dev))
2017 ier = I915_READ(DEIER) | I915_READ(GTIER);
2018 else
2019 ier = I915_READ(IER);
2020 if (!ier) {
2021 DRM_ERROR("something (likely vbetool) disabled "
2022 "interrupts, re-enabling\n");
2023 i915_driver_irq_preinstall(dev);
2024 i915_driver_irq_postinstall(dev);
2025 }
2026
2027 trace_i915_gem_request_wait_begin(dev, seqno);
2028
2029 ring->waiting_seqno = seqno;
2030 ring->user_irq_get(ring);
2031 if (interruptible)
2032 ret = wait_event_interruptible(ring->irq_queue,
2033 i915_seqno_passed(ring->get_seqno(ring), seqno)
2034 || atomic_read(&dev_priv->mm.wedged));
2035 else
2036 wait_event(ring->irq_queue,
2037 i915_seqno_passed(ring->get_seqno(ring), seqno)
2038 || atomic_read(&dev_priv->mm.wedged));
2039
2040 ring->user_irq_put(ring);
2041 ring->waiting_seqno = 0;
2042
2043 trace_i915_gem_request_wait_end(dev, seqno);
2044 }
2045 if (atomic_read(&dev_priv->mm.wedged))
2046 ret = -EAGAIN;
2047
2048 if (ret && ret != -ERESTARTSYS)
2049 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
2050 __func__, ret, seqno, ring->get_seqno(ring),
2051 dev_priv->next_seqno);
2052
2053 /* Directly dispatch request retiring. While we have the work queue
2054 * to handle this, the waiter on a request often wants an associated
2055 * buffer to have made it to the inactive list, and we would need
2056 * a separate wait queue to handle that.
2057 */
2058 if (ret == 0)
2059 i915_gem_retire_requests_ring(dev, ring);
2060
2061 return ret;
2062 }
2063
2064 /**
2065 * Waits for a sequence number to be signaled, and cleans up the
2066 * request and object lists appropriately for that event.
2067 */
2068 static int
2069 i915_wait_request(struct drm_device *dev, uint32_t seqno,
2070 struct intel_ring_buffer *ring)
2071 {
2072 return i915_do_wait_request(dev, seqno, 1, ring);
2073 }
2074
2075 static void
2076 i915_gem_flush_ring(struct drm_device *dev,
2077 struct intel_ring_buffer *ring,
2078 uint32_t invalidate_domains,
2079 uint32_t flush_domains)
2080 {
2081 ring->flush(ring, invalidate_domains, flush_domains);
2082 i915_gem_process_flushing_list(dev, flush_domains, ring);
2083 }
2084
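/* Flush the chipset for any CPU write domain and emit a GPU flush on each
 * ring selected in @flush_rings.
 */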
2085 static void
2086 i915_gem_flush(struct drm_device *dev,
2087 uint32_t invalidate_domains,
2088 uint32_t flush_domains,
2089 uint32_t flush_rings)
2090 {
2091 drm_i915_private_t *dev_priv = dev->dev_private;
2092
2093 if (flush_domains & I915_GEM_DOMAIN_CPU)
2094 intel_gtt_chipset_flush();
2095
2096 if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
2097 if (flush_rings & RING_RENDER)
2098 i915_gem_flush_ring(dev, &dev_priv->render_ring,
2099 invalidate_domains, flush_domains);
2100 if (flush_rings & RING_BSD)
2101 i915_gem_flush_ring(dev, &dev_priv->bsd_ring,
2102 invalidate_domains, flush_domains);
2103 if (flush_rings & RING_BLT)
2104 i915_gem_flush_ring(dev, &dev_priv->blt_ring,
2105 invalidate_domains, flush_domains);
2106 }
2107 }
2108
2109 /**
2110 * Ensures that all rendering to the object has completed and the object is
2111 * safe to unbind from the GTT or access from the CPU.
2112 */
2113 static int
2114 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
2115 bool interruptible)
2116 {
2117 struct drm_device *dev = obj->base.dev;
2118 int ret;
2119
2120 /* This function only exists to support waiting for existing rendering,
2121 * not for emitting required flushes.
2122 */
2123 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
2124
2125 /* If there is rendering queued on the buffer being evicted, wait for
2126 * it.
2127 */
2128 if (obj->active) {
2129 ret = i915_do_wait_request(dev,
2130 obj->last_rendering_seqno,
2131 interruptible,
2132 obj->ring);
2133 if (ret)
2134 return ret;
2135 }
2136
2137 return 0;
2138 }
2139
2140 /**
2141 * Unbinds an object from the GTT aperture.
2142 */
2143 int
2144 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2145 {
2146 int ret = 0;
2147
2148 if (obj->gtt_space == NULL)
2149 return 0;
2150
2151 if (obj->pin_count != 0) {
2152 DRM_ERROR("Attempting to unbind pinned buffer\n");
2153 return -EINVAL;
2154 }
2155
2156 /* blow away mappings if mapped through GTT */
2157 i915_gem_release_mmap(obj);
2158
2159 /* Move the object to the CPU domain to ensure that
2160 * any possible CPU writes while it's not in the GTT
2161 * are flushed when we go to remap it. This will
2162 * also ensure that all pending GPU writes are finished
2163 * before we unbind.
2164 */
2165 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2166 if (ret == -ERESTARTSYS)
2167 return ret;
2168 /* Continue on if we fail due to EIO, the GPU is hung so we
2169 * should be safe and we need to cleanup or else we might
2170 * cause memory corruption through use-after-free.
2171 */
2172 if (ret) {
2173 i915_gem_clflush_object(obj);
2174 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2175 }
2176
2177 /* release the fence reg _after_ flushing */
2178 if (obj->fence_reg != I915_FENCE_REG_NONE)
2179 i915_gem_clear_fence_reg(obj);
2180
2181 i915_gem_gtt_unbind_object(obj);
2182 i915_gem_object_put_pages_gtt(obj);
2183
2184 list_del_init(&obj->gtt_list);
2185 list_del_init(&obj->mm_list);
2186 /* Avoid an unnecessary call to unbind on rebind. */
2187 obj->map_and_fenceable = true;
2188
2189 drm_mm_put_block(obj->gtt_space);
2190 obj->gtt_space = NULL;
2191 obj->gtt_offset = 0;
2192
2193 if (i915_gem_object_is_purgeable(obj))
2194 i915_gem_object_truncate(obj);
2195
2196 trace_i915_gem_object_unbind(obj);
2197
2198 return ret;
2199 }
2200
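/* Flush any outstanding GPU writes on @ring and wait for the resulting
 * request, leaving the ring's active list empty.
 */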
2201 static int i915_ring_idle(struct drm_device *dev,
2202 struct intel_ring_buffer *ring)
2203 {
2204 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
2205 return 0;
2206
2207 i915_gem_flush_ring(dev, ring,
2208 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2209 return i915_wait_request(dev,
2210 i915_gem_next_request_seqno(dev, ring),
2211 ring);
2212 }
2213
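/* Wait for all three rings to drain so that every bo ends up on the
 * inactive list.
 */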
2214 int
2215 i915_gpu_idle(struct drm_device *dev)
2216 {
2217 drm_i915_private_t *dev_priv = dev->dev_private;
2218 bool lists_empty;
2219 int ret;
2220
2221 lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2222 list_empty(&dev_priv->mm.active_list));
2223 if (lists_empty)
2224 return 0;
2225
2226 /* Flush everything onto the inactive list. */
2227 ret = i915_ring_idle(dev, &dev_priv->render_ring);
2228 if (ret)
2229 return ret;
2230
2231 ret = i915_ring_idle(dev, &dev_priv->bsd_ring);
2232 if (ret)
2233 return ret;
2234
2235 ret = i915_ring_idle(dev, &dev_priv->blt_ring);
2236 if (ret)
2237 return ret;
2238
2239 return 0;
2240 }
2241
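/* Program a Sandybridge fence register: pack the object's end address,
 * start address, pitch (in 128-byte units) and tiling mode into the 64-bit
 * fence value, then write it either with MI_LOAD_REGISTER_IMM on the
 * pipelined ring or directly via MMIO.
 */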
2242 static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
2243 struct intel_ring_buffer *pipelined)
2244 {
2245 struct drm_device *dev = obj->base.dev;
2246 drm_i915_private_t *dev_priv = dev->dev_private;
2247 u32 size = obj->gtt_space->size;
2248 int regnum = obj->fence_reg;
2249 uint64_t val;
2250
2251 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2252 0xfffff000) << 32;
2253 val |= obj->gtt_offset & 0xfffff000;
2254 val |= (uint64_t)((obj->stride / 128) - 1) <<
2255 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2256
2257 if (obj->tiling_mode == I915_TILING_Y)
2258 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2259 val |= I965_FENCE_REG_VALID;
2260
2261 if (pipelined) {
2262 int ret = intel_ring_begin(pipelined, 6);
2263 if (ret)
2264 return ret;
2265
2266 intel_ring_emit(pipelined, MI_NOOP);
2267 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2268 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
2269 intel_ring_emit(pipelined, (u32)val);
2270 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
2271 intel_ring_emit(pipelined, (u32)(val >> 32));
2272 intel_ring_advance(pipelined);
2273 } else
2274 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
2275
2276 return 0;
2277 }
2278
2279 static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
2280 struct intel_ring_buffer *pipelined)
2281 {
2282 struct drm_device *dev = obj->base.dev;
2283 drm_i915_private_t *dev_priv = dev->dev_private;
2284 u32 size = obj->gtt_space->size;
2285 int regnum = obj->fence_reg;
2286 uint64_t val;
2287
2288 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2289 0xfffff000) << 32;
2290 val |= obj->gtt_offset & 0xfffff000;
2291 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2292 if (obj->tiling_mode == I915_TILING_Y)
2293 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2294 val |= I965_FENCE_REG_VALID;
2295
2296 if (pipelined) {
2297 int ret = intel_ring_begin(pipelined, 6);
2298 if (ret)
2299 return ret;
2300
2301 intel_ring_emit(pipelined, MI_NOOP);
2302 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2303 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
2304 intel_ring_emit(pipelined, (u32)val);
2305 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
2306 intel_ring_emit(pipelined, (u32)(val >> 32));
2307 intel_ring_advance(pipelined);
2308 } else
2309 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
2310
2311 return 0;
2312 }
2313
2314 static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
2315 struct intel_ring_buffer *pipelined)
2316 {
2317 struct drm_device *dev = obj->base.dev;
2318 drm_i915_private_t *dev_priv = dev->dev_private;
2319 u32 size = obj->gtt_space->size;
2320 u32 fence_reg, val, pitch_val;
2321 int tile_width;
2322
2323 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2324 (size & -size) != size ||
2325 (obj->gtt_offset & (size - 1)),
2326 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2327 obj->gtt_offset, obj->map_and_fenceable, size))
2328 return -EINVAL;
2329
2330 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2331 tile_width = 128;
2332 else
2333 tile_width = 512;
2334
2335 /* Note: pitch better be a power of two tile widths */
2336 pitch_val = obj->stride / tile_width;
2337 pitch_val = ffs(pitch_val) - 1;
2338
2339 val = obj->gtt_offset;
2340 if (obj->tiling_mode == I915_TILING_Y)
2341 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2342 val |= I915_FENCE_SIZE_BITS(size);
2343 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2344 val |= I830_FENCE_REG_VALID;
2345
2346 fence_reg = obj->fence_reg;
2347 if (fence_reg < 8)
2348 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2349 else
2350 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2351
2352 if (pipelined) {
2353 int ret = intel_ring_begin(pipelined, 4);
2354 if (ret)
2355 return ret;
2356
2357 intel_ring_emit(pipelined, MI_NOOP);
2358 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2359 intel_ring_emit(pipelined, fence_reg);
2360 intel_ring_emit(pipelined, val);
2361 intel_ring_advance(pipelined);
2362 } else
2363 I915_WRITE(fence_reg, val);
2364
2365 return 0;
2366 }
2367
2368 static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
2369 struct intel_ring_buffer *pipelined)
2370 {
2371 struct drm_device *dev = obj->base.dev;
2372 drm_i915_private_t *dev_priv = dev->dev_private;
2373 u32 size = obj->gtt_space->size;
2374 int regnum = obj->fence_reg;
2375 uint32_t val;
2376 uint32_t pitch_val;
2377
2378 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2379 (size & -size) != size ||
2380 (obj->gtt_offset & (size - 1)),
2381 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2382 obj->gtt_offset, size))
2383 return -EINVAL;
2384
2385 pitch_val = obj->stride / 128;
2386 pitch_val = ffs(pitch_val) - 1;
2387
2388 val = obj->gtt_offset;
2389 if (obj->tiling_mode == I915_TILING_Y)
2390 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2391 val |= I830_FENCE_SIZE_BITS(size);
2392 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2393 val |= I830_FENCE_REG_VALID;
2394
2395 if (pipelined) {
2396 int ret = intel_ring_begin(pipelined, 4);
2397 if (ret)
2398 return ret;
2399
2400 intel_ring_emit(pipelined, MI_NOOP);
2401 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2402 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
2403 intel_ring_emit(pipelined, val);
2404 intel_ring_advance(pipelined);
2405 } else
2406 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
2407
2408 return 0;
2409 }
2410
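/* Find a fence register for a new user: return the index of a free
 * register if one exists, otherwise steal the least-recently-used
 * unpinned register, waiting for any outstanding fenced access to finish.
 * Returns -ENOSPC when every register is pinned.
 */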
2411 static int i915_find_fence_reg(struct drm_device *dev,
2412 bool interruptible)
2413 {
2414 struct drm_i915_private *dev_priv = dev->dev_private;
2415 struct drm_i915_fence_reg *reg;
2416 struct drm_i915_gem_object *obj = NULL;
2417 int i, avail, ret;
2418
2419 /* First try to find a free reg */
2420 avail = 0;
2421 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2422 reg = &dev_priv->fence_regs[i];
2423 if (!reg->obj)
2424 return i;
2425
2426 if (!reg->obj->pin_count)
2427 avail++;
2428 }
2429
2430 if (avail == 0)
2431 return -ENOSPC;
2432
2433 /* None available, try to steal one or wait for a user to finish */
2434 avail = I915_FENCE_REG_NONE;
2435 list_for_each_entry(reg, &dev_priv->mm.fence_list,
2436 lru_list) {
2437 obj = reg->obj;
2438 if (obj->pin_count)
2439 continue;
2440
2441 /* found one! */
2442 avail = obj->fence_reg;
2443 break;
2444 }
2445
2446 BUG_ON(avail == I915_FENCE_REG_NONE);
2447
2448 /* We only have a reference on obj from the active list. put_fence_reg
2449 * might drop that one, causing a use-after-free in it. So hold a
2450 * private reference to obj like the other callers of put_fence_reg
2451 * (set_tiling ioctl) do. */
2452 drm_gem_object_reference(&obj->base);
2453 ret = i915_gem_object_put_fence_reg(obj, interruptible);
2454 drm_gem_object_unreference(&obj->base);
2455 if (ret != 0)
2456 return ret;
2457
2458 return avail;
2459 }
2460
2461 /**
2462 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2463 * @obj: object to map through a fence reg
2464 *
2465 * When mapping objects through the GTT, userspace wants to be able to write
2466 * to them without having to worry about swizzling if the object is tiled.
2467 *
2468 * This function walks the fence regs looking for a free one for @obj,
2469 * stealing one if it can't find any.
2470 *
2471 * It then sets up the reg based on the object's properties: address, pitch
2472 * and tiling format.
2473 */
2474 int
2475 i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj,
2476 bool interruptible)
2477 {
2478 struct drm_device *dev = obj->base.dev;
2479 struct drm_i915_private *dev_priv = dev->dev_private;
2480 struct drm_i915_fence_reg *reg = NULL;
2481 struct intel_ring_buffer *pipelined = NULL;
2482 int ret;
2483
2484 /* Just update our place in the LRU if our fence is getting used. */
2485 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2486 reg = &dev_priv->fence_regs[obj->fence_reg];
2487 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2488 return 0;
2489 }
2490
2491 switch (obj->tiling_mode) {
2492 case I915_TILING_NONE:
2493 WARN(1, "allocating a fence for non-tiled object?\n");
2494 break;
2495 case I915_TILING_X:
2496 if (!obj->stride)
2497 return -EINVAL;
2498 WARN((obj->stride & (512 - 1)),
2499 "object 0x%08x is X tiled but has non-512B pitch\n",
2500 obj->gtt_offset);
2501 break;
2502 case I915_TILING_Y:
2503 if (!obj->stride)
2504 return -EINVAL;
2505 WARN((obj->stride & (128 - 1)),
2506 "object 0x%08x is Y tiled but has non-128B pitch\n",
2507 obj->gtt_offset);
2508 break;
2509 }
2510
2511 ret = i915_find_fence_reg(dev, interruptible);
2512 if (ret < 0)
2513 return ret;
2514
2515 obj->fence_reg = ret;
2516 reg = &dev_priv->fence_regs[obj->fence_reg];
2517 list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2518
2519 reg->obj = obj;
2520
2521 switch (INTEL_INFO(dev)->gen) {
2522 case 6:
2523 ret = sandybridge_write_fence_reg(obj, pipelined);
2524 break;
2525 case 5:
2526 case 4:
2527 ret = i965_write_fence_reg(obj, pipelined);
2528 break;
2529 case 3:
2530 ret = i915_write_fence_reg(obj, pipelined);
2531 break;
2532 case 2:
2533 ret = i830_write_fence_reg(obj, pipelined);
2534 break;
2535 }
2536
2537 trace_i915_gem_object_get_fence(obj,
2538 obj->fence_reg,
2539 obj->tiling_mode);
2540 return ret;
2541 }
2542
2543 /**
2544 * i915_gem_clear_fence_reg - clear out fence register info
2545 * @obj: object to clear
2546 *
2547 * Zeroes out the fence register itself and clears out the associated
2548 * data structures in dev_priv and obj.
2549 */
2550 static void
2551 i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj)
2552 {
2553 struct drm_device *dev = obj->base.dev;
2554 drm_i915_private_t *dev_priv = dev->dev_private;
2555 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[obj->fence_reg];
2556 uint32_t fence_reg;
2557
2558 switch (INTEL_INFO(dev)->gen) {
2559 case 6:
2560 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2561 (obj->fence_reg * 8), 0);
2562 break;
2563 case 5:
2564 case 4:
2565 I915_WRITE64(FENCE_REG_965_0 + (obj->fence_reg * 8), 0);
2566 break;
2567 case 3:
2568 if (obj->fence_reg >= 8)
2569 fence_reg = FENCE_REG_945_8 + (obj->fence_reg - 8) * 4;
2570 else
2571 case 2:
2572 fence_reg = FENCE_REG_830_0 + obj->fence_reg * 4;
2573
2574 I915_WRITE(fence_reg, 0);
2575 break;
2576 }
2577
2578 reg->obj = NULL;
2579 obj->fence_reg = I915_FENCE_REG_NONE;
2580 list_del_init(&reg->lru_list);
2581 }
2582
2583 /**
2584 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2585 * to the buffer to finish, and then resets the fence register.
2586 * @obj: tiled object holding a fence register.
2587 * @interruptible: whether the wait upon the fence is interruptible
2588 *
2589 * Zeroes out the fence register itself and clears out the associated
2590 * data structures in dev_priv and obj.
2591 */
2592 int
2593 i915_gem_object_put_fence_reg(struct drm_i915_gem_object *obj,
2594 bool interruptible)
2595 {
2596 struct drm_device *dev = obj->base.dev;
2597 int ret;
2598
2599 if (obj->fence_reg == I915_FENCE_REG_NONE)
2600 return 0;
2601
2602 /* If we've changed tiling, GTT-mappings of the object
2603 * need to re-fault to ensure that the correct fence register
2604 * setup is in place.
2605 */
2606 i915_gem_release_mmap(obj);
2607
2608 /* On the i915, GPU access to tiled buffers is via a fence,
2609 * therefore we must wait for any outstanding access to complete
2610 * before clearing the fence.
2611 */
2612 if (obj->fenced_gpu_access) {
2613 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
2614 if (ret)
2615 return ret;
2616
2617 obj->fenced_gpu_access = false;
2618 }
2619
2620 if (obj->last_fenced_seqno) {
2621 ret = i915_do_wait_request(dev,
2622 obj->last_fenced_seqno,
2623 interruptible,
2624 obj->last_fenced_ring);
2625 if (ret)
2626 return ret;
2627
2628 obj->last_fenced_seqno = 0;
2629 }
2630
2631 i915_gem_object_flush_gtt_write_domain(obj);
2632 i915_gem_clear_fence_reg(obj);
2633
2634 return 0;
2635 }
2636
2637 /**
2638 * Finds free space in the GTT aperture and binds the object there.
2639 */
2640 static int
2641 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2642 unsigned alignment,
2643 bool map_and_fenceable)
2644 {
2645 struct drm_device *dev = obj->base.dev;
2646 drm_i915_private_t *dev_priv = dev->dev_private;
2647 struct drm_mm_node *free_space;
2648 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2649 u32 size, fence_size, fence_alignment, unfenced_alignment;
2650 bool mappable, fenceable;
2651 int ret;
2652
2653 if (obj->madv != I915_MADV_WILLNEED) {
2654 DRM_ERROR("Attempting to bind a purgeable object\n");
2655 return -EINVAL;
2656 }
2657
2658 fence_size = i915_gem_get_gtt_size(obj);
2659 fence_alignment = i915_gem_get_gtt_alignment(obj);
2660 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);
2661
2662 if (alignment == 0)
2663 alignment = map_and_fenceable ? fence_alignment :
2664 unfenced_alignment;
2665 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2666 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2667 return -EINVAL;
2668 }
2669
2670 size = map_and_fenceable ? fence_size : obj->base.size;
2671
2672 /* If the object is bigger than the entire aperture, reject it early
2673 * before evicting everything in a vain attempt to find space.
2674 */
2675 if (obj->base.size >
2676 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2677 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2678 return -E2BIG;
2679 }
2680
2681 search_free:
2682 if (map_and_fenceable)
2683 free_space =
2684 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
2685 size, alignment, 0,
2686 dev_priv->mm.gtt_mappable_end,
2687 0);
2688 else
2689 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2690 size, alignment, 0);
2691
2692 if (free_space != NULL) {
2693 if (map_and_fenceable)
2694 obj->gtt_space =
2695 drm_mm_get_block_range_generic(free_space,
2696 size, alignment, 0,
2697 dev_priv->mm.gtt_mappable_end,
2698 0);
2699 else
2700 obj->gtt_space =
2701 drm_mm_get_block(free_space, size, alignment);
2702 }
2703 if (obj->gtt_space == NULL) {
2704 /* If the gtt is empty and we're still having trouble
2705 * fitting our object in, we're out of memory.
2706 */
2707 ret = i915_gem_evict_something(dev, size, alignment,
2708 map_and_fenceable);
2709 if (ret)
2710 return ret;
2711
2712 goto search_free;
2713 }
2714
2715 ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
2716 if (ret) {
2717 drm_mm_put_block(obj->gtt_space);
2718 obj->gtt_space = NULL;
2719
2720 if (ret == -ENOMEM) {
2721 /* first try to clear up some space from the GTT */
2722 ret = i915_gem_evict_something(dev, size,
2723 alignment,
2724 map_and_fenceable);
2725 if (ret) {
2726 /* now try to shrink everyone else */
2727 if (gfpmask) {
2728 gfpmask = 0;
2729 goto search_free;
2730 }
2731
2732 return ret;
2733 }
2734
2735 goto search_free;
2736 }
2737
2738 return ret;
2739 }
2740
2741 ret = i915_gem_gtt_bind_object(obj);
2742 if (ret) {
2743 i915_gem_object_put_pages_gtt(obj);
2744 drm_mm_put_block(obj->gtt_space);
2745 obj->gtt_space = NULL;
2746
2747 ret = i915_gem_evict_something(dev, size,
2748 alignment, map_and_fenceable);
2749 if (ret)
2750 return ret;
2751
2752 goto search_free;
2753 }
2754
2755 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
2756 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2757
2758 /* Assert that the object is not currently in any GPU domain. As it
2759 * wasn't in the GTT, there shouldn't be any way it could have been in
2760 * a GPU cache.
2761 */
2762 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2763 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2764
2765 obj->gtt_offset = obj->gtt_space->start;
2766
2767 fenceable =
2768 obj->gtt_space->size == fence_size &&
2769 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
2770
2771 mappable =
2772 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
2773
2774 obj->map_and_fenceable = mappable && fenceable;
2775
2776 trace_i915_gem_object_bind(obj, obj->gtt_offset, map_and_fenceable);
2777 return 0;
2778 }
2779
2780 void
2781 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2782 {
2783 /* If we don't have a page list set up, then we're not pinned
2784 * to GPU, and we can ignore the cache flush because it'll happen
2785 * again at bind time.
2786 */
2787 if (obj->pages == NULL)
2788 return;
2789
2790 trace_i915_gem_object_clflush(obj);
2791
2792 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2793 }
2794
2795 /** Flushes any GPU write domain for the object if it's dirty. */
2796 static int
2797 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
2798 struct intel_ring_buffer *pipelined)
2799 {
2800 struct drm_device *dev = obj->base.dev;
2801
2802 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
2803 return 0;
2804
2805 /* Queue the GPU write cache flushing we need. */
2806 i915_gem_flush_ring(dev, obj->ring, 0, obj->base.write_domain);
2807 BUG_ON(obj->base.write_domain);
2808
2809 if (pipelined && pipelined == obj->ring)
2810 return 0;
2811
2812 return i915_gem_object_wait_rendering(obj, true);
2813 }
2814
2815 /** Flushes the GTT write domain for the object if it's dirty. */
2816 static void
2817 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2818 {
2819 uint32_t old_write_domain;
2820
2821 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2822 return;
2823
2824 /* No actual flushing is required for the GTT write domain. Writes
2825 * to it immediately go to main memory as far as we know, so there's
2826 * no chipset flush. It also doesn't land in render cache.
2827 */
2828 i915_gem_release_mmap(obj);
2829
2830 old_write_domain = obj->base.write_domain;
2831 obj->base.write_domain = 0;
2832
2833 trace_i915_gem_object_change_domain(obj,
2834 obj->base.read_domains,
2835 old_write_domain);
2836 }
2837
2838 /** Flushes the CPU write domain for the object if it's dirty. */
2839 static void
2840 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2841 {
2842 uint32_t old_write_domain;
2843
2844 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2845 return;
2846
2847 i915_gem_clflush_object(obj);
2848 intel_gtt_chipset_flush();
2849 old_write_domain = obj->base.write_domain;
2850 obj->base.write_domain = 0;
2851
2852 trace_i915_gem_object_change_domain(obj,
2853 obj->base.read_domains,
2854 old_write_domain);
2855 }
2856
2857 /**
2858 * Moves a single object to the GTT read, and possibly write domain.
2859 *
2860 * This function returns when the move is complete, including waiting on
2861 * flushes to occur.
2862 */
2863 int
2864 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2865 {
2866 uint32_t old_write_domain, old_read_domains;
2867 int ret;
2868
2869 /* Not valid to be called on unbound objects. */
2870 if (obj->gtt_space == NULL)
2871 return -EINVAL;
2872
2873 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
2874 if (ret != 0)
2875 return ret;
2876
2877 i915_gem_object_flush_cpu_write_domain(obj);
2878
2879 if (write) {
2880 ret = i915_gem_object_wait_rendering(obj, true);
2881 if (ret)
2882 return ret;
2883 }
2884
2885 old_write_domain = obj->base.write_domain;
2886 old_read_domains = obj->base.read_domains;
2887
2888 /* It should now be out of any other write domains, and we can update
2889 * the domain values for our changes.
2890 */
2891 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2892 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2893 if (write) {
2894 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2895 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2896 obj->dirty = 1;
2897 }
2898
2899 trace_i915_gem_object_change_domain(obj,
2900 old_read_domains,
2901 old_write_domain);
2902
2903 return 0;
2904 }
2905
2906 /*
2907 * Prepare buffer for display plane. Use uninterruptible for possible flush
2908 * wait, as during the modesetting process we're not supposed to be interrupted.
2909 */
2910 int
2911 i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
2912 struct intel_ring_buffer *pipelined)
2913 {
2914 uint32_t old_read_domains;
2915 int ret;
2916
2917 /* Not valid to be called on unbound objects. */
2918 if (obj->gtt_space == NULL)
2919 return -EINVAL;
2920
2921 ret = i915_gem_object_flush_gpu_write_domain(obj, pipelined);
2922 if (ret)
2923 return ret;
2924
2925 /* Currently, we are always called from a non-interruptible context. */
2926 if (!pipelined) {
2927 ret = i915_gem_object_wait_rendering(obj, false);
2928 if (ret)
2929 return ret;
2930 }
2931
2932 i915_gem_object_flush_cpu_write_domain(obj);
2933
2934 old_read_domains = obj->base.read_domains;
2935 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2936
2937 trace_i915_gem_object_change_domain(obj,
2938 old_read_domains,
2939 obj->base.write_domain);
2940
2941 return 0;
2942 }
2943
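/* Flush any pending GPU writes for @obj and wait for rendering to
 * complete. A no-op for objects that are not currently active.
 */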
2944 int
2945 i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj,
2946 bool interruptible)
2947 {
2948 if (!obj->active)
2949 return 0;
2950
2951 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
2952 i915_gem_flush_ring(obj->base.dev, obj->ring,
2953 0, obj->base.write_domain);
2954
2955 return i915_gem_object_wait_rendering(obj, interruptible);
2956 }
2957
2958 /**
2959 * Moves a single object to the CPU read, and possibly write domain.
2960 *
2961 * This function returns when the move is complete, including waiting on
2962 * flushes to occur.
2963 */
2964 static int
2965 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
2966 {
2967 uint32_t old_write_domain, old_read_domains;
2968 int ret;
2969
2970 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
2971 if (ret != 0)
2972 return ret;
2973
2974 i915_gem_object_flush_gtt_write_domain(obj);
2975
2976 /* If we have a partially-valid cache of the object in the CPU,
2977 * finish invalidating it and free the per-page flags.
2978 */
2979 i915_gem_object_set_to_full_cpu_read_domain(obj);
2980
2981 if (write) {
2982 ret = i915_gem_object_wait_rendering(obj, true);
2983 if (ret)
2984 return ret;
2985 }
2986
2987 old_write_domain = obj->base.write_domain;
2988 old_read_domains = obj->base.read_domains;
2989
2990 /* Flush the CPU cache if it's still invalid. */
2991 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2992 i915_gem_clflush_object(obj);
2993
2994 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2995 }
2996
2997 /* It should now be out of any other write domains, and we can update
2998 * the domain values for our changes.
2999 */
3000 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3001
3002 /* If we're writing through the CPU, then the GPU read domains will
3003 * need to be invalidated at next use.
3004 */
3005 if (write) {
3006 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3007 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3008 }
3009
3010 trace_i915_gem_object_change_domain(obj,
3011 old_read_domains,
3012 old_write_domain);
3013
3014 return 0;
3015 }
3016
3017 /*
3018 * Set the next domain for the specified object. This
3019 * may not actually perform the necessary flushing/invalidating though,
3020 * as that may want to be batched with other set_domain operations
3021 *
3022 * This is (we hope) the only really tricky part of gem. The goal
3023 * is fairly simple -- track which caches hold bits of the object
3024 * and make sure they remain coherent. A few concrete examples may
3025 * help to explain how it works. For shorthand, we use the notation
3026 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
3027 * a pair of read and write domain masks.
3028 *
3029 * Case 1: the batch buffer
3030 *
3031 * 1. Allocated
3032 * 2. Written by CPU
3033 * 3. Mapped to GTT
3034 * 4. Read by GPU
3035 * 5. Unmapped from GTT
3036 * 6. Freed
3037 *
3038 * Let's take these a step at a time
3039 *
3040 * 1. Allocated
3041 * Pages allocated from the kernel may still have
3042 * cache contents, so we set them to (CPU, CPU) always.
3043 * 2. Written by CPU (using pwrite)
3044 * The pwrite function calls set_domain (CPU, CPU) and
3045 * this function does nothing (as nothing changes)
3046 * 3. Mapped by GTT
3047 * This function asserts that the object is not
3048 * currently in any GPU-based read or write domains
3049 * 4. Read by GPU
3050 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
3051 * As write_domain is zero, this function adds in the
3052 * current read domains (CPU+COMMAND, 0).
3053 * flush_domains is set to CPU.
3054 * invalidate_domains is set to COMMAND
3055 * clflush is run to get data out of the CPU caches
3056 * then i915_dev_set_domain calls i915_gem_flush to
3057 * emit an MI_FLUSH and drm_agp_chipset_flush
3058 * 5. Unmapped from GTT
3059 * i915_gem_object_unbind calls set_domain (CPU, CPU)
3060 * flush_domains and invalidate_domains end up both zero
3061 * so no flushing/invalidating happens
3062 * 6. Freed
3063 * yay, done
3064 *
3065 * Case 2: The shared render buffer
3066 *
3067 * 1. Allocated
3068 * 2. Mapped to GTT
3069 * 3. Read/written by GPU
3070 * 4. set_domain to (CPU,CPU)
3071 * 5. Read/written by CPU
3072 * 6. Read/written by GPU
3073 *
3074 * 1. Allocated
3075 * Same as last example, (CPU, CPU)
3076 * 2. Mapped to GTT
3077 * Nothing changes (assertions find that it is not in the GPU)
3078 * 3. Read/written by GPU
3079 * execbuffer calls set_domain (RENDER, RENDER)
3080 * flush_domains gets CPU
3081 * invalidate_domains gets GPU
3082 * clflush (obj)
3083 * MI_FLUSH and drm_agp_chipset_flush
3084 * 4. set_domain (CPU, CPU)
3085 * flush_domains gets GPU
3086 * invalidate_domains gets CPU
3087 * wait_rendering (obj) to make sure all drawing is complete.
3088 * This will include an MI_FLUSH to get the data from GPU
3089 * to memory
3090 * clflush (obj) to invalidate the CPU cache
3091 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
3092 * 5. Read/written by CPU
3093 * cache lines are loaded and dirtied
3094 * 6. Read/written by GPU
3095 * Same as last GPU access
3096 *
3097 * Case 3: The constant buffer
3098 *
3099 * 1. Allocated
3100 * 2. Written by CPU
3101 * 3. Read by GPU
3102 * 4. Updated (written) by CPU again
3103 * 5. Read by GPU
3104 *
3105 * 1. Allocated
3106 * (CPU, CPU)
3107 * 2. Written by CPU
3108 * (CPU, CPU)
3109 * 3. Read by GPU
3110 * (CPU+RENDER, 0)
3111 * flush_domains = CPU
3112 * invalidate_domains = RENDER
3113 * clflush (obj)
3114 * MI_FLUSH
3115 * drm_agp_chipset_flush
3116 * 4. Updated (written) by CPU again
3117 * (CPU, CPU)
3118 * flush_domains = 0 (no previous write domain)
3119 * invalidate_domains = 0 (no new read domains)
3120 * 5. Read by GPU
3121 * (CPU+RENDER, 0)
3122 * flush_domains = CPU
3123 * invalidate_domains = RENDER
3124 * clflush (obj)
3125 * MI_FLUSH
3126 * drm_agp_chipset_flush
3127 */
3128 static void
3129 i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
3130 struct intel_ring_buffer *ring,
3131 struct change_domains *cd)
3132 {
3133 uint32_t invalidate_domains = 0, flush_domains = 0;
3134
3135 /*
3136 * If the object isn't moving to a new write domain,
3137 * let the object stay in multiple read domains
3138 */
3139 if (obj->base.pending_write_domain == 0)
3140 obj->base.pending_read_domains |= obj->base.read_domains;
3141
3142 /*
3143 * Flush the current write domain if
3144 * the new read domains don't match. Invalidate
3145 * any read domains which differ from the old
3146 * write domain
3147 */
3148 if (obj->base.write_domain &&
3149 (((obj->base.write_domain != obj->base.pending_read_domains ||
3150 obj->ring != ring)) ||
3151 (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
3152 flush_domains |= obj->base.write_domain;
3153 invalidate_domains |=
3154 obj->base.pending_read_domains & ~obj->base.write_domain;
3155 }
3156 /*
3157 * Invalidate any read caches which may have
3158 * stale data. That is, any new read domains.
3159 */
3160 invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
3161 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
3162 i915_gem_clflush_object(obj);
3163
3164 /* blow away mappings if mapped through GTT */
3165 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
3166 i915_gem_release_mmap(obj);
3167
3168 /* The actual obj->write_domain will be updated with
3169 * pending_write_domain after we emit the accumulated flush for all
3170 * of our domain changes in execbuffers (which clears objects'
3171 * write_domains). So if we have a current write domain that we
3172 * aren't changing, set pending_write_domain to that.
3173 */
3174 if (flush_domains == 0 && obj->base.pending_write_domain == 0)
3175 obj->base.pending_write_domain = obj->base.write_domain;
3176
3177 cd->invalidate_domains |= invalidate_domains;
3178 cd->flush_domains |= flush_domains;
3179 if (flush_domains & I915_GEM_GPU_DOMAINS)
3180 cd->flush_rings |= obj->ring->id;
3181 if (invalidate_domains & I915_GEM_GPU_DOMAINS)
3182 cd->flush_rings |= ring->id;
3183 }
3184
3185 /**
3186 * Moves the object from a partially CPU read to a full one.
3187 *
3188 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3189 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3190 */
3191 static void
3192 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
3193 {
3194 if (!obj->page_cpu_valid)
3195 return;
3196
3197 /* If we're partially in the CPU read domain, finish moving it in.
3198 */
3199 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
3200 int i;
3201
3202 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
3203 if (obj->page_cpu_valid[i])
3204 continue;
3205 drm_clflush_pages(obj->pages + i, 1);
3206 }
3207 }
3208
3209 /* Free the page_cpu_valid mappings which are now stale, whether
3210 * or not we've got I915_GEM_DOMAIN_CPU.
3211 */
3212 kfree(obj->page_cpu_valid);
3213 obj->page_cpu_valid = NULL;
3214 }
3215
3216 /**
3217 * Set the CPU read domain on a range of the object.
3218 *
3219 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3220 * not entirely valid. The page_cpu_valid member of the object flags which
3221 * pages have been flushed, and will be respected by
3222 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3223 * of the whole object.
3224 *
3225 * This function returns when the move is complete, including waiting on
3226 * flushes to occur.
3227 */
3228 static int
3229 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
3230 uint64_t offset, uint64_t size)
3231 {
3232 uint32_t old_read_domains;
3233 int i, ret;
3234
3235 if (offset == 0 && size == obj->base.size)
3236 return i915_gem_object_set_to_cpu_domain(obj, 0);
3237
3238 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
3239 if (ret != 0)
3240 return ret;
3241 i915_gem_object_flush_gtt_write_domain(obj);
3242
3243 /* If we're already fully in the CPU read domain, we're done. */
3244 if (obj->page_cpu_valid == NULL &&
3245 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
3246 return 0;
3247
3248 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3249 * newly adding I915_GEM_DOMAIN_CPU
3250 */
3251 if (obj->page_cpu_valid == NULL) {
3252 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
3253 GFP_KERNEL);
3254 if (obj->page_cpu_valid == NULL)
3255 return -ENOMEM;
3256 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
3257 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);
3258
3259 /* Flush the cache on any pages that are still invalid from the CPU's
3260 * perspective.
3261 */
3262 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3263 i++) {
3264 if (obj->page_cpu_valid[i])
3265 continue;
3266
3267 drm_clflush_pages(obj->pages + i, 1);
3268
3269 obj->page_cpu_valid[i] = 1;
3270 }
3271
3272 /* It should now be out of any other write domains, and we can update
3273 * the domain values for our changes.
3274 */
3275 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3276
3277 old_read_domains = obj->base.read_domains;
3278 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3279
3280 trace_i915_gem_object_change_domain(obj,
3281 old_read_domains,
3282 obj->base.write_domain);
3283
3284 return 0;
3285 }
3286
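/* Apply a single relocation: validate the requested read/write domains and
 * the offsets within both objects, then write the target's GTT offset into
 * the batch, either through the CPU mapping or through an atomic GTT
 * mapping, and report the new presumed offset back to the caller.
 */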
3287 static int
3288 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
3289 struct drm_file *file_priv,
3290 struct drm_i915_gem_exec_object2 *entry,
3291 struct drm_i915_gem_relocation_entry *reloc)
3292 {
3293 struct drm_device *dev = obj->base.dev;
3294 struct drm_gem_object *target_obj;
3295 uint32_t target_offset;
3296 int ret = -EINVAL;
3297
3298 target_obj = drm_gem_object_lookup(dev, file_priv,
3299 reloc->target_handle);
3300 if (target_obj == NULL)
3301 return -ENOENT;
3302
3303 target_offset = to_intel_bo(target_obj)->gtt_offset;
3304
3305 #if WATCH_RELOC
3306 DRM_INFO("%s: obj %p offset %08x target %d "
3307 "read %08x write %08x gtt %08x "
3308 "presumed %08x delta %08x\n",
3309 __func__,
3310 obj,
3311 (int) reloc->offset,
3312 (int) reloc->target_handle,
3313 (int) reloc->read_domains,
3314 (int) reloc->write_domain,
3315 (int) target_offset,
3316 (int) reloc->presumed_offset,
3317 reloc->delta);
3318 #endif
3319
3320 /* The target buffer should have appeared before us in the
3321 * exec_object list, so it should have a GTT space bound by now.
3322 */
3323 if (target_offset == 0) {
3324 DRM_ERROR("No GTT space found for object %d\n",
3325 reloc->target_handle);
3326 goto err;
3327 }
3328
3329 /* Validate that the target is in a valid r/w GPU domain */
3330 if (reloc->write_domain & (reloc->write_domain - 1)) {
3331 DRM_ERROR("reloc with multiple write domains: "
3332 "obj %p target %d offset %d "
3333 "read %08x write %08x",
3334 obj, reloc->target_handle,
3335 (int) reloc->offset,
3336 reloc->read_domains,
3337 reloc->write_domain);
3338 goto err;
3339 }
3340 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3341 reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3342 DRM_ERROR("reloc with read/write CPU domains: "
3343 "obj %p target %d offset %d "
3344 "read %08x write %08x",
3345 obj, reloc->target_handle,
3346 (int) reloc->offset,
3347 reloc->read_domains,
3348 reloc->write_domain);
3349 goto err;
3350 }
3351 if (reloc->write_domain && target_obj->pending_write_domain &&
3352 reloc->write_domain != target_obj->pending_write_domain) {
3353 DRM_ERROR("Write domain conflict: "
3354 "obj %p target %d offset %d "
3355 "new %08x old %08x\n",
3356 obj, reloc->target_handle,
3357 (int) reloc->offset,
3358 reloc->write_domain,
3359 target_obj->pending_write_domain);
3360 goto err;
3361 }
3362
3363 target_obj->pending_read_domains |= reloc->read_domains;
3364 target_obj->pending_write_domain |= reloc->write_domain;
3365
3366 /* If the relocation already has the right value in it, no
3367 * more work needs to be done.
3368 */
3369 if (target_offset == reloc->presumed_offset)
3370 goto out;
3371
3372 /* Check that the relocation address is valid... */
3373 if (reloc->offset > obj->base.size - 4) {
3374 DRM_ERROR("Relocation beyond object bounds: "
3375 "obj %p target %d offset %d size %d.\n",
3376 obj, reloc->target_handle,
3377 (int) reloc->offset,
3378 (int) obj->base.size);
3379 goto err;
3380 }
3381 if (reloc->offset & 3) {
3382 DRM_ERROR("Relocation not 4-byte aligned: "
3383 "obj %p target %d offset %d.\n",
3384 obj, reloc->target_handle,
3385 (int) reloc->offset);
3386 goto err;
3387 }
3388
3389 /* and points to somewhere within the target object. */
3390 if (reloc->delta >= target_obj->size) {
3391 DRM_ERROR("Relocation beyond target object bounds: "
3392 "obj %p target %d delta %d size %d.\n",
3393 obj, reloc->target_handle,
3394 (int) reloc->delta,
3395 (int) target_obj->size);
3396 goto err;
3397 }
3398
3399 reloc->delta += target_offset;
3400 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
3401 uint32_t page_offset = reloc->offset & ~PAGE_MASK;
3402 char *vaddr;
3403
3404 vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
3405 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
3406 kunmap_atomic(vaddr);
3407 } else {
3408 struct drm_i915_private *dev_priv = dev->dev_private;
3409 uint32_t __iomem *reloc_entry;
3410 void __iomem *reloc_page;
3411
3412 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3413 if (ret)
3414 goto err;
3415
3416 /* Map the page containing the relocation we're going to perform. */
3417 reloc->offset += obj->gtt_offset;
3418 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3419 reloc->offset & PAGE_MASK);
3420 reloc_entry = (uint32_t __iomem *)
3421 (reloc_page + (reloc->offset & ~PAGE_MASK));
3422 iowrite32(reloc->delta, reloc_entry);
3423 io_mapping_unmap_atomic(reloc_page);
3424 }
3425
3426 /* and update the user's relocation entry */
3427 reloc->presumed_offset = target_offset;
3428
3429 out:
3430 ret = 0;
3431 err:
3432 drm_gem_object_unreference(target_obj);
3433 return ret;
3434 }
3435
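/* Apply all relocations for one exec object, copying each entry to and
 * from userspace with the non-faulting (inatomic) helpers so that an
 * -EFAULT can be handled by the slow path.
 */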
3436 static int
3437 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
3438 struct drm_file *file_priv,
3439 struct drm_i915_gem_exec_object2 *entry)
3440 {
3441 struct drm_i915_gem_relocation_entry __user *user_relocs;
3442 int i, ret;
3443
3444 user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
3445 for (i = 0; i < entry->relocation_count; i++) {
3446 struct drm_i915_gem_relocation_entry reloc;
3447
3448 if (__copy_from_user_inatomic(&reloc,
3449 user_relocs+i,
3450 sizeof(reloc)))
3451 return -EFAULT;
3452
3453 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &reloc);
3454 if (ret)
3455 return ret;
3456
3457 if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
3458 &reloc.presumed_offset,
3459 sizeof(reloc.presumed_offset)))
3460 return -EFAULT;
3461 }
3462
3463 return 0;
3464 }
3465
3466 static int
3467 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
3468 struct drm_file *file_priv,
3469 struct drm_i915_gem_exec_object2 *entry,
3470 struct drm_i915_gem_relocation_entry *relocs)
3471 {
3472 int i, ret;
3473
3474 for (i = 0; i < entry->relocation_count; i++) {
3475 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &relocs[i]);
3476 if (ret)
3477 return ret;
3478 }
3479
3480 return 0;
3481 }
3482
3483 static int
3484 i915_gem_execbuffer_relocate(struct drm_device *dev,
3485 struct drm_file *file,
3486 struct drm_i915_gem_object **object_list,
3487 struct drm_i915_gem_exec_object2 *exec_list,
3488 int count)
3489 {
3490 int i, ret;
3491
3492 for (i = 0; i < count; i++) {
3493 struct drm_i915_gem_object *obj = object_list[i];
3494 obj->base.pending_read_domains = 0;
3495 obj->base.pending_write_domain = 0;
3496 ret = i915_gem_execbuffer_relocate_object(obj, file,
3497 &exec_list[i]);
3498 if (ret)
3499 return ret;
3500 }
3501
3502 return 0;
3503 }
3504
3505 static int
3506 i915_gem_execbuffer_reserve(struct drm_device *dev,
3507 struct drm_file *file,
3508 struct drm_i915_gem_object **object_list,
3509 struct drm_i915_gem_exec_object2 *exec_list,
3510 int count)
3511 {
3512 int ret, i, retry;
3513
3514 /* Attempt to pin all of the buffers into the GTT.
3515 * This is done in 3 phases:
3516 *
3517 * 1a. Unbind all objects that do not match the GTT constraints for
3518 * the execbuffer (fenceable, mappable, alignment etc).
3519 * 1b. Increment pin count for already bound objects.
3520 * 2. Bind new objects.
3521 * 3. Decrement pin count.
3522 *
3523 * This avoids unnecessary unbinding of later objects in order to make
3524 * room for the earlier objects *unless* we need to defragment.
3525 */
3526 retry = 0;
3527 do {
3528 ret = 0;
3529
3530 /* Unbind any ill-fitting objects or pin. */
3531 for (i = 0; i < count; i++) {
3532 struct drm_i915_gem_object *obj = object_list[i];
3533 struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
3534 bool need_fence, need_mappable;
3535
3536 if (!obj->gtt_space)
3537 continue;
3538
3539 need_fence =
3540 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3541 obj->tiling_mode != I915_TILING_NONE;
3542 need_mappable =
3543 entry->relocation_count ? true : need_fence;
3544
3545 if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
3546 (need_mappable && !obj->map_and_fenceable))
3547 ret = i915_gem_object_unbind(obj);
3548 else
3549 ret = i915_gem_object_pin(obj,
3550 entry->alignment,
3551 need_mappable);
3552 if (ret) {
3553 count = i;
3554 goto err;
3555 }
3556 }
3557
3558 /* Bind fresh objects */
3559 for (i = 0; i < count; i++) {
3560 struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
3561 struct drm_i915_gem_object *obj = object_list[i];
3562 bool need_fence;
3563
3564 need_fence =
3565 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3566 obj->tiling_mode != I915_TILING_NONE;
3567
3568 if (!obj->gtt_space) {
3569 bool need_mappable =
3570 entry->relocation_count ? true : need_fence;
3571
3572 ret = i915_gem_object_pin(obj,
3573 entry->alignment,
3574 need_mappable);
3575 if (ret)
3576 break;
3577 }
3578
3579 if (need_fence) {
3580 ret = i915_gem_object_get_fence_reg(obj, true);
3581 if (ret)
3582 break;
3583
3584 obj->pending_fenced_gpu_access = true;
3585 }
3586
3587 entry->offset = obj->gtt_offset;
3588 }
3589
3590 err: /* Decrement pin count for bound objects */
3591 for (i = 0; i < count; i++) {
3592 struct drm_i915_gem_object *obj = object_list[i];
3593 if (obj->gtt_space)
3594 i915_gem_object_unpin(obj);
3595 }
3596
3597 if (ret != -ENOSPC || retry > 1)
3598 return ret;
3599
3600 /* First attempt, just clear anything that is purgeable.
3601 * Second attempt, clear the entire GTT.
3602 */
3603 ret = i915_gem_evict_everything(dev, retry == 0);
3604 if (ret)
3605 return ret;
3606
3607 retry++;
3608 } while (1);
3609 }
3610
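/* Slow relocation path, used when the atomic copies above fault: drop
 * struct_mutex, copy every relocation entry with the faulting
 * copy_from_user, then retake the lock, re-reserve the objects and apply
 * the relocations from the kernel-side copy.
 */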
3611 static int
3612 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
3613 struct drm_file *file,
3614 struct drm_i915_gem_object **object_list,
3615 struct drm_i915_gem_exec_object2 *exec_list,
3616 int count)
3617 {
3618 struct drm_i915_gem_relocation_entry *reloc;
3619 int i, total, ret;
3620
3621 for (i = 0; i < count; i++)
3622 object_list[i]->in_execbuffer = false;
3623
3624 mutex_unlock(&dev->struct_mutex);
3625
3626 total = 0;
3627 for (i = 0; i < count; i++)
3628 total += exec_list[i].relocation_count;
3629
3630 reloc = drm_malloc_ab(total, sizeof(*reloc));
3631 if (reloc == NULL) {
3632 mutex_lock(&dev->struct_mutex);
3633 return -ENOMEM;
3634 }
3635
3636 total = 0;
3637 for (i = 0; i < count; i++) {
3638 struct drm_i915_gem_relocation_entry __user *user_relocs;
3639
3640 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3641
3642 if (copy_from_user(reloc+total, user_relocs,
3643 exec_list[i].relocation_count *
3644 sizeof(*reloc))) {
3645 ret = -EFAULT;
3646 mutex_lock(&dev->struct_mutex);
3647 goto err;
3648 }
3649
3650 total += exec_list[i].relocation_count;
3651 }
3652
3653 ret = i915_mutex_lock_interruptible(dev);
3654 if (ret) {
3655 mutex_lock(&dev->struct_mutex);
3656 goto err;
3657 }
3658
3659 ret = i915_gem_execbuffer_reserve(dev, file,
3660 object_list, exec_list,
3661 count);
3662 if (ret)
3663 goto err;
3664
3665 total = 0;
3666 for (i = 0; i < count; i++) {
3667 struct drm_i915_gem_object *obj = object_list[i];
3668 obj->base.pending_read_domains = 0;
3669 obj->base.pending_write_domain = 0;
3670 ret = i915_gem_execbuffer_relocate_object_slow(obj, file,
3671 &exec_list[i],
3672 reloc + total);
3673 if (ret)
3674 goto err;
3675
3676 total += exec_list[i].relocation_count;
3677 }
3678
3679 /* Leave the user relocations as are, this is the painfully slow path,
3680 * and we want to avoid the complication of dropping the lock whilst
3681 * having buffers reserved in the aperture and so causing spurious
3682 * ENOSPC for random operations.
3683 */
3684
3685 err:
3686 drm_free_large(reloc);
3687 return ret;
3688 }
3689
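/* Accumulate the domain transitions required by every object in the
 * execbuffer into a single change_domains summary, emit one combined
 * flush/invalidate, and then serialise against rendering still queued on
 * other rings.
 */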
3690 static int
3691 i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
3692 struct drm_file *file,
3693 struct intel_ring_buffer *ring,
3694 struct drm_i915_gem_object **objects,
3695 int count)
3696 {
3697 struct change_domains cd;
3698 int ret, i;
3699
3700 cd.invalidate_domains = 0;
3701 cd.flush_domains = 0;
3702 cd.flush_rings = 0;
3703 for (i = 0; i < count; i++)
3704 i915_gem_object_set_to_gpu_domain(objects[i], ring, &cd);
3705
3706 if (cd.invalidate_domains | cd.flush_domains) {
3707 #if WATCH_EXEC
3708 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3709 __func__,
3710 cd.invalidate_domains,
3711 cd.flush_domains);
3712 #endif
3713 i915_gem_flush(dev,
3714 cd.invalidate_domains,
3715 cd.flush_domains,
3716 cd.flush_rings);
3717 }
3718
3719 for (i = 0; i < count; i++) {
3720 struct drm_i915_gem_object *obj = objects[i];
3721 /* XXX replace with semaphores */
3722 if (obj->ring && ring != obj->ring) {
3723 ret = i915_gem_object_wait_rendering(obj, true);
3724 if (ret)
3725 return ret;
3726 }
3727 }
3728
3729 return 0;
3730 }
3731
3732 /* Throttle our rendering by waiting until the ring has completed our requests
3733 * emitted over 20 msec ago.
3734 *
3735 * Note that if we were to use the current jiffies each time around the loop,
3736 * we wouldn't escape the function with any frames outstanding if the time to
3737 * render a frame was over 20ms.
3738 *
3739 * This should get us reasonable parallelism between CPU and GPU but also
3740 * relatively low latency when blocking on a particular request to finish.
3741 */
3742 static int
3743 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3744 {
3745 struct drm_i915_private *dev_priv = dev->dev_private;
3746 struct drm_i915_file_private *file_priv = file->driver_priv;
3747 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3748 struct drm_i915_gem_request *request;
3749 struct intel_ring_buffer *ring = NULL;
3750 u32 seqno = 0;
3751 int ret;
3752
3753 spin_lock(&file_priv->mm.lock);
3754 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3755 if (time_after_eq(request->emitted_jiffies, recent_enough))
3756 break;
3757
3758 ring = request->ring;
3759 seqno = request->seqno;
3760 }
3761 spin_unlock(&file_priv->mm.lock);
3762
3763 if (seqno == 0)
3764 return 0;
3765
3766 ret = 0;
3767 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3768 /* And wait for the seqno passing without holding any locks and
3769 * causing extra latency for others. This is safe as the irq
3770 * generation is designed to be run atomically and so is
3771 * lockless.
3772 */
3773 ring->user_irq_get(ring);
3774 ret = wait_event_interruptible(ring->irq_queue,
3775 i915_seqno_passed(ring->get_seqno(ring), seqno)
3776 || atomic_read(&dev_priv->mm.wedged));
3777 ring->user_irq_put(ring);
3778
3779 if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3780 ret = -EIO;
3781 }
3782
3783 if (ret == 0)
3784 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3785
3786 return ret;
3787 }
3788
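/* Basic sanity checks on the batch: both the start offset and length must
 * be 8-byte aligned, and the batch must not start at GTT offset zero.
 */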
3789 static int
3790 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3791 uint64_t exec_offset)
3792 {
3793 uint32_t exec_start, exec_len;
3794
3795 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3796 exec_len = (uint32_t) exec->batch_len;
3797
3798 if ((exec_start | exec_len) & 0x7)
3799 return -EINVAL;
3800
3801 if (!exec_start)
3802 return -EINVAL;
3803
3804 return 0;
3805 }
3806
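/* Before taking struct_mutex, check each exec object's relocation list for
 * integer overflow and make sure the userspace buffer is both readable and
 * writable (we write back presumed offsets), prefaulting it in.
 */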
3807 static int
3808 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
3809 int count)
3810 {
3811 int i;
3812
3813 for (i = 0; i < count; i++) {
3814 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
3815 int length; /* limited by fault_in_pages_readable() */
3816
3817 /* First check for malicious input causing overflow */
3818 if (exec[i].relocation_count >
3819 INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
3820 return -EINVAL;
3821
3822 length = exec[i].relocation_count *
3823 sizeof(struct drm_i915_gem_relocation_entry);
3824 if (!access_ok(VERIFY_READ, ptr, length))
3825 return -EFAULT;
3826
3827 /* we may also need to update the presumed offsets */
3828 if (!access_ok(VERIFY_WRITE, ptr, length))
3829 return -EFAULT;
3830
3831 if (fault_in_pages_readable(ptr, length))
3832 return -EFAULT;
3833 }
3834
3835 return 0;
3836 }
3837
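/* Main execbuffer path: select the target ring, copy in any cliprects,
 * look up and reserve every object in the GTT, and apply the relocations,
 * falling back to the slow path if the fast copies fault.
 */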
3838 static int
3839 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3840 struct drm_file *file,
3841 struct drm_i915_gem_execbuffer2 *args,
3842 struct drm_i915_gem_exec_object2 *exec_list)
3843 {
3844 drm_i915_private_t *dev_priv = dev->dev_private;
3845 struct drm_i915_gem_object **object_list = NULL;
3846 struct drm_i915_gem_object *batch_obj;
3847 struct drm_clip_rect *cliprects = NULL;
3848 struct drm_i915_gem_request *request = NULL;
3849 int ret, i, flips;
3850 uint64_t exec_offset;
3851
3852 struct intel_ring_buffer *ring = NULL;
3853
3854 ret = i915_gem_check_is_wedged(dev);
3855 if (ret)
3856 return ret;
3857
3858 ret = validate_exec_list(exec_list, args->buffer_count);
3859 if (ret)
3860 return ret;
3861
3862 #if WATCH_EXEC
3863 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3864 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3865 #endif
3866 switch (args->flags & I915_EXEC_RING_MASK) {
3867 case I915_EXEC_DEFAULT:
3868 case I915_EXEC_RENDER:
3869 ring = &dev_priv->render_ring;
3870 break;
3871 case I915_EXEC_BSD:
3872 if (!HAS_BSD(dev)) {
3873 DRM_ERROR("execbuf with invalid ring (BSD)\n");
3874 return -EINVAL;
3875 }
3876 ring = &dev_priv->bsd_ring;
3877 break;
3878 case I915_EXEC_BLT:
3879 if (!HAS_BLT(dev)) {
3880 DRM_ERROR("execbuf with invalid ring (BLT)\n");
3881 return -EINVAL;
3882 }
3883 ring = &dev_priv->blt_ring;
3884 break;
3885 default:
3886 DRM_ERROR("execbuf with unknown ring: %d\n",
3887 (int)(args->flags & I915_EXEC_RING_MASK));
3888 return -EINVAL;
3889 }
3890
3891 if (args->buffer_count < 1) {
3892 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3893 return -EINVAL;
3894 }
3895 object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
3896 if (object_list == NULL) {
3897 DRM_ERROR("Failed to allocate object list for %d buffers\n",
3898 args->buffer_count);
3899 ret = -ENOMEM;
3900 goto pre_mutex_err;
3901 }
3902
3903 if (args->num_cliprects != 0) {
3904 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3905 GFP_KERNEL);
3906 if (cliprects == NULL) {
3907 ret = -ENOMEM;
3908 goto pre_mutex_err;
3909 }
3910
3911 ret = copy_from_user(cliprects,
3912 (struct drm_clip_rect __user *)
3913 (uintptr_t) args->cliprects_ptr,
3914 sizeof(*cliprects) * args->num_cliprects);
3915 if (ret != 0) {
3916 DRM_ERROR("copy %d cliprects failed: %d\n",
3917 args->num_cliprects, ret);
3918 ret = -EFAULT;
3919 goto pre_mutex_err;
3920 }
3921 }
3922
3923 request = kzalloc(sizeof(*request), GFP_KERNEL);
3924 if (request == NULL) {
3925 ret = -ENOMEM;
3926 goto pre_mutex_err;
3927 }
3928
3929 ret = i915_mutex_lock_interruptible(dev);
3930 if (ret)
3931 goto pre_mutex_err;
3932
3933 if (dev_priv->mm.suspended) {
3934 mutex_unlock(&dev->struct_mutex);
3935 ret = -EBUSY;
3936 goto pre_mutex_err;
3937 }
3938
3939 /* Look up object handles */
3940 for (i = 0; i < args->buffer_count; i++) {
3941 struct drm_i915_gem_object *obj;
3942
3943 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
3944 exec_list[i].handle));
3945 if (obj == NULL) {
3946 DRM_ERROR("Invalid object handle %d at index %d\n",
3947 exec_list[i].handle, i);
3948 /* prevent error path from reading uninitialized data */
3949 args->buffer_count = i;
3950 ret = -ENOENT;
3951 goto err;
3952 }
3953 object_list[i] = obj;
3954
3955 if (obj->in_execbuffer) {
3956 DRM_ERROR("Object %p appears more than once in object list\n",
3957 obj);
3958 /* prevent error path from reading uninitialized data */
3959 args->buffer_count = i + 1;
3960 ret = -EINVAL;
3961 goto err;
3962 }
3963 obj->in_execbuffer = true;
3964 obj->pending_fenced_gpu_access = false;
3965 }
3966
3967 /* Move the objects en-masse into the GTT, evicting if necessary. */
3968 ret = i915_gem_execbuffer_reserve(dev, file,
3969 object_list, exec_list,
3970 args->buffer_count);
3971 if (ret)
3972 goto err;
3973
3974 /* The objects are in their final locations, apply the relocations. */
3975 ret = i915_gem_execbuffer_relocate(dev, file,
3976 object_list, exec_list,
3977 args->buffer_count);
3978 if (ret) {
3979 if (ret == -EFAULT) {
3980 ret = i915_gem_execbuffer_relocate_slow(dev, file,
3981 object_list,
3982 exec_list,
3983 args->buffer_count);
3984 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
3985 }
3986 if (ret)
3987 goto err;
3988 }
3989
3990 /* Set the pending read domains for the batch buffer to COMMAND */
3991 batch_obj = object_list[args->buffer_count-1];
3992 if (batch_obj->base.pending_write_domain) {
3993 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3994 ret = -EINVAL;
3995 goto err;
3996 }
3997 batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3998
3999 /* Sanity check the batch buffer */
4000 exec_offset = batch_obj->gtt_offset;
4001 ret = i915_gem_check_execbuffer(args, exec_offset);
4002 if (ret != 0) {
4003 DRM_ERROR("execbuf with invalid offset/length\n");
4004 goto err;
4005 }
4006
4007 ret = i915_gem_execbuffer_move_to_gpu(dev, file, ring,
4008 object_list, args->buffer_count);
4009 if (ret)
4010 goto err;
4011
4012 #if WATCH_COHERENCY
4013 for (i = 0; i < args->buffer_count; i++) {
4014 i915_gem_object_check_coherency(object_list[i],
4015 exec_list[i].handle);
4016 }
4017 #endif
4018
4019 #if WATCH_EXEC
4020 i915_gem_dump_object(batch_obj,
4021 args->batch_len,
4022 __func__,
4023 ~0);
4024 #endif
4025
4026 /* Check for any pending flips. As we only maintain a flip queue depth
4027 * of 1, we can simply insert a WAIT for the next display flip prior
4028 * to executing the batch and avoid stalling the CPU.
4029 */
4030 flips = 0;
4031 for (i = 0; i < args->buffer_count; i++) {
4032 if (object_list[i]->base.write_domain)
4033 flips |= atomic_read(&object_list[i]->pending_flip);
4034 }
4035 if (flips) {
4036 int plane, flip_mask;
4037
4038 for (plane = 0; flips >> plane; plane++) {
4039 if (((flips >> plane) & 1) == 0)
4040 continue;
4041
4042 if (plane)
4043 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
4044 else
4045 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
4046
4047 ret = intel_ring_begin(ring, 2);
4048 if (ret)
4049 goto err;
4050
4051 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
4052 intel_ring_emit(ring, MI_NOOP);
4053 intel_ring_advance(ring);
4054 }
4055 }
4056
4057 /* Exec the batchbuffer */
4058 ret = ring->dispatch_execbuffer(ring, args, cliprects, exec_offset);
4059 if (ret) {
4060 DRM_ERROR("dispatch failed %d\n", ret);
4061 goto err;
4062 }
4063
4064 for (i = 0; i < args->buffer_count; i++) {
4065 struct drm_i915_gem_object *obj = object_list[i];
4066
4067 obj->base.read_domains = obj->base.pending_read_domains;
4068 obj->base.write_domain = obj->base.pending_write_domain;
4069 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
4070
4071 i915_gem_object_move_to_active(obj, ring);
4072 if (obj->base.write_domain) {
4073 obj->dirty = 1;
4074 list_move_tail(&obj->gpu_write_list,
4075 &ring->gpu_write_list);
4076 intel_mark_busy(dev, obj);
4077 }
4078
4079 trace_i915_gem_object_change_domain(obj,
4080 obj->base.read_domains,
4081 obj->base.write_domain);
4082 }
4083
4084 /*
4085 * Ensure that the commands in the batch buffer are
4086 * finished before the interrupt fires
4087 */
4088 i915_retire_commands(dev, ring);
4089
4090 if (i915_add_request(dev, file, request, ring))
4091 i915_gem_next_request_seqno(dev, ring);
4092 else
4093 request = NULL;
4094
4095 err:
4096 for (i = 0; i < args->buffer_count; i++) {
4097 object_list[i]->in_execbuffer = false;
4098 drm_gem_object_unreference(&object_list[i]->base);
4099 }
4100
4101 mutex_unlock(&dev->struct_mutex);
4102
4103 pre_mutex_err:
4104 drm_free_large(object_list);
4105 kfree(cliprects);
4106 kfree(request);
4107
4108 return ret;
4109 }
4110
4111 /*
4112 * Legacy execbuffer just creates an exec2 list from the original exec object
4113 * list array and passes it to the real function.
4114 */
4115 int
4116 i915_gem_execbuffer(struct drm_device *dev, void *data,
4117 struct drm_file *file)
4118 {
4119 struct drm_i915_gem_execbuffer *args = data;
4120 struct drm_i915_gem_execbuffer2 exec2;
4121 struct drm_i915_gem_exec_object *exec_list = NULL;
4122 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4123 int ret, i;
4124
4125 #if WATCH_EXEC
4126 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4127 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4128 #endif
4129
4130 if (args->buffer_count < 1) {
4131 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
4132 return -EINVAL;
4133 }
4134
4135 /* Copy in the exec list from userland */
4136 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
4137 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4138 if (exec_list == NULL || exec2_list == NULL) {
4139 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4140 args->buffer_count);
4141 drm_free_large(exec_list);
4142 drm_free_large(exec2_list);
4143 return -ENOMEM;
4144 }
4145 ret = copy_from_user(exec_list,
4146 (struct drm_i915_relocation_entry __user *)
4147 (uintptr_t) args->buffers_ptr,
4148 sizeof(*exec_list) * args->buffer_count);
4149 if (ret != 0) {
4150 DRM_ERROR("copy %d exec entries failed %d\n",
4151 args->buffer_count, ret);
4152 drm_free_large(exec_list);
4153 drm_free_large(exec2_list);
4154 return -EFAULT;
4155 }
4156
4157 for (i = 0; i < args->buffer_count; i++) {
4158 exec2_list[i].handle = exec_list[i].handle;
4159 exec2_list[i].relocation_count = exec_list[i].relocation_count;
4160 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
4161 exec2_list[i].alignment = exec_list[i].alignment;
4162 exec2_list[i].offset = exec_list[i].offset;
4163 if (INTEL_INFO(dev)->gen < 4)
4164 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
4165 else
4166 exec2_list[i].flags = 0;
4167 }
4168
4169 exec2.buffers_ptr = args->buffers_ptr;
4170 exec2.buffer_count = args->buffer_count;
4171 exec2.batch_start_offset = args->batch_start_offset;
4172 exec2.batch_len = args->batch_len;
4173 exec2.DR1 = args->DR1;
4174 exec2.DR4 = args->DR4;
4175 exec2.num_cliprects = args->num_cliprects;
4176 exec2.cliprects_ptr = args->cliprects_ptr;
4177 exec2.flags = I915_EXEC_RENDER;
4178
4179 ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
4180 if (!ret) {
4181 /* Copy the new buffer offsets back to the user's exec list. */
4182 for (i = 0; i < args->buffer_count; i++)
4183 exec_list[i].offset = exec2_list[i].offset;
4184 /* ... and back out to userspace */
4185 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4186 (uintptr_t) args->buffers_ptr,
4187 exec_list,
4188 sizeof(*exec_list) * args->buffer_count);
4189 if (ret) {
4190 ret = -EFAULT;
4191 DRM_ERROR("failed to copy %d exec entries "
4192 "back to user (%d)\n",
4193 args->buffer_count, ret);
4194 }
4195 }
4196
4197 drm_free_large(exec_list);
4198 drm_free_large(exec2_list);
4199 return ret;
4200 }
4201
4202 int
4203 i915_gem_execbuffer2(struct drm_device *dev, void *data,
4204 struct drm_file *file)
4205 {
4206 struct drm_i915_gem_execbuffer2 *args = data;
4207 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4208 int ret;
4209
4210 #if WATCH_EXEC
4211 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4212 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4213 #endif
4214
4215 if (args->buffer_count < 1) {
4216 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
4217 return -EINVAL;
4218 }
4219
4220 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4221 if (exec2_list == NULL) {
4222 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4223 args->buffer_count);
4224 return -ENOMEM;
4225 }
4226 ret = copy_from_user(exec2_list,
4227 (struct drm_i915_relocation_entry __user *)
4228 (uintptr_t) args->buffers_ptr,
4229 sizeof(*exec2_list) * args->buffer_count);
4230 if (ret != 0) {
4231 DRM_ERROR("copy %d exec entries failed %d\n",
4232 args->buffer_count, ret);
4233 drm_free_large(exec2_list);
4234 return -EFAULT;
4235 }
4236
4237 ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
4238 if (!ret) {
4239 /* Copy the new buffer offsets back to the user's exec list. */
4240 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4241 (uintptr_t) args->buffers_ptr,
4242 exec2_list,
4243 sizeof(*exec2_list) * args->buffer_count);
4244 if (ret) {
4245 ret = -EFAULT;
4246 DRM_ERROR("failed to copy %d exec entries "
4247 "back to user (%d)\n",
4248 args->buffer_count, ret);
4249 }
4250 }
4251
4252 drm_free_large(exec2_list);
4253 return ret;
4254 }
4255
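/* Pin an object into the GTT. If the object is already bound but at
 * an offset that violates the requested alignment, or is not mappable
 * and fenceable when the caller requires it to be, it is unbound and
 * rebound first. Pinned but inactive objects are kept on the pinned
 * list so that the eviction code leaves them alone.
 */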
4256 int
4257 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4258 uint32_t alignment,
4259 bool map_and_fenceable)
4260 {
4261 struct drm_device *dev = obj->base.dev;
4262 struct drm_i915_private *dev_priv = dev->dev_private;
4263 int ret;
4264
4265 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4266 WARN_ON(i915_verify_lists(dev));
4267
4268 if (obj->gtt_space != NULL) {
4269 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
4270 (map_and_fenceable && !obj->map_and_fenceable)) {
4271 WARN(obj->pin_count,
4272 "bo is already pinned with incorrect alignment:"
4273 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
4274 " obj->map_and_fenceable=%d\n",
4275 obj->gtt_offset, alignment,
4276 map_and_fenceable,
4277 obj->map_and_fenceable);
4278 ret = i915_gem_object_unbind(obj);
4279 if (ret)
4280 return ret;
4281 }
4282 }
4283
4284 if (obj->gtt_space == NULL) {
4285 ret = i915_gem_object_bind_to_gtt(obj, alignment,
4286 map_and_fenceable);
4287 if (ret)
4288 return ret;
4289 }
4290
4291 if (obj->pin_count++ == 0) {
4292 if (!obj->active)
4293 list_move_tail(&obj->mm_list,
4294 &dev_priv->mm.pinned_list);
4295 }
4296 obj->pin_mappable |= map_and_fenceable;
4297
4298 WARN_ON(i915_verify_lists(dev));
4299 return 0;
4300 }
4301
4302 void
4303 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
4304 {
4305 struct drm_device *dev = obj->base.dev;
4306 drm_i915_private_t *dev_priv = dev->dev_private;
4307
4308 WARN_ON(i915_verify_lists(dev));
4309 BUG_ON(obj->pin_count == 0);
4310 BUG_ON(obj->gtt_space == NULL);
4311
4312 if (--obj->pin_count == 0) {
4313 if (!obj->active)
4314 list_move_tail(&obj->mm_list,
4315 &dev_priv->mm.inactive_list);
4316 obj->pin_mappable = false;
4317 }
4318 WARN_ON(i915_verify_lists(dev));
4319 }
4320
4321 int
4322 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4323 struct drm_file *file)
4324 {
4325 struct drm_i915_gem_pin *args = data;
4326 struct drm_i915_gem_object *obj;
4327 int ret;
4328
4329 ret = i915_mutex_lock_interruptible(dev);
4330 if (ret)
4331 return ret;
4332
4333 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4334 if (obj == NULL) {
4335 ret = -ENOENT;
4336 goto unlock;
4337 }
4338
4339 if (obj->madv != I915_MADV_WILLNEED) {
4340 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4341 ret = -EINVAL;
4342 goto out;
4343 }
4344
4345 if (obj->pin_filp != NULL && obj->pin_filp != file) {
4346 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4347 args->handle);
4348 ret = -EINVAL;
4349 goto out;
4350 }
4351
4352 obj->user_pin_count++;
4353 obj->pin_filp = file;
4354 if (obj->user_pin_count == 1) {
4355 ret = i915_gem_object_pin(obj, args->alignment, true);
4356 if (ret)
4357 goto out;
4358 }
4359
4360 /* XXX - flush the CPU caches for pinned objects
4361 * as the X server doesn't manage domains yet
4362 */
4363 i915_gem_object_flush_cpu_write_domain(obj);
4364 args->offset = obj->gtt_offset;
4365 out:
4366 drm_gem_object_unreference(&obj->base);
4367 unlock:
4368 mutex_unlock(&dev->struct_mutex);
4369 return ret;
4370 }
4371
4372 int
4373 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4374 struct drm_file *file)
4375 {
4376 struct drm_i915_gem_pin *args = data;
4377 struct drm_i915_gem_object *obj;
4378 int ret;
4379
4380 ret = i915_mutex_lock_interruptible(dev);
4381 if (ret)
4382 return ret;
4383
4384 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4385 if (obj == NULL) {
4386 ret = -ENOENT;
4387 goto unlock;
4388 }
4389
4390 if (obj->pin_filp != file) {
4391 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4392 args->handle);
4393 ret = -EINVAL;
4394 goto out;
4395 }
4396 obj->user_pin_count--;
4397 if (obj->user_pin_count == 0) {
4398 obj->pin_filp = NULL;
4399 i915_gem_object_unpin(obj);
4400 }
4401
4402 out:
4403 drm_gem_object_unreference(&obj->base);
4404 unlock:
4405 mutex_unlock(&dev->struct_mutex);
4406 return ret;
4407 }
4408
4409 int
4410 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4411 struct drm_file *file)
4412 {
4413 struct drm_i915_gem_busy *args = data;
4414 struct drm_i915_gem_object *obj;
4415 int ret;
4416
4417 ret = i915_mutex_lock_interruptible(dev);
4418 if (ret)
4419 return ret;
4420
4421 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4422 if (obj == NULL) {
4423 ret = -ENOENT;
4424 goto unlock;
4425 }
4426
4427 /* Count all active objects as busy, even if they are not currently
4428 * in use by the gpu. Users of this interface expect objects to
4429 * eventually become non-busy without any further action, therefore
4430 * emit any necessary flushes here.
4431 */
4432 args->busy = obj->active;
4433 if (args->busy) {
4434 /* Unconditionally flush the object, even when the gpu is still
4435 * using it. Userspace calling this function indicates that it wants
4436 * to use this buffer sooner rather than later, so issuing the
4437 * required flush earlier is beneficial.
4438 */
4439 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
4440 i915_gem_flush_ring(dev, obj->ring,
4441 0, obj->base.write_domain);
4442
4443 /* Update the active list for the hardware's current position.
4444 * Otherwise this only updates on a delayed timer or when irqs
4445 * are actually unmasked, and our working set ends up being
4446 * larger than required.
4447 */
4448 i915_gem_retire_requests_ring(dev, obj->ring);
4449
4450 args->busy = obj->active;
4451 }
4452
4453 drm_gem_object_unreference(&obj->base);
4454 unlock:
4455 mutex_unlock(&dev->struct_mutex);
4456 return ret;
4457 }
4458
4459 int
4460 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4461 struct drm_file *file_priv)
4462 {
4463 return i915_gem_ring_throttle(dev, file_priv);
4464 }
4465
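/* Mark an object's backing storage as needed (WILLNEED) or purgeable
 * (DONTNEED). A purgeable object that is no longer bound into the GTT
 * has its backing storage truncated immediately; args->retained tells
 * userspace whether the pages survived. Pinned objects are rejected.
 */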
4466 int
4467 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4468 struct drm_file *file_priv)
4469 {
4470 struct drm_i915_gem_madvise *args = data;
4471 struct drm_i915_gem_object *obj;
4472 int ret;
4473
4474 switch (args->madv) {
4475 case I915_MADV_DONTNEED:
4476 case I915_MADV_WILLNEED:
4477 break;
4478 default:
4479 return -EINVAL;
4480 }
4481
4482 ret = i915_mutex_lock_interruptible(dev);
4483 if (ret)
4484 return ret;
4485
4486 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4487 if (obj == NULL) {
4488 ret = -ENOENT;
4489 goto unlock;
4490 }
4491
4492 if (obj->pin_count) {
4493 ret = -EINVAL;
4494 goto out;
4495 }
4496
4497 if (obj->madv != __I915_MADV_PURGED)
4498 obj->madv = args->madv;
4499
4500 /* if the object is no longer bound, discard its backing storage */
4501 if (i915_gem_object_is_purgeable(obj) &&
4502 obj->gtt_space == NULL)
4503 i915_gem_object_truncate(obj);
4504
4505 args->retained = obj->madv != __I915_MADV_PURGED;
4506
4507 out:
4508 drm_gem_object_unreference(&obj->base);
4509 unlock:
4510 mutex_unlock(&dev->struct_mutex);
4511 return ret;
4512 }
4513
4514 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4515 size_t size)
4516 {
4517 struct drm_i915_private *dev_priv = dev->dev_private;
4518 struct drm_i915_gem_object *obj;
4519
4520 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4521 if (obj == NULL)
4522 return NULL;
4523
4524 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4525 kfree(obj);
4526 return NULL;
4527 }
4528
4529 i915_gem_info_add_obj(dev_priv, size);
4530
4531 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4532 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4533
4534 obj->agp_type = AGP_USER_MEMORY;
4535 obj->base.driver_private = NULL;
4536 obj->fence_reg = I915_FENCE_REG_NONE;
4537 INIT_LIST_HEAD(&obj->mm_list);
4538 INIT_LIST_HEAD(&obj->gtt_list);
4539 INIT_LIST_HEAD(&obj->ring_list);
4540 INIT_LIST_HEAD(&obj->gpu_write_list);
4541 obj->madv = I915_MADV_WILLNEED;
4542 /* Avoid an unnecessary call to unbind on the first bind. */
4543 obj->map_and_fenceable = true;
4544
4545 return obj;
4546 }
4547
4548 int i915_gem_init_object(struct drm_gem_object *obj)
4549 {
4550 BUG();
4551
4552 return 0;
4553 }
4554
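/* Final stage of object freeing. If the unbind is interrupted by a
 * signal, the object is parked on the deferred_free_list to be freed
 * later instead.
 */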
4555 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
4556 {
4557 struct drm_device *dev = obj->base.dev;
4558 drm_i915_private_t *dev_priv = dev->dev_private;
4559 int ret;
4560
4561 ret = i915_gem_object_unbind(obj);
4562 if (ret == -ERESTARTSYS) {
4563 list_move(&obj->mm_list,
4564 &dev_priv->mm.deferred_free_list);
4565 return;
4566 }
4567
4568 if (obj->base.map_list.map)
4569 i915_gem_free_mmap_offset(obj);
4570
4571 drm_gem_object_release(&obj->base);
4572 i915_gem_info_remove_obj(dev_priv, obj->base.size);
4573
4574 kfree(obj->page_cpu_valid);
4575 kfree(obj->bit_17);
4576 kfree(obj);
4577 }
4578
4579 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4580 {
4581 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4582 struct drm_device *dev = obj->base.dev;
4583
4584 trace_i915_gem_object_destroy(obj);
4585
4586 while (obj->pin_count > 0)
4587 i915_gem_object_unpin(obj);
4588
4589 if (obj->phys_obj)
4590 i915_gem_detach_phys_object(dev, obj);
4591
4592 i915_gem_free_object_tail(obj);
4593 }
4594
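/* Quiesce the GPU for suspend or VT switch: wait for outstanding
 * rendering, evict everything under UMS, drop the fence registers and
 * tear down the rings. mm.suspended is set so execbuf is refused
 * until GEM is re-enabled (e.g. by i915_gem_entervt_ioctl()).
 */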
4595 int
4596 i915_gem_idle(struct drm_device *dev)
4597 {
4598 drm_i915_private_t *dev_priv = dev->dev_private;
4599 int ret;
4600
4601 mutex_lock(&dev->struct_mutex);
4602
4603 if (dev_priv->mm.suspended) {
4604 mutex_unlock(&dev->struct_mutex);
4605 return 0;
4606 }
4607
4608 ret = i915_gpu_idle(dev);
4609 if (ret) {
4610 mutex_unlock(&dev->struct_mutex);
4611 return ret;
4612 }
4613
4614 /* Under UMS, be paranoid and evict. */
4615 if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4616 ret = i915_gem_evict_inactive(dev, false);
4617 if (ret) {
4618 mutex_unlock(&dev->struct_mutex);
4619 return ret;
4620 }
4621 }
4622
4623 i915_gem_reset_fences(dev);
4624
4625 /* Hack! Don't let anybody do execbuf while we don't control the chip.
4626 * We need to replace this with a semaphore, or something.
4627 * And not confound mm.suspended!
4628 */
4629 dev_priv->mm.suspended = 1;
4630 del_timer_sync(&dev_priv->hangcheck_timer);
4631
4632 i915_kernel_lost_context(dev);
4633 i915_gem_cleanup_ringbuffer(dev);
4634
4635 mutex_unlock(&dev->struct_mutex);
4636
4637 /* Cancel the retire work handler, which should be idle now. */
4638 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4639
4640 return 0;
4641 }
4642
4643 int
4644 i915_gem_init_ringbuffer(struct drm_device *dev)
4645 {
4646 drm_i915_private_t *dev_priv = dev->dev_private;
4647 int ret;
4648
4649 ret = intel_init_render_ring_buffer(dev);
4650 if (ret)
4651 return ret;
4652
4653 if (HAS_BSD(dev)) {
4654 ret = intel_init_bsd_ring_buffer(dev);
4655 if (ret)
4656 goto cleanup_render_ring;
4657 }
4658
4659 if (HAS_BLT(dev)) {
4660 ret = intel_init_blt_ring_buffer(dev);
4661 if (ret)
4662 goto cleanup_bsd_ring;
4663 }
4664
4665 dev_priv->next_seqno = 1;
4666
4667 return 0;
4668
4669 cleanup_bsd_ring:
4670 intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4671 cleanup_render_ring:
4672 intel_cleanup_ring_buffer(&dev_priv->render_ring);
4673 return ret;
4674 }
4675
4676 void
4677 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4678 {
4679 drm_i915_private_t *dev_priv = dev->dev_private;
4680
4681 intel_cleanup_ring_buffer(&dev_priv->render_ring);
4682 intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4683 intel_cleanup_ring_buffer(&dev_priv->blt_ring);
4684 }
4685
4686 int
4687 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4688 struct drm_file *file_priv)
4689 {
4690 drm_i915_private_t *dev_priv = dev->dev_private;
4691 int ret;
4692
4693 if (drm_core_check_feature(dev, DRIVER_MODESET))
4694 return 0;
4695
4696 if (atomic_read(&dev_priv->mm.wedged)) {
4697 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4698 atomic_set(&dev_priv->mm.wedged, 0);
4699 }
4700
4701 mutex_lock(&dev->struct_mutex);
4702 dev_priv->mm.suspended = 0;
4703
4704 ret = i915_gem_init_ringbuffer(dev);
4705 if (ret != 0) {
4706 mutex_unlock(&dev->struct_mutex);
4707 return ret;
4708 }
4709
4710 BUG_ON(!list_empty(&dev_priv->mm.active_list));
4711 BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4712 BUG_ON(!list_empty(&dev_priv->bsd_ring.active_list));
4713 BUG_ON(!list_empty(&dev_priv->blt_ring.active_list));
4714 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4715 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4716 BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4717 BUG_ON(!list_empty(&dev_priv->bsd_ring.request_list));
4718 BUG_ON(!list_empty(&dev_priv->blt_ring.request_list));
4719 mutex_unlock(&dev->struct_mutex);
4720
4721 ret = drm_irq_install(dev);
4722 if (ret)
4723 goto cleanup_ringbuffer;
4724
4725 return 0;
4726
4727 cleanup_ringbuffer:
4728 mutex_lock(&dev->struct_mutex);
4729 i915_gem_cleanup_ringbuffer(dev);
4730 dev_priv->mm.suspended = 1;
4731 mutex_unlock(&dev->struct_mutex);
4732
4733 return ret;
4734 }
4735
4736 int
4737 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4738 struct drm_file *file_priv)
4739 {
4740 if (drm_core_check_feature(dev, DRIVER_MODESET))
4741 return 0;
4742
4743 drm_irq_uninstall(dev);
4744 return i915_gem_idle(dev);
4745 }
4746
4747 void
4748 i915_gem_lastclose(struct drm_device *dev)
4749 {
4750 int ret;
4751
4752 if (drm_core_check_feature(dev, DRIVER_MODESET))
4753 return;
4754
4755 ret = i915_gem_idle(dev);
4756 if (ret)
4757 DRM_ERROR("failed to idle hardware: %d\n", ret);
4758 }
4759
4760 static void
4761 init_ring_lists(struct intel_ring_buffer *ring)
4762 {
4763 INIT_LIST_HEAD(&ring->active_list);
4764 INIT_LIST_HEAD(&ring->request_list);
4765 INIT_LIST_HEAD(&ring->gpu_write_list);
4766 }
4767
4768 void
4769 i915_gem_load(struct drm_device *dev)
4770 {
4771 int i;
4772 drm_i915_private_t *dev_priv = dev->dev_private;
4773
4774 INIT_LIST_HEAD(&dev_priv->mm.active_list);
4775 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4776 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4777 INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
4778 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4779 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4780 INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
4781 init_ring_lists(&dev_priv->render_ring);
4782 init_ring_lists(&dev_priv->bsd_ring);
4783 init_ring_lists(&dev_priv->blt_ring);
4784 for (i = 0; i < 16; i++)
4785 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4786 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4787 i915_gem_retire_work_handler);
4788 init_completion(&dev_priv->error_completion);
4789
4790 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4791 if (IS_GEN3(dev)) {
4792 u32 tmp = I915_READ(MI_ARB_STATE);
4793 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4794 /* arb state is a masked write, so set bit + bit in mask */
4795 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4796 I915_WRITE(MI_ARB_STATE, tmp);
4797 }
4798 }
4799
4800 /* Old X drivers will take 0-2 for front, back, depth buffers */
4801 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4802 dev_priv->fence_reg_start = 3;
4803
4804 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4805 dev_priv->num_fence_regs = 16;
4806 else
4807 dev_priv->num_fence_regs = 8;
4808
4809 /* Initialize fence registers to zero */
4810 switch (INTEL_INFO(dev)->gen) {
4811 case 6:
4812 for (i = 0; i < 16; i++)
4813 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
4814 break;
4815 case 5:
4816 case 4:
4817 for (i = 0; i < 16; i++)
4818 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4819 break;
4820 case 3:
4821 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4822 for (i = 0; i < 8; i++)
4823 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
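/* Fall through: gen3 also clears the first 8 fence registers below. */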
4824 case 2:
4825 for (i = 0; i < 8; i++)
4826 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4827 break;
4828 }
4829 i915_gem_detect_bit_6_swizzle(dev);
4830 init_waitqueue_head(&dev_priv->pending_flip_queue);
4831
4832 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
4833 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
4834 register_shrinker(&dev_priv->mm.inactive_shrinker);
4835 }
4836
4837 /*
4838 * Create a physically contiguous memory object for this object
4839 * e.g. for cursor + overlay regs
4840 */
4841 static int i915_gem_init_phys_object(struct drm_device *dev,
4842 int id, int size, int align)
4843 {
4844 drm_i915_private_t *dev_priv = dev->dev_private;
4845 struct drm_i915_gem_phys_object *phys_obj;
4846 int ret;
4847
4848 if (dev_priv->mm.phys_objs[id - 1] || !size)
4849 return 0;
4850
4851 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4852 if (!phys_obj)
4853 return -ENOMEM;
4854
4855 phys_obj->id = id;
4856
4857 phys_obj->handle = drm_pci_alloc(dev, size, align);
4858 if (!phys_obj->handle) {
4859 ret = -ENOMEM;
4860 goto kfree_obj;
4861 }
4862 #ifdef CONFIG_X86
4863 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4864 #endif
4865
4866 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4867
4868 return 0;
4869 kfree_obj:
4870 kfree(phys_obj);
4871 return ret;
4872 }
4873
4874 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
4875 {
4876 drm_i915_private_t *dev_priv = dev->dev_private;
4877 struct drm_i915_gem_phys_object *phys_obj;
4878
4879 if (!dev_priv->mm.phys_objs[id - 1])
4880 return;
4881
4882 phys_obj = dev_priv->mm.phys_objs[id - 1];
4883 if (phys_obj->cur_obj) {
4884 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4885 }
4886
4887 #ifdef CONFIG_X86
4888 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4889 #endif
4890 drm_pci_free(dev, phys_obj->handle);
4891 kfree(phys_obj);
4892 dev_priv->mm.phys_objs[id - 1] = NULL;
4893 }
4894
4895 void i915_gem_free_all_phys_object(struct drm_device *dev)
4896 {
4897 int i;
4898
4899 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4900 i915_gem_free_phys_object(dev, i);
4901 }
4902
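/* Detach an object from its physically contiguous backing store:
 * copy the contents back into the shmem pages, clflush them and mark
 * them dirty so nothing is lost.
 */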
4903 void i915_gem_detach_phys_object(struct drm_device *dev,
4904 struct drm_i915_gem_object *obj)
4905 {
4906 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4907 char *vaddr;
4908 int i;
4909 int page_count;
4910
4911 if (!obj->phys_obj)
4912 return;
4913 vaddr = obj->phys_obj->handle->vaddr;
4914
4915 page_count = obj->base.size / PAGE_SIZE;
4916 for (i = 0; i < page_count; i++) {
4917 struct page *page = read_cache_page_gfp(mapping, i,
4918 GFP_HIGHUSER | __GFP_RECLAIMABLE);
4919 if (!IS_ERR(page)) {
4920 char *dst = kmap_atomic(page);
4921 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
4922 kunmap_atomic(dst);
4923
4924 drm_clflush_pages(&page, 1);
4925
4926 set_page_dirty(page);
4927 mark_page_accessed(page);
4928 page_cache_release(page);
4929 }
4930 }
4931 intel_gtt_chipset_flush();
4932
4933 obj->phys_obj->cur_obj = NULL;
4934 obj->phys_obj = NULL;
4935 }
4936
4937 int
4938 i915_gem_attach_phys_object(struct drm_device *dev,
4939 struct drm_i915_gem_object *obj,
4940 int id,
4941 int align)
4942 {
4943 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4944 drm_i915_private_t *dev_priv = dev->dev_private;
4945 int ret = 0;
4946 int page_count;
4947 int i;
4948
4949 if (id > I915_MAX_PHYS_OBJECT)
4950 return -EINVAL;
4951
4952 if (obj->phys_obj) {
4953 if (obj->phys_obj->id == id)
4954 return 0;
4955 i915_gem_detach_phys_object(dev, obj);
4956 }
4957
4958 /* create a new object */
4959 if (!dev_priv->mm.phys_objs[id - 1]) {
4960 ret = i915_gem_init_phys_object(dev, id,
4961 obj->base.size, align);
4962 if (ret) {
4963 DRM_ERROR("failed to init phys object %d size: %zu\n",
4964 id, obj->base.size);
4965 return ret;
4966 }
4967 }
4968
4969 /* bind to the object */
4970 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
4971 obj->phys_obj->cur_obj = obj;
4972
4973 page_count = obj->base.size / PAGE_SIZE;
4974
4975 for (i = 0; i < page_count; i++) {
4976 struct page *page;
4977 char *dst, *src;
4978
4979 page = read_cache_page_gfp(mapping, i,
4980 GFP_HIGHUSER | __GFP_RECLAIMABLE);
4981 if (IS_ERR(page))
4982 return PTR_ERR(page);
4983
4984 src = kmap_atomic(page);
4985 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4986 memcpy(dst, src, PAGE_SIZE);
4987 kunmap_atomic(src);
4988
4989 mark_page_accessed(page);
4990 page_cache_release(page);
4991 }
4992
4993 return 0;
4994 }
4995
4996 static int
4997 i915_gem_phys_pwrite(struct drm_device *dev,
4998 struct drm_i915_gem_object *obj,
4999 struct drm_i915_gem_pwrite *args,
5000 struct drm_file *file_priv)
5001 {
5002 void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
5003 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
5004
5005 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
5006 unsigned long unwritten;
5007
5008 /* The physical object once assigned is fixed for the lifetime
5009 * of the obj, so we can safely drop the lock and continue
5010 * to access vaddr.
5011 */
5012 mutex_unlock(&dev->struct_mutex);
5013 unwritten = copy_from_user(vaddr, user_data, args->size);
5014 mutex_lock(&dev->struct_mutex);
5015 if (unwritten)
5016 return -EFAULT;
5017 }
5018
5019 intel_gtt_chipset_flush();
5020 return 0;
5021 }
5022
5023 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5024 {
5025 struct drm_i915_file_private *file_priv = file->driver_priv;
5026
5027 /* Clean up our request list when the client is going away, so that
5028 * later retire_requests won't dereference our soon-to-be-gone
5029 * file_priv.
5030 */
5031 spin_lock(&file_priv->mm.lock);
5032 while (!list_empty(&file_priv->mm.request_list)) {
5033 struct drm_i915_gem_request *request;
5034
5035 request = list_first_entry(&file_priv->mm.request_list,
5036 struct drm_i915_gem_request,
5037 client_list);
5038 list_del(&request->client_list);
5039 request->file_priv = NULL;
5040 }
5041 spin_unlock(&file_priv->mm.lock);
5042 }
5043
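/* The GPU is considered active while any objects remain on the
 * active or flushing lists.
 */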
5044 static int
5045 i915_gpu_is_active(struct drm_device *dev)
5046 {
5047 drm_i915_private_t *dev_priv = dev->dev_private;
5048 int lists_empty;
5049
5050 lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
5051 list_empty(&dev_priv->mm.active_list);
5052
5053 return !lists_empty;
5054 }
5055
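/* Memory shrinker callback. If struct_mutex is contended it bails out
 * and reports nothing. With nr_to_scan == 0 it merely reports the size
 * of the inactive list. Otherwise it retires completed requests,
 * unbinds purgeable buffers first, then any other inactive buffers,
 * and as a last resort idles the GPU and rescans, returning a scaled
 * count of the objects that remain.
 */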
5056 static int
5057 i915_gem_inactive_shrink(struct shrinker *shrinker,
5058 int nr_to_scan,
5059 gfp_t gfp_mask)
5060 {
5061 struct drm_i915_private *dev_priv =
5062 container_of(shrinker,
5063 struct drm_i915_private,
5064 mm.inactive_shrinker);
5065 struct drm_device *dev = dev_priv->dev;
5066 struct drm_i915_gem_object *obj, *next;
5067 int cnt;
5068
5069 if (!mutex_trylock(&dev->struct_mutex))
5070 return 0;
5071
5072 /* "fast-path" to count number of available objects */
5073 if (nr_to_scan == 0) {
5074 cnt = 0;
5075 list_for_each_entry(obj,
5076 &dev_priv->mm.inactive_list,
5077 mm_list)
5078 cnt++;
5079 mutex_unlock(&dev->struct_mutex);
5080 return cnt / 100 * sysctl_vfs_cache_pressure;
5081 }
5082
5083 rescan:
5084 /* first scan for clean buffers */
5085 i915_gem_retire_requests(dev);
5086
5087 list_for_each_entry_safe(obj, next,
5088 &dev_priv->mm.inactive_list,
5089 mm_list) {
5090 if (i915_gem_object_is_purgeable(obj)) {
5091 if (i915_gem_object_unbind(obj) == 0 &&
5092 --nr_to_scan == 0)
5093 break;
5094 }
5095 }
5096
5097 /* second pass, evict/count anything still on the inactive list */
5098 cnt = 0;
5099 list_for_each_entry_safe(obj, next,
5100 &dev_priv->mm.inactive_list,
5101 mm_list) {
5102 if (nr_to_scan &&
5103 i915_gem_object_unbind(obj) == 0)
5104 nr_to_scan--;
5105 else
5106 cnt++;
5107 }
5108
5109 if (nr_to_scan && i915_gpu_is_active(dev)) {
5110 /*
5111 * We are desperate for pages, so as a last resort, wait
5112 * for the GPU to finish and discard whatever we can.
5113 * This dramatically reduces the number of OOM-killer
5114 * events whilst running the GPU aggressively.
5115 */
5116 if (i915_gpu_idle(dev) == 0)
5117 goto rescan;
5118 }
5119 mutex_unlock(&dev->struct_mutex);
5120 return cnt / 100 * sysctl_vfs_cache_pressure;
5121 }