drm/i915: Track pinned VMA
drivers/gpu/drm/i915/i915_gem.c
/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_dmabuf.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;

	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

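/* Reserve a temporary drm_mm node in the mappable range of the global GTT;
 * used by the pread/pwrite slow paths below when the object itself cannot
 * be pinned into the aperture.
 */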
static int
insert_mappable_node(struct drm_i915_private *i915,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
						   size, 0, 0, 0,
						   i915->ggtt.mappable_end,
						   DRM_MM_SEARCH_DEFAULT,
						   DRM_MM_CREATE_DEFAULT);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	if (!i915_reset_in_progress(error))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_in_progress(error),
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (WARN_ON(ret)) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = obj->base.filp->f_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

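/* Unbind every vma on the object (including any bindings left behind by
 * closed vmas): wait for outstanding rendering, retire the requests and
 * then detach each vma in turn.
 */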
int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Closed vma are removed from the obj->vma_list - but they may
	 * still have an active binding on the object. To remove those we
	 * must wait for all rendering to complete to the object (as unbinding
	 * must anyway), and retire the requests.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	i915_gem_retire_requests(to_i915(obj->base.dev));

	while ((vma = list_first_entry_or_null(&obj->vma_list,
					       struct i915_vma,
					       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		ret = i915_vma_unbind(vma);
		if (ret)
			break;
	}
	list_splice(&still_in_list, &obj->vma_list);

	return ret;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 * @obj: i915 gem object
 * @readonly: waiting for just read access or read-write access
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct reservation_object *resv;
	struct i915_gem_active *active;
	unsigned long active_mask;
	int idx;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (!readonly) {
		active = obj->last_read;
		active_mask = i915_gem_object_get_active(obj);
	} else {
		active_mask = 1;
		active = &obj->last_write;
	}

	for_each_active(active_mask, idx) {
		int ret;

		ret = i915_gem_active_wait(&active[idx],
					   &obj->base.dev->struct_mutex);
		if (ret)
			return ret;
	}

	resv = i915_gem_object_get_dmabuf_resv(obj);
	if (resv) {
		long err;

		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
							  MAX_SCHEDULE_TIMEOUT);
		if (err < 0)
			return err;
	}

	return 0;
}

/* A nonblocking variant of the above wait. Must be called prior to
 * acquiring the mutex for the object, as the object state may change
 * during this call. A reference must be held by the caller for the object.
 */
static __must_check int
__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
			struct intel_rps_client *rps,
			bool readonly)
{
	struct i915_gem_active *active;
	unsigned long active_mask;
	int idx;

	active_mask = __I915_BO_ACTIVE(obj);
	if (!active_mask)
		return 0;

	if (!readonly) {
		active = obj->last_read;
	} else {
		active_mask = 1;
		active = &obj->last_write;
	}

	for_each_active(active_mask, idx) {
		int ret;

		ret = i915_gem_active_wait_unlocked(&active[idx],
						    true, NULL, rps);
		if (ret)
			return ret;
	}

	return 0;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;

	return &fpriv->rps;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

	if (obj->base.filp == NULL)
		return -EINVAL;

	ret = i915_gem_object_unbind(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_put_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
	obj->ops = &i915_gem_phys_ops;

	return i915_gem_object_get_pages(obj);
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(dev));

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create(dev, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put_unlocked(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

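/* Copy helpers that undo/apply the bit-17 swizzle used by the GPU:
 * data is copied in 64-byte cacheline chunks, with every chunk fetched
 * from the GPU address XORed with 64.
 */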
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
		return -EINVAL;

	ret = i915_gem_object_wait_rendering(obj, true);
	if (ret)
		return ret;

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

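/* Map a single aperture page write-combined and copy to/from userspace
 * outside of atomic context, so faults on the user pages can be serviced.
 */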
static inline unsigned long
slow_user_access(struct io_mapping *mapping,
		 uint64_t page_base, int page_offset,
		 char __user *user_data,
		 unsigned long length, bool pwrite)
{
	void __iomem *ioaddr;
	void *vaddr;
	uint64_t unwritten;

	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force *)ioaddr + page_offset;
	if (pwrite)
		unwritten = __copy_from_user(vaddr, user_data, length);
	else
		unwritten = __copy_to_user(user_data, vaddr, length);

	io_mapping_unmap(ioaddr);
	return unwritten;
}

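/* Read through the GTT aperture: pin the object into the mappable region
 * (or fall back to a single-page scratch node that is rebound for each
 * page) and copy out via the WC mapping, dropping struct_mutex around the
 * potentially faulting user copies.
 */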
static int
i915_gem_gtt_pread(struct drm_device *dev,
		   struct drm_i915_gem_object *obj, uint64_t size,
		   uint64_t data_offset, uint64_t data_ptr)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct i915_vma *vma;
	struct drm_mm_node node;
	char __user *user_data;
	uint64_t remain;
	uint64_t offset;
	int ret;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
		if (ret)
			goto out;

		ret = i915_gem_object_get_pages(obj);
		if (ret) {
			remove_mappable_node(&node);
			goto out;
		}

		i915_gem_object_pin_pages(obj);
	} else {
		node.start = vma->node.start;
		node.allocated = false;
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	user_data = u64_to_user_ptr(data_ptr);
	remain = size;
	offset = data_offset;

	mutex_unlock(&dev->struct_mutex);
	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_writeable(user_data, remain);
		if (ret) {
			mutex_lock(&dev->struct_mutex);
			goto out_unpin;
		}
	}

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start,
					       I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* This is a slow read/write as it tries to read from
		 * and write to user memory which may result in page
		 * faults, and so we cannot perform this under struct_mutex.
		 */
		if (slow_user_access(ggtt->mappable, page_base,
				     page_offset, user_data,
				     page_length, false)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&dev->struct_mutex);
	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
		/* The user has modified the object whilst we tried
		 * reading from it, and we now have no idea what domain
		 * the pages should be in. As we have just been touching
		 * them directly, flush everything back to the GTT
		 * domain.
		 */
		ret = i915_gem_object_set_to_gtt_domain(obj, false);
	}

out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size,
				       true);
		i915_gem_object_unpin_pages(obj);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out:
	return ret;
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
	if (ret)
		goto err;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto err;

	ret = i915_gem_shmem_pread(dev, obj, args, file);

	/* pread for non shmem backed objects */
	if (ret == -EFAULT || ret == -ENODEV) {
		intel_runtime_pm_get(to_i915(dev));
		ret = i915_gem_gtt_pread(dev, obj, args->size,
					 args->offset, args->data_ptr);
		intel_runtime_pm_put(to_i915(dev));
	}

	i915_gem_object_put(obj);
	mutex_unlock(&dev->struct_mutex);

	return ret;

err:
	i915_gem_object_put_unlocked(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force *)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @i915: i915 device private data
 * @obj: i915 gem object
 * @args: pwrite arguments structure
 * @file: drm file pointer
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_device *dev = obj->base.dev;
	struct i915_vma *vma;
	struct drm_mm_node node;
	uint64_t remain, offset;
	char __user *user_data;
	int ret;
	bool hit_slow_path = false;

	if (i915_gem_object_is_tiled(obj))
		return -EFAULT;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE | PIN_NONBLOCK);
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
		if (ret)
			goto out;

		ret = i915_gem_object_get_pages(obj);
		if (ret) {
			remove_mappable_node(&node);
			goto out;
		}

		i915_gem_object_pin_pages(obj);
	} else {
		node.start = vma->node.start;
		node.allocated = false;
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
	obj->dirty = true;

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (fast_user_write(ggtt->mappable, page_base,
				    page_offset, user_data, page_length)) {
			hit_slow_path = true;
			mutex_unlock(&dev->struct_mutex);
			if (slow_user_access(ggtt->mappable,
					     page_base,
					     page_offset, user_data,
					     page_length, true)) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto out_flush;
			}

			mutex_lock(&dev->struct_mutex);
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	if (hit_slow_path) {
		if (ret == 0 &&
		    (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
			/* The user has modified the object whilst we tried
			 * reading from it, and we now have no idea what domain
			 * the pages should be in. As we have just been touching
			 * them directly, flush everything back to the GTT
			 * domain.
			 */
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
		}
	}

	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size,
				       true);
		i915_gem_object_unpin_pages(obj);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

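/* Write via the shmem backing store: kmap each page and copy from
 * userspace, clflushing around the copy as required to keep the CPU
 * caches coherent with the GPU's view of the pages.
 */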
40123c1f 1233static int
e244a443
DV
1234i915_gem_shmem_pwrite(struct drm_device *dev,
1235 struct drm_i915_gem_object *obj,
1236 struct drm_i915_gem_pwrite *args,
1237 struct drm_file *file)
40123c1f 1238{
40123c1f 1239 ssize_t remain;
8c59967c
DV
1240 loff_t offset;
1241 char __user *user_data;
eb2c0c81 1242 int shmem_page_offset, page_length, ret = 0;
8c59967c 1243 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
e244a443 1244 int hit_slowpath = 0;
58642885
DV
1245 int needs_clflush_after = 0;
1246 int needs_clflush_before = 0;
67d5a50c 1247 struct sg_page_iter sg_iter;
40123c1f 1248
3ed605bc 1249 user_data = u64_to_user_ptr(args->data_ptr);
40123c1f
EA
1250 remain = args->size;
1251
8c59967c 1252 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
40123c1f 1253
c13d87ea
CW
1254 ret = i915_gem_object_wait_rendering(obj, false);
1255 if (ret)
1256 return ret;
1257
58642885
DV
1258 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1259 /* If we're not in the cpu write domain, set ourself into the gtt
1260 * write domain and manually flush cachelines (if required). This
1261 * optimizes for the case when the gpu will use the data
1262 * right away and we therefore have to clflush anyway. */
2c22569b 1263 needs_clflush_after = cpu_write_needs_clflush(obj);
58642885 1264 }
c76ce038
CW
1265 /* Same trick applies to invalidate partially written cachelines read
1266 * before writing. */
1267 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1268 needs_clflush_before =
1269 !cpu_cache_is_coherent(dev, obj->cache_level);
58642885 1270
755d2218
CW
1271 ret = i915_gem_object_get_pages(obj);
1272 if (ret)
1273 return ret;
1274
77a0d1ca 1275 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
063e4e6b 1276
755d2218
CW
1277 i915_gem_object_pin_pages(obj);
1278
673a394b 1279 offset = args->offset;
05394f39 1280 obj->dirty = 1;
673a394b 1281
67d5a50c
ID
1282 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1283 offset >> PAGE_SHIFT) {
2db76d7c 1284 struct page *page = sg_page_iter_page(&sg_iter);
58642885 1285 int partial_cacheline_write;
e5281ccd 1286
9da3da66
CW
1287 if (remain <= 0)
1288 break;
1289
40123c1f
EA
1290 /* Operation in this page
1291 *
40123c1f 1292 * shmem_page_offset = offset within page in shmem file
40123c1f
EA
1293 * page_length = bytes to copy for this page
1294 */
c8cbbb8b 1295 shmem_page_offset = offset_in_page(offset);
40123c1f
EA
1296
1297 page_length = remain;
1298 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1299 page_length = PAGE_SIZE - shmem_page_offset;
40123c1f 1300
58642885
DV
1301 /* If we don't overwrite a cacheline completely we need to be
1302 * careful to have up-to-date data by first clflushing. Don't
1303 * overcomplicate things and flush the entire patch. */
1304 partial_cacheline_write = needs_clflush_before &&
1305 ((shmem_page_offset | page_length)
1306 & (boot_cpu_data.x86_clflush_size - 1));
1307
8c59967c
DV
1308 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1309 (page_to_phys(page) & (1 << 17)) != 0;
1310
d174bd64
DV
1311 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1312 user_data, page_do_bit17_swizzling,
1313 partial_cacheline_write,
1314 needs_clflush_after);
1315 if (ret == 0)
1316 goto next_page;
e244a443
DV
1317
1318 hit_slowpath = 1;
e244a443 1319 mutex_unlock(&dev->struct_mutex);
d174bd64
DV
1320 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1321 user_data, page_do_bit17_swizzling,
1322 partial_cacheline_write,
1323 needs_clflush_after);
40123c1f 1324
e244a443 1325 mutex_lock(&dev->struct_mutex);
755d2218 1326
755d2218 1327 if (ret)
8c59967c 1328 goto out;
8c59967c 1329
17793c9a 1330next_page:
40123c1f 1331 remain -= page_length;
8c59967c 1332 user_data += page_length;
40123c1f 1333 offset += page_length;
673a394b
EA
1334 }
1335
fbd5a26d 1336out:
755d2218
CW
1337 i915_gem_object_unpin_pages(obj);
1338
e244a443 1339 if (hit_slowpath) {
8dcf015e
DV
1340 /*
1341 * Fixup: Flush cpu caches in case we didn't flush the dirty
1342 * cachelines in-line while writing and the object moved
1343 * out of the cpu write domain while we've dropped the lock.
1344 */
1345 if (!needs_clflush_after &&
1346 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
000433b6 1347 if (i915_gem_clflush_object(obj, obj->pin_display))
ed75a55b 1348 needs_clflush_after = true;
e244a443 1349 }
8c59967c 1350 }
673a394b 1351
58642885 1352 if (needs_clflush_after)
c033666a 1353 i915_gem_chipset_flush(to_i915(dev));
ed75a55b
VS
1354 else
1355 obj->cache_dirty = true;
58642885 1356
de152b62 1357 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
40123c1f 1358 return ret;
673a394b
EA
1359}
1360
1361/**
1362 * Writes data to the object referenced by handle.
14bb2c11
TU
1363 * @dev: drm device
1364 * @data: ioctl data blob
1365 * @file: drm file
673a394b
EA
1366 *
1367 * On error, the contents of the buffer that were to be modified are undefined.
1368 */
1369int
1370i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
fbd5a26d 1371 struct drm_file *file)
673a394b 1372{
fac5e23e 1373 struct drm_i915_private *dev_priv = to_i915(dev);
673a394b 1374 struct drm_i915_gem_pwrite *args = data;
05394f39 1375 struct drm_i915_gem_object *obj;
51311d0a
CW
1376 int ret;
1377
1378 if (args->size == 0)
1379 return 0;
1380
1381 if (!access_ok(VERIFY_READ,
3ed605bc 1382 u64_to_user_ptr(args->data_ptr),
51311d0a
CW
1383 args->size))
1384 return -EFAULT;
1385
d330a953 1386 if (likely(!i915.prefault_disable)) {
3ed605bc 1387 ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
0b74b508
XZ
1388 args->size);
1389 if (ret)
1390 return -EFAULT;
1391 }
673a394b 1392
03ac0642 1393 obj = i915_gem_object_lookup(file, args->handle);
258a5ede
CW
1394 if (!obj)
1395 return -ENOENT;
673a394b 1396
7dcd2499 1397 /* Bounds check destination. */
05394f39
CW
1398 if (args->offset > obj->base.size ||
1399 args->size > obj->base.size - args->offset) {
ce9d419d 1400 ret = -EINVAL;
258a5ede 1401 goto err;
ce9d419d
CW
1402 }
1403
db53a302
CW
1404 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1405
258a5ede
CW
1406 ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
1407 if (ret)
1408 goto err;
1409
1410 intel_runtime_pm_get(dev_priv);
1411
1412 ret = i915_mutex_lock_interruptible(dev);
1413 if (ret)
1414 goto err_rpm;
1415
935aaa69 1416 ret = -EFAULT;
673a394b
EA
1417 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1418 * it would end up going through the fenced access, and we'll get
1419 * different detiling behavior between reading and writing.
1420 * pread/pwrite currently are reading and writing from the CPU
1421 * perspective, requiring manual detiling by the client.
1422 */
6eae0059
CW
1423 if (!i915_gem_object_has_struct_page(obj) ||
1424 cpu_write_needs_clflush(obj)) {
4f1959ee 1425 ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
935aaa69
DV
1426 /* Note that the gtt paths might fail with non-page-backed user
1427 * pointers (e.g. gtt mappings when moving data between
1428 * textures). Fallback to the shmem path in that case. */
fbd5a26d 1429 }
673a394b 1430
d1054ee4 1431 if (ret == -EFAULT || ret == -ENOSPC) {
6a2c4232
CW
1432 if (obj->phys_handle)
1433 ret = i915_gem_phys_pwrite(obj, args, file);
6eae0059 1434 else if (i915_gem_object_has_struct_page(obj))
6a2c4232 1435 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
b50a5371
AS
1436 else
1437 ret = -ENODEV;
6a2c4232 1438 }
5c0480f2 1439
f8c417cd 1440 i915_gem_object_put(obj);
fbd5a26d 1441 mutex_unlock(&dev->struct_mutex);
5d77d9c5
ID
1442 intel_runtime_pm_put(dev_priv);
1443
673a394b 1444 return ret;
258a5ede
CW
1445
1446err_rpm:
1447 intel_runtime_pm_put(dev_priv);
1448err:
1449 i915_gem_object_put_unlocked(obj);
1450 return ret;
673a394b
EA
1451}
1452
aeecc969
CW
1453static enum fb_op_origin
1454write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1455{
1456 return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
1457 ORIGIN_GTT : ORIGIN_CPU;
1458}
1459
673a394b 1460/**
2ef7eeaa
EA
1461 * Called when user space prepares to use an object with the CPU, either
1462 * through the mmap ioctl's mapping or a GTT mapping.
14bb2c11
TU
1463 * @dev: drm device
1464 * @data: ioctl data blob
1465 * @file: drm file
673a394b
EA
1466 */
1467int
1468i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
05394f39 1469 struct drm_file *file)
673a394b
EA
1470{
1471 struct drm_i915_gem_set_domain *args = data;
05394f39 1472 struct drm_i915_gem_object *obj;
2ef7eeaa
EA
1473 uint32_t read_domains = args->read_domains;
1474 uint32_t write_domain = args->write_domain;
673a394b
EA
1475 int ret;
1476
2ef7eeaa 1477 /* Only handle setting domains to types used by the CPU. */
b8f9096d 1478 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1479 return -EINVAL;
1480
1481 /* Having something in the write domain implies it's in the read
1482 * domain, and only that read domain. Enforce that in the request.
1483 */
1484 if (write_domain != 0 && read_domains != write_domain)
1485 return -EINVAL;
1486
03ac0642 1487 obj = i915_gem_object_lookup(file, args->handle);
b8f9096d
CW
1488 if (!obj)
1489 return -ENOENT;
673a394b 1490
3236f57a
CW
1491 /* Try to flush the object off the GPU without holding the lock.
1492 * We will repeat the flush holding the lock in the normal manner
1493 * to catch cases where we are gazumped.
1494 */
b8f9096d
CW
1495 ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
1496 if (ret)
1497 goto err;
1498
1499 ret = i915_mutex_lock_interruptible(dev);
3236f57a 1500 if (ret)
b8f9096d 1501 goto err;
3236f57a 1502
43566ded 1503 if (read_domains & I915_GEM_DOMAIN_GTT)
2ef7eeaa 1504 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
43566ded 1505 else
e47c68e9 1506 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2ef7eeaa 1507
031b698a 1508 if (write_domain != 0)
aeecc969 1509 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
031b698a 1510
f8c417cd 1511 i915_gem_object_put(obj);
673a394b
EA
1512 mutex_unlock(&dev->struct_mutex);
1513 return ret;
b8f9096d
CW
1514
1515err:
1516 i915_gem_object_put_unlocked(obj);
1517 return ret;
673a394b
EA
1518}
1519
1520/**
1521 * Called when user space has done writes to this buffer
14bb2c11
TU
1522 * @dev: drm device
1523 * @data: ioctl data blob
1524 * @file: drm file
673a394b
EA
1525 */
1526int
1527i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
05394f39 1528 struct drm_file *file)
673a394b
EA
1529{
1530 struct drm_i915_gem_sw_finish *args = data;
05394f39 1531 struct drm_i915_gem_object *obj;
c21724cc 1532 int err = 0;
1d7cfea1 1533
03ac0642 1534 obj = i915_gem_object_lookup(file, args->handle);
c21724cc
CW
1535 if (!obj)
1536 return -ENOENT;
673a394b 1537
673a394b 1538 /* Pinned buffers may be scanout, so flush the cache */
c21724cc
CW
1539 if (READ_ONCE(obj->pin_display)) {
1540 err = i915_mutex_lock_interruptible(dev);
1541 if (!err) {
1542 i915_gem_object_flush_cpu_write_domain(obj);
1543 mutex_unlock(&dev->struct_mutex);
1544 }
1545 }
e47c68e9 1546
c21724cc
CW
1547 i915_gem_object_put_unlocked(obj);
1548 return err;
673a394b
EA
1549}
1550
1551/**
14bb2c11
TU
1552 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1553 * it is mapped to.
1554 * @dev: drm device
1555 * @data: ioctl data blob
1556 * @file: drm file
673a394b
EA
1557 *
1558 * While the mapping holds a reference on the contents of the object, it doesn't
1559 * imply a ref on the object itself.
34367381
DV
1560 *
1561 * IMPORTANT:
1562 *
1563 * DRM driver writers who look a this function as an example for how to do GEM
1564 * mmap support, please don't implement mmap support like here. The modern way
1565 * to implement DRM mmap support is with an mmap offset ioctl (like
1566 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1567 * That way debug tooling like valgrind will understand what's going on, hiding
1568 * the mmap call in a driver private ioctl will break that. The i915 driver only
1569 * does cpu mmaps this way because we didn't know better.
673a394b
EA
1570 */
1571int
1572i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
05394f39 1573 struct drm_file *file)
673a394b
EA
1574{
1575 struct drm_i915_gem_mmap *args = data;
03ac0642 1576 struct drm_i915_gem_object *obj;
673a394b
EA
1577 unsigned long addr;
1578
1816f923
AG
1579 if (args->flags & ~(I915_MMAP_WC))
1580 return -EINVAL;
1581
568a58e5 1582 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1816f923
AG
1583 return -ENODEV;
1584
03ac0642
CW
1585 obj = i915_gem_object_lookup(file, args->handle);
1586 if (!obj)
bf79cb91 1587 return -ENOENT;
673a394b 1588
1286ff73
DV
1589 /* prime objects have no backing filp to GEM mmap
1590 * pages from.
1591 */
03ac0642 1592 if (!obj->base.filp) {
34911fd3 1593 i915_gem_object_put_unlocked(obj);
1286ff73
DV
1594 return -EINVAL;
1595 }
1596
03ac0642 1597 addr = vm_mmap(obj->base.filp, 0, args->size,
673a394b
EA
1598 PROT_READ | PROT_WRITE, MAP_SHARED,
1599 args->offset);
1816f923
AG
1600 if (args->flags & I915_MMAP_WC) {
1601 struct mm_struct *mm = current->mm;
1602 struct vm_area_struct *vma;
1603
80a89a5e 1604 if (down_write_killable(&mm->mmap_sem)) {
34911fd3 1605 i915_gem_object_put_unlocked(obj);
80a89a5e
MH
1606 return -EINTR;
1607 }
1816f923
AG
1608 vma = find_vma(mm, addr);
1609 if (vma)
1610 vma->vm_page_prot =
1611 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1612 else
1613 addr = -ENOMEM;
1614 up_write(&mm->mmap_sem);
aeecc969
CW
1615
1616 /* This may race, but that's ok, it only gets set */
03ac0642 1617 WRITE_ONCE(obj->has_wc_mmap, true);
1816f923 1618 }
34911fd3 1619 i915_gem_object_put_unlocked(obj);
673a394b
EA
1620 if (IS_ERR((void *)addr))
1621 return addr;
1622
1623 args->addr_ptr = (uint64_t) addr;
1624
1625 return 0;
1626}
1627
de151cf6
JB
1628/**
1629 * i915_gem_fault - fault a page into the GTT
058d88c4 1630 * @area: CPU VMA in question
d9072a3e 1631 * @vmf: fault info
de151cf6
JB
1632 *
1633 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1634 * from userspace. The fault handler takes care of binding the object to
1635 * the GTT (if needed), allocating and programming a fence register (again,
1636 * only if needed based on whether the old reg is still valid or the object
1637 * is tiled) and inserting a new PTE into the faulting process.
1638 *
1639 * Note that the faulting process may involve evicting existing objects
1640 * from the GTT and/or fence registers to make room. So performance may
1641 * suffer if the GTT working set is large or there are few fence registers
1642 * left.
1643 */
058d88c4 1644int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
de151cf6 1645{
058d88c4 1646 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
05394f39 1647 struct drm_device *dev = obj->base.dev;
72e96d64
JL
1648 struct drm_i915_private *dev_priv = to_i915(dev);
1649 struct i915_ggtt *ggtt = &dev_priv->ggtt;
c5ad54cf 1650 struct i915_ggtt_view view = i915_ggtt_view_normal;
b8f9096d 1651 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
058d88c4 1652 struct i915_vma *vma;
de151cf6
JB
1653 pgoff_t page_offset;
1654 unsigned long pfn;
b8f9096d 1655 int ret;
f65c9168 1656
de151cf6 1657 /* We don't use vmf->pgoff since that has the fake offset */
058d88c4 1658 page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
de151cf6
JB
1659 PAGE_SHIFT;
1660
db53a302
CW
1661 trace_i915_gem_object_fault(obj, page_offset, true, write);
1662
6e4930f6 1663 /* Try to flush the object off the GPU first without holding the lock.
b8f9096d 1664 * Upon acquiring the lock, we will perform our sanity checks and then
6e4930f6
CW
1665 * repeat the flush holding the lock in the normal manner to catch cases
1666 * where we are gazumped.
1667 */
b8f9096d 1668 ret = __unsafe_wait_rendering(obj, NULL, !write);
6e4930f6 1669 if (ret)
b8f9096d
CW
1670 goto err;
1671
1672 intel_runtime_pm_get(dev_priv);
1673
1674 ret = i915_mutex_lock_interruptible(dev);
1675 if (ret)
1676 goto err_rpm;
6e4930f6 1677
eb119bd6
CW
1678 /* Access to snoopable pages through the GTT is incoherent. */
1679 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
ddeff6ee 1680 ret = -EFAULT;
b8f9096d 1681 goto err_unlock;
eb119bd6
CW
1682 }
1683
c5ad54cf 1684 /* Use a partial view if the object is bigger than the aperture. */
72e96d64 1685 if (obj->base.size >= ggtt->mappable_end &&
3e510a8e 1686 !i915_gem_object_is_tiled(obj)) {
c5ad54cf 1687 static const unsigned int chunk_size = 256; // 1 MiB
e7ded2d7 1688
c5ad54cf
JL
1689 memset(&view, 0, sizeof(view));
1690 view.type = I915_GGTT_VIEW_PARTIAL;
1691 view.params.partial.offset = rounddown(page_offset, chunk_size);
1692 view.params.partial.size =
1693 min_t(unsigned int,
1694 chunk_size,
058d88c4 1695 (area->vm_end - area->vm_start) / PAGE_SIZE -
c5ad54cf
JL
1696 view.params.partial.offset);
1697 }
1698
1699 /* Now pin it into the GTT if needed */
058d88c4
CW
1700 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1701 if (IS_ERR(vma)) {
1702 ret = PTR_ERR(vma);
b8f9096d 1703 goto err_unlock;
058d88c4 1704 }
4a684a41 1705
c9839303
CW
1706 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1707 if (ret)
b8f9096d 1708 goto err_unpin;
74898d7e 1709
06d98131 1710 ret = i915_gem_object_get_fence(obj);
d9e86c0e 1711 if (ret)
b8f9096d 1712 goto err_unpin;
7d1c4804 1713
b90b91d8 1714 /* Finally, remap it using the new GTT offset */
058d88c4 1715 pfn = ggtt->mappable_base + vma->node.start;
f343c5f6 1716 pfn >>= PAGE_SHIFT;
de151cf6 1717
c5ad54cf
JL
1718 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
1719 /* Overriding existing pages in partial view does not cause
1720 * us any trouble as TLBs are still valid because the fault
1721 * is due to userspace losing part of the mapping or never
1722 * having accessed it before (at this partials' range).
1723 */
058d88c4 1724 unsigned long base = area->vm_start +
c5ad54cf
JL
1725 (view.params.partial.offset << PAGE_SHIFT);
1726 unsigned int i;
b90b91d8 1727
c5ad54cf 1728 for (i = 0; i < view.params.partial.size; i++) {
058d88c4
CW
1729 ret = vm_insert_pfn(area,
1730 base + i * PAGE_SIZE,
1731 pfn + i);
b90b91d8
CW
1732 if (ret)
1733 break;
1734 }
1735
1736 obj->fault_mappable = true;
c5ad54cf
JL
1737 } else {
1738 if (!obj->fault_mappable) {
058d88c4
CW
1739 unsigned long size =
1740 min_t(unsigned long,
1741 area->vm_end - area->vm_start,
1742 obj->base.size) >> PAGE_SHIFT;
1743 unsigned long base = area->vm_start;
c5ad54cf
JL
1744 int i;
1745
058d88c4
CW
1746 for (i = 0; i < size; i++) {
1747 ret = vm_insert_pfn(area,
1748 base + i * PAGE_SIZE,
c5ad54cf
JL
1749 pfn + i);
1750 if (ret)
1751 break;
1752 }
1753
1754 obj->fault_mappable = true;
1755 } else
058d88c4 1756 ret = vm_insert_pfn(area,
c5ad54cf
JL
1757 (unsigned long)vmf->virtual_address,
1758 pfn + page_offset);
1759 }
b8f9096d 1760err_unpin:
058d88c4 1761 __i915_vma_unpin(vma);
b8f9096d 1762err_unlock:
de151cf6 1763 mutex_unlock(&dev->struct_mutex);
b8f9096d
CW
1764err_rpm:
1765 intel_runtime_pm_put(dev_priv);
1766err:
de151cf6 1767 switch (ret) {
d9bc7e9f 1768 case -EIO:
2232f031
DV
1769 /*
1770 * We eat errors when the gpu is terminally wedged to avoid
1771 * userspace unduly crashing (gl has no provisions for mmaps to
1772 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1773 * and so needs to be reported.
1774 */
1775 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
f65c9168
PZ
1776 ret = VM_FAULT_SIGBUS;
1777 break;
1778 }
045e769a 1779 case -EAGAIN:
571c608d
DV
1780 /*
1781 * EAGAIN means the gpu is hung and we'll wait for the error
1782 * handler to reset everything when re-faulting in
1783 * i915_mutex_lock_interruptible.
d9bc7e9f 1784 */
c715089f
CW
1785 case 0:
1786 case -ERESTARTSYS:
bed636ab 1787 case -EINTR:
e79e0fe3
DR
1788 case -EBUSY:
1789 /*
1790 * EBUSY is ok: this just means that another thread
1791 * already did the job.
1792 */
f65c9168
PZ
1793 ret = VM_FAULT_NOPAGE;
1794 break;
de151cf6 1795 case -ENOMEM:
f65c9168
PZ
1796 ret = VM_FAULT_OOM;
1797 break;
a7c2e1aa 1798 case -ENOSPC:
45d67817 1799 case -EFAULT:
f65c9168
PZ
1800 ret = VM_FAULT_SIGBUS;
1801 break;
de151cf6 1802 default:
a7c2e1aa 1803 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
f65c9168
PZ
1804 ret = VM_FAULT_SIGBUS;
1805 break;
de151cf6 1806 }
f65c9168 1807 return ret;
de151cf6
JB
1808}
1809
901782b2
CW
1810/**
1811 * i915_gem_release_mmap - remove physical page mappings
1812 * @obj: obj in question
1813 *
af901ca1 1814 * Preserve the reservation of the mmapping with the DRM core code, but
901782b2
CW
1815 * relinquish ownership of the pages back to the system.
1816 *
1817 * It is vital that we remove the page mapping if we have mapped a tiled
1818 * object through the GTT and then lose the fence register due to
1819 * resource pressure. Similarly if the object has been moved out of the
 1820 * aperture, then pages mapped into userspace must be revoked. Removing the
1821 * mapping will then trigger a page fault on the next user access, allowing
1822 * fixup by i915_gem_fault().
1823 */
d05ca301 1824void
05394f39 1825i915_gem_release_mmap(struct drm_i915_gem_object *obj)
901782b2 1826{
349f2ccf
CW
1827 /* Serialisation between user GTT access and our code depends upon
1828 * revoking the CPU's PTE whilst the mutex is held. The next user
1829 * pagefault then has to wait until we release the mutex.
1830 */
1831 lockdep_assert_held(&obj->base.dev->struct_mutex);
1832
6299f992
CW
1833 if (!obj->fault_mappable)
1834 return;
901782b2 1835
6796cb16
DH
1836 drm_vma_node_unmap(&obj->base.vma_node,
1837 obj->base.dev->anon_inode->i_mapping);
349f2ccf
CW
1838
 1839 /* Ensure that the CPU's PTEs are revoked and there are no outstanding
 1840 * memory transactions from userspace before we return. The TLB
 1841 * flushing implied by changing the PTEs above *should* be
1842 * sufficient, an extra barrier here just provides us with a bit
1843 * of paranoid documentation about our requirement to serialise
1844 * memory writes before touching registers / GSM.
1845 */
1846 wmb();
1847
6299f992 1848 obj->fault_mappable = false;
901782b2
CW
1849}
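/* Illustrative sketch (not part of the driver): drm_vma_node_unmap() above
 * boils down to zapping the userspace PTEs covering the object's fake mmap
 * offset, roughly equivalent to:
 *
 *	unmap_mapping_range(obj->base.dev->anon_inode->i_mapping,
 *			    drm_vma_node_offset_addr(&obj->base.vma_node),
 *			    drm_vma_node_size(&obj->base.vma_node) << PAGE_SHIFT,
 *			    1);
 *
 * which is why the next user access faults back into i915_gem_fault().
 */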
1850
eedd10f4
CW
1851void
1852i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1853{
1854 struct drm_i915_gem_object *obj;
1855
1856 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1857 i915_gem_release_mmap(obj);
1858}
1859
ad1a7d20
CW
1860/**
1861 * i915_gem_get_ggtt_size - return required global GTT size for an object
a9f1481f 1862 * @dev_priv: i915 device
ad1a7d20
CW
1863 * @size: object size
1864 * @tiling_mode: tiling mode
1865 *
1866 * Return the required global GTT size for an object, taking into account
1867 * potential fence register mapping.
1868 */
a9f1481f
CW
1869u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
1870 u64 size, int tiling_mode)
92b88aeb 1871{
ad1a7d20 1872 u64 ggtt_size;
92b88aeb 1873
ad1a7d20
CW
1874 GEM_BUG_ON(size == 0);
1875
a9f1481f 1876 if (INTEL_GEN(dev_priv) >= 4 ||
e28f8711
CW
1877 tiling_mode == I915_TILING_NONE)
1878 return size;
92b88aeb
CW
1879
1880 /* Previous chips need a power-of-two fence region when tiling */
a9f1481f 1881 if (IS_GEN3(dev_priv))
ad1a7d20 1882 ggtt_size = 1024*1024;
92b88aeb 1883 else
ad1a7d20 1884 ggtt_size = 512*1024;
92b88aeb 1885
ad1a7d20
CW
1886 while (ggtt_size < size)
1887 ggtt_size <<= 1;
92b88aeb 1888
ad1a7d20 1889 return ggtt_size;
92b88aeb
CW
1890}
1891
de151cf6 1892/**
ad1a7d20 1893 * i915_gem_get_ggtt_alignment - return required global GTT alignment
a9f1481f 1894 * @dev_priv: i915 device
14bb2c11
TU
1895 * @size: object size
1896 * @tiling_mode: tiling mode
ad1a7d20 1897 * @fenced: is fenced alignment required or not
de151cf6 1898 *
ad1a7d20 1899 * Return the required global GTT alignment for an object, taking into account
5e783301 1900 * potential fence register mapping.
de151cf6 1901 */
a9f1481f 1902u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
ad1a7d20 1903 int tiling_mode, bool fenced)
de151cf6 1904{
ad1a7d20
CW
1905 GEM_BUG_ON(size == 0);
1906
de151cf6
JB
1907 /*
1908 * Minimum alignment is 4k (GTT page size), but might be greater
1909 * if a fence register is needed for the object.
1910 */
a9f1481f 1911 if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
e28f8711 1912 tiling_mode == I915_TILING_NONE)
de151cf6
JB
1913 return 4096;
1914
a00b10c3
CW
1915 /*
1916 * Previous chips need to be aligned to the size of the smallest
1917 * fence register that can contain the object.
1918 */
a9f1481f 1919 return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
a00b10c3
CW
1920}
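/* Worked example (illustrative): a 300 KiB X-tiled object on gen3 needs a
 * power-of-two fence region, so i915_gem_get_ggtt_size() starts at the
 * 1 MiB minimum and doubles until the object fits:
 *
 *	i915_gem_get_ggtt_size(dev_priv, 300 * 1024, I915_TILING_X) == 1 MiB
 *	i915_gem_get_ggtt_alignment(dev_priv, 300 * 1024, I915_TILING_X, true)
 *		== 1 MiB	(fenced alignment matches the fence region)
 *
 * On gen4+ or for untiled objects the size is used as-is and the alignment
 * falls back to 4096.
 */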
1921
d8cb5086
CW
1922static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1923{
fac5e23e 1924 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
f3f6184c 1925 int err;
da494d7c 1926
f3f6184c
CW
1927 err = drm_gem_create_mmap_offset(&obj->base);
1928 if (!err)
1929 return 0;
d8cb5086 1930
f3f6184c
CW
1931 /* We can idle the GPU locklessly to flush stale objects, but in order
1932 * to claim that space for ourselves, we need to take the big
1933 * struct_mutex to free the requests+objects and allocate our slot.
d8cb5086 1934 */
f3f6184c
CW
1935 err = i915_gem_wait_for_idle(dev_priv, true);
1936 if (err)
1937 return err;
1938
1939 err = i915_mutex_lock_interruptible(&dev_priv->drm);
1940 if (!err) {
1941 i915_gem_retire_requests(dev_priv);
1942 err = drm_gem_create_mmap_offset(&obj->base);
1943 mutex_unlock(&dev_priv->drm.struct_mutex);
1944 }
da494d7c 1945
f3f6184c 1946 return err;
d8cb5086
CW
1947}
1948
1949static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1950{
d8cb5086
CW
1951 drm_gem_free_mmap_offset(&obj->base);
1952}
1953
da6b51d0 1954int
ff72145b
DA
1955i915_gem_mmap_gtt(struct drm_file *file,
1956 struct drm_device *dev,
da6b51d0 1957 uint32_t handle,
ff72145b 1958 uint64_t *offset)
de151cf6 1959{
05394f39 1960 struct drm_i915_gem_object *obj;
de151cf6
JB
1961 int ret;
1962
03ac0642 1963 obj = i915_gem_object_lookup(file, handle);
f3f6184c
CW
1964 if (!obj)
1965 return -ENOENT;
ab18282d 1966
d8cb5086 1967 ret = i915_gem_object_create_mmap_offset(obj);
f3f6184c
CW
1968 if (ret == 0)
1969 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
de151cf6 1970
f3f6184c 1971 i915_gem_object_put_unlocked(obj);
1d7cfea1 1972 return ret;
de151cf6
JB
1973}
1974
ff72145b
DA
1975/**
1976 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1977 * @dev: DRM device
1978 * @data: GTT mapping ioctl data
1979 * @file: GEM object info
1980 *
1981 * Simply returns the fake offset to userspace so it can mmap it.
1982 * The mmap call will end up in drm_gem_mmap(), which will set things
1983 * up so we can get faults in the handler above.
1984 *
1985 * The fault handler will take care of binding the object into the GTT
1986 * (since it may have been evicted to make room for something), allocating
1987 * a fence register, and mapping the appropriate aperture address into
1988 * userspace.
1989 */
1990int
1991i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1992 struct drm_file *file)
1993{
1994 struct drm_i915_gem_mmap_gtt *args = data;
1995
da6b51d0 1996 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
ff72145b
DA
1997}
1998
225067ee
DV
1999/* Immediately discard the backing storage */
2000static void
2001i915_gem_object_truncate(struct drm_i915_gem_object *obj)
e5281ccd 2002{
4d6294bf 2003 i915_gem_object_free_mmap_offset(obj);
1286ff73 2004
4d6294bf
CW
2005 if (obj->base.filp == NULL)
2006 return;
e5281ccd 2007
225067ee
DV
2008 /* Our goal here is to return as much of the memory as
2009 * is possible back to the system as we are called from OOM.
2010 * To do this we must instruct the shmfs to drop all of its
2011 * backing pages, *now*.
2012 */
5537252b 2013 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
225067ee
DV
2014 obj->madv = __I915_MADV_PURGED;
2015}
e5281ccd 2016
5537252b
CW
2017/* Try to discard unwanted pages */
2018static void
2019i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
225067ee 2020{
5537252b
CW
2021 struct address_space *mapping;
2022
2023 switch (obj->madv) {
2024 case I915_MADV_DONTNEED:
2025 i915_gem_object_truncate(obj);
2026 case __I915_MADV_PURGED:
2027 return;
2028 }
2029
2030 if (obj->base.filp == NULL)
2031 return;
2032
93c76a3d 2033 mapping = obj->base.filp->f_mapping;
5537252b 2034 invalidate_mapping_pages(mapping, 0, (loff_t)-1);
e5281ccd
CW
2035}
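/* The purged/dontneed states above are normally driven from userspace via
 * the madvise ioctl (illustrative sketch, error handling omitted; 'fd' and
 * 'handle' as before):
 *
 *	struct drm_i915_gem_madvise arg = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg);
 *
 * Once marked DONTNEED, the shrinker may truncate the backing storage under
 * memory pressure; arg.retained reports whether the pages were still there.
 */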
2036
5cdf5881 2037static void
05394f39 2038i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
673a394b 2039{
85d1225e
DG
2040 struct sgt_iter sgt_iter;
2041 struct page *page;
90797e6d 2042 int ret;
1286ff73 2043
05394f39 2044 BUG_ON(obj->madv == __I915_MADV_PURGED);
673a394b 2045
6c085a72 2046 ret = i915_gem_object_set_to_cpu_domain(obj, true);
f4457ae7 2047 if (WARN_ON(ret)) {
6c085a72
CW
2048 /* In the event of a disaster, abandon all caches and
2049 * hope for the best.
2050 */
2c22569b 2051 i915_gem_clflush_object(obj, true);
6c085a72
CW
2052 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2053 }
2054
e2273302
ID
2055 i915_gem_gtt_finish_object(obj);
2056
6dacfd2f 2057 if (i915_gem_object_needs_bit17_swizzle(obj))
280b713b
EA
2058 i915_gem_object_save_bit_17_swizzle(obj);
2059
05394f39
CW
2060 if (obj->madv == I915_MADV_DONTNEED)
2061 obj->dirty = 0;
3ef94daa 2062
85d1225e 2063 for_each_sgt_page(page, sgt_iter, obj->pages) {
05394f39 2064 if (obj->dirty)
9da3da66 2065 set_page_dirty(page);
3ef94daa 2066
05394f39 2067 if (obj->madv == I915_MADV_WILLNEED)
9da3da66 2068 mark_page_accessed(page);
3ef94daa 2069
09cbfeaf 2070 put_page(page);
3ef94daa 2071 }
05394f39 2072 obj->dirty = 0;
673a394b 2073
9da3da66
CW
2074 sg_free_table(obj->pages);
2075 kfree(obj->pages);
37e680a1 2076}
6c085a72 2077
dd624afd 2078int
37e680a1
CW
2079i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2080{
2081 const struct drm_i915_gem_object_ops *ops = obj->ops;
2082
2f745ad3 2083 if (obj->pages == NULL)
37e680a1
CW
2084 return 0;
2085
a5570178
CW
2086 if (obj->pages_pin_count)
2087 return -EBUSY;
2088
15717de2 2089 GEM_BUG_ON(obj->bind_count);
3e123027 2090
a2165e31
CW
2091 /* ->put_pages might need to allocate memory for the bit17 swizzle
2092 * array, hence protect them from being reaped by removing them from gtt
2093 * lists early. */
35c20a60 2094 list_del(&obj->global_list);
a2165e31 2095
0a798eb9 2096 if (obj->mapping) {
d31d7cb1 2097 /* low bits are ignored by is_vmalloc_addr and kmap_to_page */
fb8621d3
CW
2098 if (is_vmalloc_addr(obj->mapping))
2099 vunmap(obj->mapping);
2100 else
2101 kunmap(kmap_to_page(obj->mapping));
0a798eb9
CW
2102 obj->mapping = NULL;
2103 }
2104
37e680a1 2105 ops->put_pages(obj);
05394f39 2106 obj->pages = NULL;
37e680a1 2107
5537252b 2108 i915_gem_object_invalidate(obj);
6c085a72
CW
2109
2110 return 0;
2111}
2112
37e680a1 2113static int
6c085a72 2114i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
e5281ccd 2115{
fac5e23e 2116 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
e5281ccd
CW
2117 int page_count, i;
2118 struct address_space *mapping;
9da3da66
CW
2119 struct sg_table *st;
2120 struct scatterlist *sg;
85d1225e 2121 struct sgt_iter sgt_iter;
e5281ccd 2122 struct page *page;
90797e6d 2123 unsigned long last_pfn = 0; /* suppress gcc warning */
e2273302 2124 int ret;
6c085a72 2125 gfp_t gfp;
e5281ccd 2126
6c085a72
CW
2127 /* Assert that the object is not currently in any GPU domain. As it
2128 * wasn't in the GTT, there shouldn't be any way it could have been in
2129 * a GPU cache
2130 */
2131 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2132 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2133
9da3da66
CW
2134 st = kmalloc(sizeof(*st), GFP_KERNEL);
2135 if (st == NULL)
2136 return -ENOMEM;
2137
05394f39 2138 page_count = obj->base.size / PAGE_SIZE;
9da3da66 2139 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
9da3da66 2140 kfree(st);
e5281ccd 2141 return -ENOMEM;
9da3da66 2142 }
e5281ccd 2143
9da3da66
CW
2144 /* Get the list of pages out of our struct file. They'll be pinned
2145 * at this point until we release them.
2146 *
2147 * Fail silently without starting the shrinker
2148 */
93c76a3d 2149 mapping = obj->base.filp->f_mapping;
c62d2555 2150 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
d0164adc 2151 gfp |= __GFP_NORETRY | __GFP_NOWARN;
90797e6d
ID
2152 sg = st->sgl;
2153 st->nents = 0;
2154 for (i = 0; i < page_count; i++) {
6c085a72
CW
2155 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2156 if (IS_ERR(page)) {
21ab4e74
CW
2157 i915_gem_shrink(dev_priv,
2158 page_count,
2159 I915_SHRINK_BOUND |
2160 I915_SHRINK_UNBOUND |
2161 I915_SHRINK_PURGEABLE);
6c085a72
CW
2162 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2163 }
2164 if (IS_ERR(page)) {
2165 /* We've tried hard to allocate the memory by reaping
2166 * our own buffer, now let the real VM do its job and
2167 * go down in flames if truly OOM.
2168 */
6c085a72 2169 i915_gem_shrink_all(dev_priv);
f461d1be 2170 page = shmem_read_mapping_page(mapping, i);
e2273302
ID
2171 if (IS_ERR(page)) {
2172 ret = PTR_ERR(page);
6c085a72 2173 goto err_pages;
e2273302 2174 }
6c085a72 2175 }
426729dc
KRW
2176#ifdef CONFIG_SWIOTLB
2177 if (swiotlb_nr_tbl()) {
2178 st->nents++;
2179 sg_set_page(sg, page, PAGE_SIZE, 0);
2180 sg = sg_next(sg);
2181 continue;
2182 }
2183#endif
90797e6d
ID
2184 if (!i || page_to_pfn(page) != last_pfn + 1) {
2185 if (i)
2186 sg = sg_next(sg);
2187 st->nents++;
2188 sg_set_page(sg, page, PAGE_SIZE, 0);
2189 } else {
2190 sg->length += PAGE_SIZE;
2191 }
2192 last_pfn = page_to_pfn(page);
3bbbe706
DV
2193
2194 /* Check that the i965g/gm workaround works. */
2195 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
e5281ccd 2196 }
426729dc
KRW
2197#ifdef CONFIG_SWIOTLB
2198 if (!swiotlb_nr_tbl())
2199#endif
2200 sg_mark_end(sg);
74ce6b6c
CW
2201 obj->pages = st;
2202
e2273302
ID
2203 ret = i915_gem_gtt_prepare_object(obj);
2204 if (ret)
2205 goto err_pages;
2206
6dacfd2f 2207 if (i915_gem_object_needs_bit17_swizzle(obj))
e5281ccd
CW
2208 i915_gem_object_do_bit_17_swizzle(obj);
2209
3e510a8e 2210 if (i915_gem_object_is_tiled(obj) &&
656bfa3a
DV
2211 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2212 i915_gem_object_pin_pages(obj);
2213
e5281ccd
CW
2214 return 0;
2215
2216err_pages:
90797e6d 2217 sg_mark_end(sg);
85d1225e
DG
2218 for_each_sgt_page(page, sgt_iter, st)
2219 put_page(page);
9da3da66
CW
2220 sg_free_table(st);
2221 kfree(st);
0820baf3
CW
2222
2223 /* shmemfs first checks if there is enough memory to allocate the page
2224 * and reports ENOSPC should there be insufficient, along with the usual
2225 * ENOMEM for a genuine allocation failure.
2226 *
2227 * We use ENOSPC in our driver to mean that we have run out of aperture
2228 * space and so want to translate the error from shmemfs back to our
2229 * usual understanding of ENOMEM.
2230 */
e2273302
ID
2231 if (ret == -ENOSPC)
2232 ret = -ENOMEM;
2233
2234 return ret;
673a394b
EA
2235}
2236
37e680a1
CW
2237/* Ensure that the associated pages are gathered from the backing storage
2238 * and pinned into our object. i915_gem_object_get_pages() may be called
2239 * multiple times before they are released by a single call to
2240 * i915_gem_object_put_pages() - once the pages are no longer referenced
2241 * either as a result of memory pressure (reaping pages under the shrinker)
2242 * or as the object is itself released.
2243 */
2244int
2245i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2246{
fac5e23e 2247 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
37e680a1
CW
2248 const struct drm_i915_gem_object_ops *ops = obj->ops;
2249 int ret;
2250
2f745ad3 2251 if (obj->pages)
37e680a1
CW
2252 return 0;
2253
43e28f09 2254 if (obj->madv != I915_MADV_WILLNEED) {
bd9b6a4e 2255 DRM_DEBUG("Attempting to obtain a purgeable object\n");
8c99e57d 2256 return -EFAULT;
43e28f09
CW
2257 }
2258
a5570178
CW
2259 BUG_ON(obj->pages_pin_count);
2260
37e680a1
CW
2261 ret = ops->get_pages(obj);
2262 if (ret)
2263 return ret;
2264
35c20a60 2265 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
ee286370
CW
2266
2267 obj->get_page.sg = obj->pages->sgl;
2268 obj->get_page.last = 0;
2269
37e680a1 2270 return 0;
673a394b
EA
2271}
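/* A minimal in-kernel usage sketch (assuming struct_mutex is held), matching
 * the pattern used elsewhere in this file: acquire the pages, pin them for
 * the duration of the access, then drop the pin so the shrinker may reap
 * them again:
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret)
 *		return ret;
 *
 *	i915_gem_object_pin_pages(obj);
 *	... access obj->pages (an sg_table) ...
 *	i915_gem_object_unpin_pages(obj);
 */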
2272
dd6034c6 2273/* The 'mapping' part of i915_gem_object_pin_map() below */
d31d7cb1
CW
2274static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2275 enum i915_map_type type)
dd6034c6
DG
2276{
2277 unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2278 struct sg_table *sgt = obj->pages;
85d1225e
DG
2279 struct sgt_iter sgt_iter;
2280 struct page *page;
b338fa47
DG
2281 struct page *stack_pages[32];
2282 struct page **pages = stack_pages;
dd6034c6 2283 unsigned long i = 0;
d31d7cb1 2284 pgprot_t pgprot;
dd6034c6
DG
2285 void *addr;
2286
2287 /* A single page can always be kmapped */
d31d7cb1 2288 if (n_pages == 1 && type == I915_MAP_WB)
dd6034c6
DG
2289 return kmap(sg_page(sgt->sgl));
2290
b338fa47
DG
2291 if (n_pages > ARRAY_SIZE(stack_pages)) {
2292 /* Too big for stack -- allocate temporary array instead */
2293 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2294 if (!pages)
2295 return NULL;
2296 }
dd6034c6 2297
85d1225e
DG
2298 for_each_sgt_page(page, sgt_iter, sgt)
2299 pages[i++] = page;
dd6034c6
DG
2300
2301 /* Check that we have the expected number of pages */
2302 GEM_BUG_ON(i != n_pages);
2303
d31d7cb1
CW
2304 switch (type) {
2305 case I915_MAP_WB:
2306 pgprot = PAGE_KERNEL;
2307 break;
2308 case I915_MAP_WC:
2309 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2310 break;
2311 }
2312 addr = vmap(pages, n_pages, 0, pgprot);
dd6034c6 2313
b338fa47
DG
2314 if (pages != stack_pages)
2315 drm_free_large(pages);
dd6034c6
DG
2316
2317 return addr;
2318}
2319
2320/* get, pin, and map the pages of the object into kernel space */
d31d7cb1
CW
2321void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2322 enum i915_map_type type)
0a798eb9 2323{
d31d7cb1
CW
2324 enum i915_map_type has_type;
2325 bool pinned;
2326 void *ptr;
0a798eb9
CW
2327 int ret;
2328
2329 lockdep_assert_held(&obj->base.dev->struct_mutex);
d31d7cb1 2330 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
0a798eb9
CW
2331
2332 ret = i915_gem_object_get_pages(obj);
2333 if (ret)
2334 return ERR_PTR(ret);
2335
2336 i915_gem_object_pin_pages(obj);
d31d7cb1 2337 pinned = obj->pages_pin_count > 1;
0a798eb9 2338
d31d7cb1
CW
2339 ptr = ptr_unpack_bits(obj->mapping, has_type);
2340 if (ptr && has_type != type) {
2341 if (pinned) {
2342 ret = -EBUSY;
2343 goto err;
0a798eb9 2344 }
d31d7cb1
CW
2345
2346 if (is_vmalloc_addr(ptr))
2347 vunmap(ptr);
2348 else
2349 kunmap(kmap_to_page(ptr));
2350
2351 ptr = obj->mapping = NULL;
0a798eb9
CW
2352 }
2353
d31d7cb1
CW
2354 if (!ptr) {
2355 ptr = i915_gem_object_map(obj, type);
2356 if (!ptr) {
2357 ret = -ENOMEM;
2358 goto err;
2359 }
2360
2361 obj->mapping = ptr_pack_bits(ptr, type);
2362 }
2363
2364 return ptr;
2365
2366err:
2367 i915_gem_object_unpin_pages(obj);
2368 return ERR_PTR(ret);
0a798eb9
CW
2369}
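/* A minimal usage sketch for the mapping API above (struct_mutex held; the
 * 'data'/'len' source buffer is caller-provided and purely illustrative).
 * The returned pointer stays valid until the matching unpin:
 *
 *	void *vaddr;
 *
 *	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *
 *	memcpy(vaddr, data, len);
 *	i915_gem_object_unpin_map(obj);
 *
 * Requesting I915_MAP_WC instead yields a write-combined mapping, but as the
 * code above shows, the two map types cannot coexist while the existing map
 * is pinned elsewhere.
 */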
2370
b4716185 2371static void
fa545cbf
CW
2372i915_gem_object_retire__write(struct i915_gem_active *active,
2373 struct drm_i915_gem_request *request)
e2d05a8b 2374{
fa545cbf
CW
2375 struct drm_i915_gem_object *obj =
2376 container_of(active, struct drm_i915_gem_object, last_write);
b4716185 2377
de152b62 2378 intel_fb_obj_flush(obj, true, ORIGIN_CS);
e2d05a8b
BW
2379}
2380
caea7476 2381static void
fa545cbf
CW
2382i915_gem_object_retire__read(struct i915_gem_active *active,
2383 struct drm_i915_gem_request *request)
ce44b0ea 2384{
fa545cbf
CW
2385 int idx = request->engine->id;
2386 struct drm_i915_gem_object *obj =
2387 container_of(active, struct drm_i915_gem_object, last_read[idx]);
ce44b0ea 2388
573adb39 2389 GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
b4716185 2390
573adb39
CW
2391 i915_gem_object_clear_active(obj, idx);
2392 if (i915_gem_object_is_active(obj))
b4716185 2393 return;
caea7476 2394
6c246959
CW
2395 /* Bump our place on the bound list to keep it roughly in LRU order
2396 * so that we don't steal from recently used but inactive objects
2397 * (unless we are forced to ofc!)
2398 */
b0decaf7
CW
2399 if (obj->bind_count)
2400 list_move_tail(&obj->global_list,
2401 &request->i915->mm.bound_list);
caea7476 2402
f8c417cd 2403 i915_gem_object_put(obj);
c8725f3d
CW
2404}
2405
7b4d3a16 2406static bool i915_context_is_banned(const struct i915_gem_context *ctx)
be62acb4 2407{
44e2c070 2408 unsigned long elapsed;
be62acb4 2409
44e2c070 2410 if (ctx->hang_stats.banned)
be62acb4
MK
2411 return true;
2412
7b4d3a16 2413 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
676fa572
CW
2414 if (ctx->hang_stats.ban_period_seconds &&
2415 elapsed <= ctx->hang_stats.ban_period_seconds) {
7b4d3a16
CW
2416 DRM_DEBUG("context hanging too fast, banning!\n");
2417 return true;
be62acb4
MK
2418 }
2419
2420 return false;
2421}
2422
7b4d3a16 2423static void i915_set_reset_status(struct i915_gem_context *ctx,
b6b0fac0 2424 const bool guilty)
aa60c664 2425{
7b4d3a16 2426 struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
44e2c070
MK
2427
2428 if (guilty) {
7b4d3a16 2429 hs->banned = i915_context_is_banned(ctx);
44e2c070
MK
2430 hs->batch_active++;
2431 hs->guilty_ts = get_seconds();
2432 } else {
2433 hs->batch_pending++;
aa60c664
MK
2434 }
2435}
2436
8d9fc7fd 2437struct drm_i915_gem_request *
0bc40be8 2438i915_gem_find_active_request(struct intel_engine_cs *engine)
9375e446 2439{
4db080f9
CW
2440 struct drm_i915_gem_request *request;
2441
f69a02c9
CW
2442 /* We are called by the error capture and reset at a random
2443 * point in time. In particular, note that neither is crucially
2444 * ordered with an interrupt. After a hang, the GPU is dead and we
2445 * assume that no more writes can happen (we waited long enough for
2446 * all writes that were in transaction to be flushed) - adding an
2447 * extra delay for a recent interrupt is pointless. Hence, we do
2448 * not need an engine->irq_seqno_barrier() before the seqno reads.
2449 */
efdf7c06 2450 list_for_each_entry(request, &engine->request_list, link) {
f69a02c9 2451 if (i915_gem_request_completed(request))
4db080f9 2452 continue;
aa60c664 2453
b6b0fac0 2454 return request;
4db080f9 2455 }
b6b0fac0
MK
2456
2457 return NULL;
2458}
2459
7b4d3a16 2460static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
b6b0fac0
MK
2461{
2462 struct drm_i915_gem_request *request;
2463 bool ring_hung;
2464
0bc40be8 2465 request = i915_gem_find_active_request(engine);
b6b0fac0
MK
2466 if (request == NULL)
2467 return;
2468
0bc40be8 2469 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
b6b0fac0 2470
7b4d3a16 2471 i915_set_reset_status(request->ctx, ring_hung);
efdf7c06 2472 list_for_each_entry_continue(request, &engine->request_list, link)
7b4d3a16 2473 i915_set_reset_status(request->ctx, false);
4db080f9 2474}
aa60c664 2475
7b4d3a16 2476static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
4db080f9 2477{
dcff85c8 2478 struct drm_i915_gem_request *request;
7e37f889 2479 struct intel_ring *ring;
608c1a52 2480
c4b0930b
CW
2481 /* Mark all pending requests as complete so that any concurrent
2482 * (lockless) lookup doesn't try and wait upon the request as we
2483 * reset it.
2484 */
87b723a1 2485 intel_engine_init_seqno(engine, engine->last_submitted_seqno);
c4b0930b 2486
dcb4c12a
OM
2487 /*
2488 * Clear the execlists queue up before freeing the requests, as those
2489 * are the ones that keep the context and ringbuffer backing objects
2490 * pinned in place.
2491 */
dcb4c12a 2492
7de1691a 2493 if (i915.enable_execlists) {
27af5eea
TU
2494 /* Ensure irq handler finishes or is cancelled. */
2495 tasklet_kill(&engine->irq_tasklet);
1197b4f2 2496
e39d42fa 2497 intel_execlists_cancel_requests(engine);
dcb4c12a
OM
2498 }
2499
1d62beea
BW
2500 /*
2501 * We must free the requests after all the corresponding objects have
 2502 * been moved off active lists, which is the same order that the normal
 2503 * retire_requests function uses. This is important if objects hold
2504 * implicit references on things like e.g. ppgtt address spaces through
2505 * the request.
2506 */
87b723a1
CW
2507 request = i915_gem_active_raw(&engine->last_request,
2508 &engine->i915->drm.struct_mutex);
dcff85c8 2509 if (request)
05235c53 2510 i915_gem_request_retire_upto(request);
dcff85c8 2511 GEM_BUG_ON(intel_engine_is_active(engine));
608c1a52
CW
2512
2513 /* Having flushed all requests from all queues, we know that all
2514 * ringbuffers must now be empty. However, since we do not reclaim
2515 * all space when retiring the request (to prevent HEADs colliding
2516 * with rapid ringbuffer wraparound) the amount of available space
2517 * upon reset is less than when we start. Do one more pass over
2518 * all the ringbuffers to reset last_retired_head.
2519 */
7e37f889
CW
2520 list_for_each_entry(ring, &engine->buffers, link) {
2521 ring->last_retired_head = ring->tail;
2522 intel_ring_update_space(ring);
608c1a52 2523 }
2ed53a94 2524
b913b33c 2525 engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
673a394b
EA
2526}
2527
069efc1d 2528void i915_gem_reset(struct drm_device *dev)
673a394b 2529{
fac5e23e 2530 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 2531 struct intel_engine_cs *engine;
673a394b 2532
4db080f9
CW
2533 /*
2534 * Before we free the objects from the requests, we need to inspect
2535 * them for finding the guilty party. As the requests only borrow
2536 * their reference to the objects, the inspection must be done first.
2537 */
b4ac5afc 2538 for_each_engine(engine, dev_priv)
7b4d3a16 2539 i915_gem_reset_engine_status(engine);
4db080f9 2540
b4ac5afc 2541 for_each_engine(engine, dev_priv)
7b4d3a16 2542 i915_gem_reset_engine_cleanup(engine);
b913b33c 2543 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
dfaae392 2544
acce9ffa
BW
2545 i915_gem_context_reset(dev);
2546
19b2dbde 2547 i915_gem_restore_fences(dev);
673a394b
EA
2548}
2549
75ef9da2 2550static void
673a394b
EA
2551i915_gem_retire_work_handler(struct work_struct *work)
2552{
b29c19b6 2553 struct drm_i915_private *dev_priv =
67d97da3 2554 container_of(work, typeof(*dev_priv), gt.retire_work.work);
91c8a326 2555 struct drm_device *dev = &dev_priv->drm;
673a394b 2556
891b48cf 2557 /* Come back later if the device is busy... */
b29c19b6 2558 if (mutex_trylock(&dev->struct_mutex)) {
67d97da3 2559 i915_gem_retire_requests(dev_priv);
b29c19b6 2560 mutex_unlock(&dev->struct_mutex);
673a394b 2561 }
67d97da3
CW
2562
2563 /* Keep the retire handler running until we are finally idle.
2564 * We do not need to do this test under locking as in the worst-case
2565 * we queue the retire worker once too often.
2566 */
c9615613
CW
2567 if (READ_ONCE(dev_priv->gt.awake)) {
2568 i915_queue_hangcheck(dev_priv);
67d97da3
CW
2569 queue_delayed_work(dev_priv->wq,
2570 &dev_priv->gt.retire_work,
bcb45086 2571 round_jiffies_up_relative(HZ));
c9615613 2572 }
b29c19b6 2573}
0a58705b 2574
b29c19b6
CW
2575static void
2576i915_gem_idle_work_handler(struct work_struct *work)
2577{
2578 struct drm_i915_private *dev_priv =
67d97da3 2579 container_of(work, typeof(*dev_priv), gt.idle_work.work);
91c8a326 2580 struct drm_device *dev = &dev_priv->drm;
b4ac5afc 2581 struct intel_engine_cs *engine;
67d97da3
CW
2582 bool rearm_hangcheck;
2583
2584 if (!READ_ONCE(dev_priv->gt.awake))
2585 return;
2586
2587 if (READ_ONCE(dev_priv->gt.active_engines))
2588 return;
2589
2590 rearm_hangcheck =
2591 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2592
2593 if (!mutex_trylock(&dev->struct_mutex)) {
2594 /* Currently busy, come back later */
2595 mod_delayed_work(dev_priv->wq,
2596 &dev_priv->gt.idle_work,
2597 msecs_to_jiffies(50));
2598 goto out_rearm;
2599 }
2600
2601 if (dev_priv->gt.active_engines)
2602 goto out_unlock;
b29c19b6 2603
b4ac5afc 2604 for_each_engine(engine, dev_priv)
67d97da3 2605 i915_gem_batch_pool_fini(&engine->batch_pool);
35c94185 2606
67d97da3
CW
2607 GEM_BUG_ON(!dev_priv->gt.awake);
2608 dev_priv->gt.awake = false;
2609 rearm_hangcheck = false;
30ecad77 2610
67d97da3
CW
2611 if (INTEL_GEN(dev_priv) >= 6)
2612 gen6_rps_idle(dev_priv);
2613 intel_runtime_pm_put(dev_priv);
2614out_unlock:
2615 mutex_unlock(&dev->struct_mutex);
b29c19b6 2616
67d97da3
CW
2617out_rearm:
2618 if (rearm_hangcheck) {
2619 GEM_BUG_ON(!dev_priv->gt.awake);
2620 i915_queue_hangcheck(dev_priv);
35c94185 2621 }
673a394b
EA
2622}
2623
b1f788c6
CW
2624void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2625{
2626 struct drm_i915_gem_object *obj = to_intel_bo(gem);
2627 struct drm_i915_file_private *fpriv = file->driver_priv;
2628 struct i915_vma *vma, *vn;
2629
2630 mutex_lock(&obj->base.dev->struct_mutex);
2631 list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
2632 if (vma->vm->file == fpriv)
2633 i915_vma_close(vma);
2634 mutex_unlock(&obj->base.dev->struct_mutex);
2635}
2636
23ba4fd0
BW
2637/**
2638 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
14bb2c11
TU
2639 * @dev: drm device pointer
2640 * @data: ioctl data blob
2641 * @file: drm file pointer
23ba4fd0
BW
2642 *
2643 * Returns 0 if successful, else an error is returned with the remaining time in
2644 * the timeout parameter.
2645 * -ETIME: object is still busy after timeout
2646 * -ERESTARTSYS: signal interrupted the wait
 2647 * -ENOENT: object doesn't exist
2648 * Also possible, but rare:
2649 * -EAGAIN: GPU wedged
2650 * -ENOMEM: damn
2651 * -ENODEV: Internal IRQ fail
2652 * -E?: The add request failed
2653 *
2654 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2655 * non-zero timeout parameter the wait ioctl will wait for the given number of
2656 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2657 * without holding struct_mutex the object may become re-busied before this
 2658 * function completes. A similar but shorter race condition exists in the busy
 2659 * ioctl.
2660 */
2661int
2662i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2663{
2664 struct drm_i915_gem_wait *args = data;
033d549b 2665 struct intel_rps_client *rps = to_rps_client(file);
23ba4fd0 2666 struct drm_i915_gem_object *obj;
033d549b
CW
2667 unsigned long active;
2668 int idx, ret = 0;
23ba4fd0 2669
11b5d511
DV
2670 if (args->flags != 0)
2671 return -EINVAL;
2672
03ac0642 2673 obj = i915_gem_object_lookup(file, args->bo_handle);
033d549b 2674 if (!obj)
23ba4fd0 2675 return -ENOENT;
23ba4fd0 2676
033d549b
CW
2677 active = __I915_BO_ACTIVE(obj);
2678 for_each_active(active, idx) {
2679 s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
2680 ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true,
2681 timeout, rps);
2682 if (ret)
2683 break;
b4716185
CW
2684 }
2685
033d549b 2686 i915_gem_object_put_unlocked(obj);
ff865885 2687 return ret;
23ba4fd0
BW
2688}
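/* Typical userspace usage (illustrative sketch): wait up to one second for
 * the object to go idle, or pass timeout_ns = 0 to merely poll busyness:
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 1000 * 1000 * 1000,
 *	};
 *
 *	ret = ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * On success the remaining time is written back to wait.timeout_ns; -ETIME
 * means the object was still busy when the timeout expired.
 */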
2689
b4716185 2690static int
fa545cbf 2691__i915_gem_object_sync(struct drm_i915_gem_request *to,
8e637178 2692 struct drm_i915_gem_request *from)
b4716185 2693{
b4716185
CW
2694 int ret;
2695
8e637178 2696 if (to->engine == from->engine)
b4716185
CW
2697 return 0;
2698
39df9190 2699 if (!i915.semaphores) {
776f3236
CW
2700 ret = i915_wait_request(from,
2701 from->i915->mm.interruptible,
2702 NULL,
2703 NO_WAITBOOST);
b4716185
CW
2704 if (ret)
2705 return ret;
b4716185 2706 } else {
8e637178 2707 int idx = intel_engine_sync_index(from->engine, to->engine);
ddf07be7 2708 if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
b4716185
CW
2709 return 0;
2710
8e637178 2711 trace_i915_gem_ring_sync_to(to, from);
ddf07be7 2712 ret = to->engine->semaphore.sync_to(to, from);
b4716185
CW
2713 if (ret)
2714 return ret;
2715
ddf07be7 2716 from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
b4716185
CW
2717 }
2718
2719 return 0;
2720}
2721
5816d648
BW
2722/**
2723 * i915_gem_object_sync - sync an object to a ring.
2724 *
2725 * @obj: object which may be in use on another ring.
8e637178 2726 * @to: request we are wishing to use
5816d648
BW
2727 *
2728 * This code is meant to abstract object synchronization with the GPU.
8e637178
CW
2729 * Conceptually we serialise writes between engines inside the GPU.
2730 * We only allow one engine to write into a buffer at any time, but
2731 * multiple readers. To ensure each has a coherent view of memory, we must:
b4716185
CW
2732 *
2733 * - If there is an outstanding write request to the object, the new
2734 * request must wait for it to complete (either CPU or in hw, requests
2735 * on the same ring will be naturally ordered).
2736 *
2737 * - If we are a write request (pending_write_domain is set), the new
2738 * request must wait for outstanding read requests to complete.
5816d648
BW
2739 *
2740 * Returns 0 if successful, else propagates up the lower layer error.
2741 */
2911a35b
BW
2742int
2743i915_gem_object_sync(struct drm_i915_gem_object *obj,
8e637178 2744 struct drm_i915_gem_request *to)
2911a35b 2745{
8cac6f6c
CW
2746 struct i915_gem_active *active;
2747 unsigned long active_mask;
2748 int idx;
41c52415 2749
8cac6f6c 2750 lockdep_assert_held(&obj->base.dev->struct_mutex);
2911a35b 2751
573adb39 2752 active_mask = i915_gem_object_get_active(obj);
8cac6f6c
CW
2753 if (!active_mask)
2754 return 0;
27c01aae 2755
8cac6f6c
CW
2756 if (obj->base.pending_write_domain) {
2757 active = obj->last_read;
b4716185 2758 } else {
8cac6f6c
CW
2759 active_mask = 1;
2760 active = &obj->last_write;
b4716185 2761 }
8cac6f6c
CW
2762
2763 for_each_active(active_mask, idx) {
2764 struct drm_i915_gem_request *request;
2765 int ret;
2766
2767 request = i915_gem_active_peek(&active[idx],
2768 &obj->base.dev->struct_mutex);
2769 if (!request)
2770 continue;
2771
fa545cbf 2772 ret = __i915_gem_object_sync(to, request);
b4716185
CW
2773 if (ret)
2774 return ret;
2775 }
2911a35b 2776
b4716185 2777 return 0;
2911a35b
BW
2778}
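/* A minimal usage sketch (assumptions: 'req' is the request being built and
 * struct_mutex is held), mirroring how the execbuffer path orders access to
 * a shared object before emitting commands that read or write it:
 *
 *	ret = i915_gem_object_sync(obj, req);
 *	if (ret)
 *		return ret;
 *
 * Depending on i915.semaphores this either waits on the CPU for the other
 * engine's request to complete or emits a semaphore wait into 'req'.
 */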
2779
b5ffc9bc
CW
2780static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2781{
2782 u32 old_write_domain, old_read_domains;
2783
b5ffc9bc
CW
2784 /* Force a pagefault for domain tracking on next user access */
2785 i915_gem_release_mmap(obj);
2786
b97c3d9c
KP
2787 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2788 return;
2789
b5ffc9bc
CW
2790 old_read_domains = obj->base.read_domains;
2791 old_write_domain = obj->base.write_domain;
2792
2793 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2794 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2795
2796 trace_i915_gem_object_change_domain(obj,
2797 old_read_domains,
2798 old_write_domain);
2799}
2800
8ef8561f
CW
2801static void __i915_vma_iounmap(struct i915_vma *vma)
2802{
20dfbde4 2803 GEM_BUG_ON(i915_vma_is_pinned(vma));
8ef8561f
CW
2804
2805 if (vma->iomap == NULL)
2806 return;
2807
2808 io_mapping_unmap(vma->iomap);
2809 vma->iomap = NULL;
2810}
2811
df0e9a28 2812int i915_vma_unbind(struct i915_vma *vma)
673a394b 2813{
07fe0b12 2814 struct drm_i915_gem_object *obj = vma->obj;
b0decaf7 2815 unsigned long active;
43e28f09 2816 int ret;
673a394b 2817
b0decaf7
CW
2818 /* First wait upon any activity as retiring the request may
2819 * have side-effects such as unpinning or even unbinding this vma.
2820 */
2821 active = i915_vma_get_active(vma);
df0e9a28 2822 if (active) {
b0decaf7
CW
2823 int idx;
2824
b1f788c6
CW
2825 /* When a closed VMA is retired, it is unbound - eek.
2826 * In order to prevent it from being recursively closed,
2827 * take a pin on the vma so that the second unbind is
2828 * aborted.
2829 */
20dfbde4 2830 __i915_vma_pin(vma);
b1f788c6 2831
b0decaf7
CW
2832 for_each_active(active, idx) {
2833 ret = i915_gem_active_retire(&vma->last_read[idx],
2834 &vma->vm->dev->struct_mutex);
2835 if (ret)
b1f788c6 2836 break;
b0decaf7
CW
2837 }
2838
20dfbde4 2839 __i915_vma_unpin(vma);
b1f788c6
CW
2840 if (ret)
2841 return ret;
2842
b0decaf7
CW
2843 GEM_BUG_ON(i915_vma_is_active(vma));
2844 }
2845
20dfbde4 2846 if (i915_vma_is_pinned(vma))
b0decaf7
CW
2847 return -EBUSY;
2848
b1f788c6
CW
2849 if (!drm_mm_node_allocated(&vma->node))
2850 goto destroy;
433544bd 2851
15717de2
CW
2852 GEM_BUG_ON(obj->bind_count == 0);
2853 GEM_BUG_ON(!obj->pages);
c4670ad0 2854
3272db53
CW
2855 if (i915_vma_is_ggtt(vma) &&
2856 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
8b1bc9b4 2857 i915_gem_object_finish_gtt(obj);
5323fd04 2858
8b1bc9b4
DV
2859 /* release the fence reg _after_ flushing */
2860 ret = i915_gem_object_put_fence(obj);
2861 if (ret)
2862 return ret;
8ef8561f
CW
2863
2864 __i915_vma_iounmap(vma);
8b1bc9b4 2865 }
96b47b65 2866
50e046b6
CW
2867 if (likely(!vma->vm->closed)) {
2868 trace_i915_vma_unbind(vma);
2869 vma->vm->unbind_vma(vma);
2870 }
3272db53 2871 vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
6f65e29a 2872
50e046b6
CW
2873 drm_mm_remove_node(&vma->node);
2874 list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
2875
3272db53 2876 if (i915_vma_is_ggtt(vma)) {
fe14d5f4
TU
2877 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2878 obj->map_and_fenceable = false;
247177dd
CW
2879 } else if (vma->pages) {
2880 sg_free_table(vma->pages);
2881 kfree(vma->pages);
fe14d5f4
TU
2882 }
2883 }
247177dd 2884 vma->pages = NULL;
673a394b 2885
2f633156 2886 /* Since the unbound list is global, only move to that list if
b93dab6e 2887 * no more VMAs exist. */
15717de2
CW
2888 if (--obj->bind_count == 0)
2889 list_move_tail(&obj->global_list,
2890 &to_i915(obj->base.dev)->mm.unbound_list);
673a394b 2891
70903c3b
CW
2892 /* And finally now the object is completely decoupled from this vma,
2893 * we can drop its hold on the backing storage and allow it to be
2894 * reaped by the shrinker.
2895 */
2896 i915_gem_object_unpin_pages(obj);
2897
b1f788c6 2898destroy:
3272db53 2899 if (unlikely(i915_vma_is_closed(vma)))
b1f788c6
CW
2900 i915_vma_destroy(vma);
2901
88241785 2902 return 0;
54cf91dc
CW
2903}
2904
dcff85c8
CW
2905int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
2906 bool interruptible)
4df2faf4 2907{
e2f80391 2908 struct intel_engine_cs *engine;
b4ac5afc 2909 int ret;
4df2faf4 2910
b4ac5afc 2911 for_each_engine(engine, dev_priv) {
62e63007
CW
2912 if (engine->last_context == NULL)
2913 continue;
2914
dcff85c8 2915 ret = intel_engine_idle(engine, interruptible);
1ec14ad3
CW
2916 if (ret)
2917 return ret;
2918 }
4df2faf4 2919
8a1a49f9 2920 return 0;
4df2faf4
DV
2921}
2922
4144f9b5 2923static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
42d6ab48
CW
2924 unsigned long cache_level)
2925{
4144f9b5 2926 struct drm_mm_node *gtt_space = &vma->node;
42d6ab48
CW
2927 struct drm_mm_node *other;
2928
4144f9b5
CW
2929 /*
2930 * On some machines we have to be careful when putting differing types
2931 * of snoopable memory together to avoid the prefetcher crossing memory
2932 * domains and dying. During vm initialisation, we decide whether or not
2933 * these constraints apply and set the drm_mm.color_adjust
2934 * appropriately.
42d6ab48 2935 */
4144f9b5 2936 if (vma->vm->mm.color_adjust == NULL)
42d6ab48
CW
2937 return true;
2938
c6cfb325 2939 if (!drm_mm_node_allocated(gtt_space))
42d6ab48
CW
2940 return true;
2941
2942 if (list_empty(&gtt_space->node_list))
2943 return true;
2944
2945 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2946 if (other->allocated && !other->hole_follows && other->color != cache_level)
2947 return false;
2948
2949 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2950 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2951 return false;
2952
2953 return true;
2954}
2955
673a394b 2956/**
59bfa124
CW
2957 * i915_vma_insert - finds a slot for the vma in its address space
2958 * @vma: the vma
91b2db6f 2959 * @size: requested size in bytes (can be larger than the VMA)
59bfa124 2960 * @alignment: required alignment
14bb2c11 2961 * @flags: mask of PIN_* flags to use
59bfa124
CW
2962 *
2963 * First we try to allocate some free space that meets the requirements for
 2964 * the VMA. Failing that, if the flags permit, it will evict an old VMA,
 2965 * preferably the oldest idle entry to make room for the new VMA.
2966 *
2967 * Returns:
2968 * 0 on success, negative error code otherwise.
673a394b 2969 */
59bfa124
CW
2970static int
2971i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
673a394b 2972{
59bfa124
CW
2973 struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
2974 struct drm_i915_gem_object *obj = vma->obj;
de180033
CW
2975 u64 start, end;
2976 u64 min_alignment;
07f73f69 2977 int ret;
673a394b 2978
3272db53 2979 GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
59bfa124 2980 GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
de180033
CW
2981
2982 size = max(size, vma->size);
2983 if (flags & PIN_MAPPABLE)
3e510a8e
CW
2984 size = i915_gem_get_ggtt_size(dev_priv, size,
2985 i915_gem_object_get_tiling(obj));
de180033
CW
2986
2987 min_alignment =
3e510a8e
CW
2988 i915_gem_get_ggtt_alignment(dev_priv, size,
2989 i915_gem_object_get_tiling(obj),
de180033
CW
2990 flags & PIN_MAPPABLE);
2991 if (alignment == 0)
2992 alignment = min_alignment;
2993 if (alignment & (min_alignment - 1)) {
2994 DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
2995 alignment, min_alignment);
59bfa124 2996 return -EINVAL;
91e6711e 2997 }
a00b10c3 2998
101b506a 2999 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
de180033
CW
3000
3001 end = vma->vm->total;
101b506a 3002 if (flags & PIN_MAPPABLE)
91b2db6f 3003 end = min_t(u64, end, dev_priv->ggtt.mappable_end);
101b506a 3004 if (flags & PIN_ZONE_4G)
48ea1e32 3005 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
101b506a 3006
91e6711e
JL
3007 /* If binding the object/GGTT view requires more space than the entire
3008 * aperture has, reject it early before evicting everything in a vain
3009 * attempt to find space.
654fc607 3010 */
91e6711e 3011 if (size > end) {
de180033 3012 DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
91b2db6f 3013 size, obj->base.size,
1ec9e26d 3014 flags & PIN_MAPPABLE ? "mappable" : "total",
d23db88c 3015 end);
59bfa124 3016 return -E2BIG;
654fc607
CW
3017 }
3018
37e680a1 3019 ret = i915_gem_object_get_pages(obj);
6c085a72 3020 if (ret)
59bfa124 3021 return ret;
6c085a72 3022
fbdda6fb
CW
3023 i915_gem_object_pin_pages(obj);
3024
506a8e87 3025 if (flags & PIN_OFFSET_FIXED) {
59bfa124 3026 u64 offset = flags & PIN_OFFSET_MASK;
de180033 3027 if (offset & (alignment - 1) || offset > end - size) {
506a8e87 3028 ret = -EINVAL;
de180033 3029 goto err_unpin;
506a8e87 3030 }
de180033 3031
506a8e87
CW
3032 vma->node.start = offset;
3033 vma->node.size = size;
3034 vma->node.color = obj->cache_level;
de180033 3035 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
506a8e87
CW
3036 if (ret) {
3037 ret = i915_gem_evict_for_vma(vma);
3038 if (ret == 0)
de180033
CW
3039 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3040 if (ret)
3041 goto err_unpin;
506a8e87 3042 }
101b506a 3043 } else {
de180033
CW
3044 u32 search_flag, alloc_flag;
3045
506a8e87
CW
3046 if (flags & PIN_HIGH) {
3047 search_flag = DRM_MM_SEARCH_BELOW;
3048 alloc_flag = DRM_MM_CREATE_TOP;
3049 } else {
3050 search_flag = DRM_MM_SEARCH_DEFAULT;
3051 alloc_flag = DRM_MM_CREATE_DEFAULT;
3052 }
101b506a 3053
954c4691
CW
3054 /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3055 * so we know that we always have a minimum alignment of 4096.
3056 * The drm_mm range manager is optimised to return results
3057 * with zero alignment, so where possible use the optimal
3058 * path.
3059 */
3060 if (alignment <= 4096)
3061 alignment = 0;
3062
0a9ae0d7 3063search_free:
de180033
CW
3064 ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
3065 &vma->node,
506a8e87
CW
3066 size, alignment,
3067 obj->cache_level,
3068 start, end,
3069 search_flag,
3070 alloc_flag);
3071 if (ret) {
de180033 3072 ret = i915_gem_evict_something(vma->vm, size, alignment,
506a8e87
CW
3073 obj->cache_level,
3074 start, end,
3075 flags);
3076 if (ret == 0)
3077 goto search_free;
9731129c 3078
de180033 3079 goto err_unpin;
506a8e87 3080 }
673a394b 3081 }
37508589 3082 GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
673a394b 3083
35c20a60 3084 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
de180033 3085 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
15717de2 3086 obj->bind_count++;
bf1a1092 3087
59bfa124 3088 return 0;
2f633156 3089
bc6bc15b 3090err_unpin:
2f633156 3091 i915_gem_object_unpin_pages(obj);
59bfa124 3092 return ret;
673a394b
EA
3093}
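/* A minimal usage sketch of the pin flags handled above: callers reach this
 * function via i915_gem_object_ggtt_pin(), e.g. to place an object at a
 * fixed, CPU-mappable GGTT offset (the 4 MiB offset is purely illustrative):
 *
 *	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 *				       PIN_MAPPABLE |
 *				       PIN_OFFSET_FIXED | (4096 * 1024));
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 * PIN_OFFSET_FIXED takes the reservation branch above, evicting any
 * overlapping VMA; otherwise drm_mm searches (or evicts) for a free slot.
 */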
3094
000433b6 3095bool
2c22569b
CW
3096i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3097 bool force)
673a394b 3098{
673a394b
EA
3099 /* If we don't have a page list set up, then we're not pinned
3100 * to GPU, and we can ignore the cache flush because it'll happen
3101 * again at bind time.
3102 */
05394f39 3103 if (obj->pages == NULL)
000433b6 3104 return false;
673a394b 3105
769ce464
ID
3106 /*
3107 * Stolen memory is always coherent with the GPU as it is explicitly
3108 * marked as wc by the system, or the system is cache-coherent.
3109 */
6a2c4232 3110 if (obj->stolen || obj->phys_handle)
000433b6 3111 return false;
769ce464 3112
9c23f7fc
CW
3113 /* If the GPU is snooping the contents of the CPU cache,
3114 * we do not need to manually clear the CPU cache lines. However,
3115 * the caches are only snooped when the render cache is
3116 * flushed/invalidated. As we always have to emit invalidations
3117 * and flushes when moving into and out of the RENDER domain, correct
3118 * snooping behaviour occurs naturally as the result of our domain
3119 * tracking.
3120 */
0f71979a
CW
3121 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3122 obj->cache_dirty = true;
000433b6 3123 return false;
0f71979a 3124 }
9c23f7fc 3125
1c5d22f7 3126 trace_i915_gem_object_clflush(obj);
9da3da66 3127 drm_clflush_sg(obj->pages);
0f71979a 3128 obj->cache_dirty = false;
000433b6
CW
3129
3130 return true;
e47c68e9
EA
3131}
3132
3133/** Flushes the GTT write domain for the object if it's dirty. */
3134static void
05394f39 3135i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3136{
1c5d22f7
CW
3137 uint32_t old_write_domain;
3138
05394f39 3139 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
e47c68e9
EA
3140 return;
3141
63256ec5 3142 /* No actual flushing is required for the GTT write domain. Writes
e47c68e9
EA
3143 * to it immediately go to main memory as far as we know, so there's
3144 * no chipset flush. It also doesn't land in render cache.
63256ec5
CW
3145 *
3146 * However, we do have to enforce the order so that all writes through
3147 * the GTT land before any writes to the device, such as updates to
3148 * the GATT itself.
e47c68e9 3149 */
63256ec5
CW
3150 wmb();
3151
05394f39
CW
3152 old_write_domain = obj->base.write_domain;
3153 obj->base.write_domain = 0;
1c5d22f7 3154
de152b62 3155 intel_fb_obj_flush(obj, false, ORIGIN_GTT);
f99d7069 3156
1c5d22f7 3157 trace_i915_gem_object_change_domain(obj,
05394f39 3158 obj->base.read_domains,
1c5d22f7 3159 old_write_domain);
e47c68e9
EA
3160}
3161
3162/** Flushes the CPU write domain for the object if it's dirty. */
3163static void
e62b59e4 3164i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3165{
1c5d22f7 3166 uint32_t old_write_domain;
e47c68e9 3167
05394f39 3168 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
e47c68e9
EA
3169 return;
3170
e62b59e4 3171 if (i915_gem_clflush_object(obj, obj->pin_display))
c033666a 3172 i915_gem_chipset_flush(to_i915(obj->base.dev));
000433b6 3173
05394f39
CW
3174 old_write_domain = obj->base.write_domain;
3175 obj->base.write_domain = 0;
1c5d22f7 3176
de152b62 3177 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
f99d7069 3178
1c5d22f7 3179 trace_i915_gem_object_change_domain(obj,
05394f39 3180 obj->base.read_domains,
1c5d22f7 3181 old_write_domain);
e47c68e9
EA
3182}
3183
2ef7eeaa
EA
3184/**
3185 * Moves a single object to the GTT read, and possibly write domain.
14bb2c11
TU
3186 * @obj: object to act on
3187 * @write: ask for write access or read only
2ef7eeaa
EA
3188 *
3189 * This function returns when the move is complete, including waiting on
3190 * flushes to occur.
3191 */
79e53945 3192int
2021746e 3193i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2ef7eeaa 3194{
1c5d22f7 3195 uint32_t old_write_domain, old_read_domains;
43566ded 3196 struct i915_vma *vma;
e47c68e9 3197 int ret;
2ef7eeaa 3198
0201f1ec 3199 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3200 if (ret)
3201 return ret;
3202
c13d87ea
CW
3203 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3204 return 0;
3205
43566ded
CW
3206 /* Flush and acquire obj->pages so that we are coherent through
3207 * direct access in memory with previous cached writes through
3208 * shmemfs and that our cache domain tracking remains valid.
3209 * For example, if the obj->filp was moved to swap without us
3210 * being notified and releasing the pages, we would mistakenly
3211 * continue to assume that the obj remained out of the CPU cached
3212 * domain.
3213 */
3214 ret = i915_gem_object_get_pages(obj);
3215 if (ret)
3216 return ret;
3217
e62b59e4 3218 i915_gem_object_flush_cpu_write_domain(obj);
1c5d22f7 3219
d0a57789
CW
3220 /* Serialise direct access to this object with the barriers for
3221 * coherent writes from the GPU, by effectively invalidating the
3222 * GTT domain upon first access.
3223 */
3224 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3225 mb();
3226
05394f39
CW
3227 old_write_domain = obj->base.write_domain;
3228 old_read_domains = obj->base.read_domains;
1c5d22f7 3229
e47c68e9
EA
3230 /* It should now be out of any other write domains, and we can update
3231 * the domain values for our changes.
3232 */
05394f39
CW
3233 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3234 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
e47c68e9 3235 if (write) {
05394f39
CW
3236 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3237 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3238 obj->dirty = 1;
2ef7eeaa
EA
3239 }
3240
1c5d22f7
CW
3241 trace_i915_gem_object_change_domain(obj,
3242 old_read_domains,
3243 old_write_domain);
3244
8325a09d 3245 /* And bump the LRU for this access */
058d88c4 3246 vma = i915_gem_object_to_ggtt(obj, NULL);
b0decaf7
CW
3247 if (vma &&
3248 drm_mm_node_allocated(&vma->node) &&
3249 !i915_vma_is_active(vma))
3250 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
8325a09d 3251
e47c68e9
EA
3252 return 0;
3253}
3254
ef55f92a
CW
3255/**
3256 * Changes the cache-level of an object across all VMA.
14bb2c11
TU
3257 * @obj: object to act on
3258 * @cache_level: new cache level to set for the object
ef55f92a
CW
3259 *
3260 * After this function returns, the object will be in the new cache-level
3261 * across all GTT and the contents of the backing storage will be coherent,
3262 * with respect to the new cache-level. In order to keep the backing storage
3263 * coherent for all users, we only allow a single cache level to be set
3264 * globally on the object and prevent it from being changed whilst the
3265 * hardware is reading from the object. That is if the object is currently
3266 * on the scanout it will be set to uncached (or equivalent display
3267 * cache coherency) and all non-MOCS GPU access will also be uncached so
3268 * that all direct access to the scanout remains coherent.
3269 */
e4ffd173
CW
3270int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3271 enum i915_cache_level cache_level)
3272{
aa653a68 3273 struct i915_vma *vma;
ed75a55b 3274 int ret = 0;
e4ffd173
CW
3275
3276 if (obj->cache_level == cache_level)
ed75a55b 3277 goto out;
e4ffd173 3278
ef55f92a
CW
3279 /* Inspect the list of currently bound VMA and unbind any that would
3280 * be invalid given the new cache-level. This is principally to
3281 * catch the issue of the CS prefetch crossing page boundaries and
3282 * reading an invalid PTE on older architectures.
3283 */
aa653a68
CW
3284restart:
3285 list_for_each_entry(vma, &obj->vma_list, obj_link) {
ef55f92a
CW
3286 if (!drm_mm_node_allocated(&vma->node))
3287 continue;
3288
20dfbde4 3289 if (i915_vma_is_pinned(vma)) {
ef55f92a
CW
3290 DRM_DEBUG("can not change the cache level of pinned objects\n");
3291 return -EBUSY;
3292 }
3293
aa653a68
CW
3294 if (i915_gem_valid_gtt_space(vma, cache_level))
3295 continue;
3296
3297 ret = i915_vma_unbind(vma);
3298 if (ret)
3299 return ret;
3300
3301 /* As unbinding may affect other elements in the
3302 * obj->vma_list (due to side-effects from retiring
3303 * an active vma), play safe and restart the iterator.
3304 */
3305 goto restart;
42d6ab48
CW
3306 }
3307
ef55f92a
CW
3308 /* We can reuse the existing drm_mm nodes but need to change the
3309 * cache-level on the PTE. We could simply unbind them all and
3310 * rebind with the correct cache-level on next use. However since
 3311 * we already have a valid slot, dma mapping, pages etc, we may as well
3312 * rewrite the PTE in the belief that doing so tramples upon less
3313 * state and so involves less work.
3314 */
15717de2 3315 if (obj->bind_count) {
ef55f92a
CW
3316 /* Before we change the PTE, the GPU must not be accessing it.
3317 * If we wait upon the object, we know that all the bound
3318 * VMA are no longer active.
3319 */
2e2f351d 3320 ret = i915_gem_object_wait_rendering(obj, false);
e4ffd173
CW
3321 if (ret)
3322 return ret;
3323
aa653a68 3324 if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
ef55f92a
CW
3325 /* Access to snoopable pages through the GTT is
3326 * incoherent and on some machines causes a hard
 3327 * lockup. Relinquish the CPU mmapping to force
3328 * userspace to refault in the pages and we can
3329 * then double check if the GTT mapping is still
3330 * valid for that pointer access.
3331 */
3332 i915_gem_release_mmap(obj);
3333
3334 /* As we no longer need a fence for GTT access,
3335 * we can relinquish it now (and so prevent having
3336 * to steal a fence from someone else on the next
3337 * fence request). Note GPU activity would have
3338 * dropped the fence as all snoopable access is
3339 * supposed to be linear.
3340 */
e4ffd173
CW
3341 ret = i915_gem_object_put_fence(obj);
3342 if (ret)
3343 return ret;
ef55f92a
CW
3344 } else {
3345 /* We either have incoherent backing store and
3346 * so no GTT access or the architecture is fully
3347 * coherent. In such cases, existing GTT mmaps
3348 * ignore the cache bit in the PTE and we can
3349 * rewrite it without confusing the GPU or having
3350 * to force userspace to fault back in its mmaps.
3351 */
e4ffd173
CW
3352 }
3353
1c7f4bca 3354 list_for_each_entry(vma, &obj->vma_list, obj_link) {
ef55f92a
CW
3355 if (!drm_mm_node_allocated(&vma->node))
3356 continue;
3357
3358 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3359 if (ret)
3360 return ret;
3361 }
e4ffd173
CW
3362 }
3363
1c7f4bca 3364 list_for_each_entry(vma, &obj->vma_list, obj_link)
2c22569b
CW
3365 vma->node.color = cache_level;
3366 obj->cache_level = cache_level;
3367
ed75a55b 3368out:
ef55f92a
CW
3369 /* Flush the dirty CPU caches to the backing storage so that the
3370 * object is now coherent at its new cache level (with respect
3371 * to the access domain).
3372 */
b50a5371 3373 if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
0f71979a 3374 if (i915_gem_clflush_object(obj, true))
c033666a 3375 i915_gem_chipset_flush(to_i915(obj->base.dev));
e4ffd173
CW
3376 }
3377
e4ffd173
CW
3378 return 0;
3379}
3380
199adf40
BW
3381int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3382 struct drm_file *file)
e6994aee 3383{
199adf40 3384 struct drm_i915_gem_caching *args = data;
e6994aee 3385 struct drm_i915_gem_object *obj;
e6994aee 3386
03ac0642
CW
3387 obj = i915_gem_object_lookup(file, args->handle);
3388 if (!obj)
432be69d 3389 return -ENOENT;
e6994aee 3390
651d794f
CW
3391 switch (obj->cache_level) {
3392 case I915_CACHE_LLC:
3393 case I915_CACHE_L3_LLC:
3394 args->caching = I915_CACHING_CACHED;
3395 break;
3396
4257d3ba
CW
3397 case I915_CACHE_WT:
3398 args->caching = I915_CACHING_DISPLAY;
3399 break;
3400
651d794f
CW
3401 default:
3402 args->caching = I915_CACHING_NONE;
3403 break;
3404 }
e6994aee 3405
34911fd3 3406 i915_gem_object_put_unlocked(obj);
432be69d 3407 return 0;
e6994aee
CW
3408}
3409
199adf40
BW
3410int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3411 struct drm_file *file)
e6994aee 3412{
fac5e23e 3413 struct drm_i915_private *dev_priv = to_i915(dev);
199adf40 3414 struct drm_i915_gem_caching *args = data;
e6994aee
CW
3415 struct drm_i915_gem_object *obj;
3416 enum i915_cache_level level;
3417 int ret;
3418
199adf40
BW
3419 switch (args->caching) {
3420 case I915_CACHING_NONE:
e6994aee
CW
3421 level = I915_CACHE_NONE;
3422 break;
199adf40 3423 case I915_CACHING_CACHED:
e5756c10
ID
3424 /*
3425 * Due to a HW issue on BXT A stepping, GPU stores via a
3426 * snooped mapping may leave stale data in a corresponding CPU
3427 * cacheline, whereas normally such cachelines would get
3428 * invalidated.
3429 */
ca377809 3430 if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
e5756c10
ID
3431 return -ENODEV;
3432
e6994aee
CW
3433 level = I915_CACHE_LLC;
3434 break;
4257d3ba
CW
3435 case I915_CACHING_DISPLAY:
3436 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3437 break;
e6994aee
CW
3438 default:
3439 return -EINVAL;
3440 }
3441
fd0fe6ac
ID
3442 intel_runtime_pm_get(dev_priv);
3443
3bc2913e
BW
3444 ret = i915_mutex_lock_interruptible(dev);
3445 if (ret)
fd0fe6ac 3446 goto rpm_put;
3bc2913e 3447
03ac0642
CW
3448 obj = i915_gem_object_lookup(file, args->handle);
3449 if (!obj) {
e6994aee
CW
3450 ret = -ENOENT;
3451 goto unlock;
3452 }
3453
3454 ret = i915_gem_object_set_cache_level(obj, level);
3455
f8c417cd 3456 i915_gem_object_put(obj);
e6994aee
CW
3457unlock:
3458 mutex_unlock(&dev->struct_mutex);
fd0fe6ac
ID
3459rpm_put:
3460 intel_runtime_pm_put(dev_priv);
3461
e6994aee
CW
3462 return ret;
3463}
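/* Illustrative userspace sketch (an addition, not part of the original file):
 * requesting snooped (cached) access for an object via the ioctl above,
 * assuming a valid fd and GEM handle and libdrm's drmIoctl():
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */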
3464
b9241ea3 3465/*
2da3b9b9
CW
3466 * Prepare buffer for display plane (scanout, cursors, etc).
3467 * Can be called from an uninterruptible phase (modesetting) and allows
3468 * any flushes to be pipelined (for pageflips).
b9241ea3 3469 */
058d88c4 3470struct i915_vma *
2da3b9b9
CW
3471i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3472 u32 alignment,
e6617330 3473 const struct i915_ggtt_view *view)
b9241ea3 3474{
058d88c4 3475 struct i915_vma *vma;
2da3b9b9 3476 u32 old_read_domains, old_write_domain;
b9241ea3
ZW
3477 int ret;
3478
cc98b413
CW
3479 /* Mark the pin_display early so that we account for the
3480 * display coherency whilst setting up the cache domains.
3481 */
8a0c39b1 3482 obj->pin_display++;
cc98b413 3483
a7ef0640
EA
3484 /* The display engine is not coherent with the LLC cache on gen6. As
3485 * a result, we make sure that the pinning that is about to occur is
 3486 * done with uncached PTEs. This is the lowest common denominator for all
3487 * chipsets.
3488 *
3489 * However for gen6+, we could do better by using the GFDT bit instead
3490 * of uncaching, which would allow us to flush all the LLC-cached data
3491 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3492 */
651d794f
CW
3493 ret = i915_gem_object_set_cache_level(obj,
3494 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
058d88c4
CW
3495 if (ret) {
3496 vma = ERR_PTR(ret);
cc98b413 3497 goto err_unpin_display;
058d88c4 3498 }
a7ef0640 3499
2da3b9b9
CW
3500 /* As the user may map the buffer once pinned in the display plane
3501 * (e.g. libkms for the bootup splash), we have to ensure that we
3502 * always use map_and_fenceable for all scanout buffers.
3503 */
058d88c4 3504 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
50470bb0
TU
3505 view->type == I915_GGTT_VIEW_NORMAL ?
3506 PIN_MAPPABLE : 0);
058d88c4 3507 if (IS_ERR(vma))
cc98b413 3508 goto err_unpin_display;
2da3b9b9 3509
058d88c4
CW
3510 WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
3511
e62b59e4 3512 i915_gem_object_flush_cpu_write_domain(obj);
b118c1e3 3513
2da3b9b9 3514 old_write_domain = obj->base.write_domain;
05394f39 3515 old_read_domains = obj->base.read_domains;
2da3b9b9
CW
3516
3517 /* It should now be out of any other write domains, and we can update
3518 * the domain values for our changes.
3519 */
e5f1d962 3520 obj->base.write_domain = 0;
05394f39 3521 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
3522
3523 trace_i915_gem_object_change_domain(obj,
3524 old_read_domains,
2da3b9b9 3525 old_write_domain);
b9241ea3 3526
058d88c4 3527 return vma;
cc98b413
CW
3528
3529err_unpin_display:
8a0c39b1 3530 obj->pin_display--;
058d88c4 3531 return vma;
cc98b413
CW
3532}
3533
3534void
058d88c4 3535i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
cc98b413 3536{
058d88c4 3537 if (WARN_ON(vma->obj->pin_display == 0))
8a0c39b1
TU
3538 return;
3539
058d88c4 3540 vma->obj->pin_display--;
e6617330 3541
058d88c4
CW
3542 i915_vma_unpin(vma);
3543 WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
b9241ea3
ZW
3544}
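/* Illustrative sketch (an addition, not from the original file): the expected
 * pairing of the two display helpers above in a modeset path, assuming
 * struct_mutex is held by the caller:
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, alignment, view);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	... program the scanout registers from vma->node.start ...
 *	i915_gem_object_unpin_from_display_plane(vma);
 */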
3545
e47c68e9
EA
3546/**
3547 * Moves a single object to the CPU read, and possibly write domain.
14bb2c11
TU
3548 * @obj: object to act on
3549 * @write: requesting write or read-only access
e47c68e9
EA
3550 *
3551 * This function returns when the move is complete, including waiting on
3552 * flushes to occur.
3553 */
dabdfe02 3554int
919926ae 3555i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
e47c68e9 3556{
1c5d22f7 3557 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
3558 int ret;
3559
0201f1ec 3560 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3561 if (ret)
3562 return ret;
3563
c13d87ea
CW
3564 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3565 return 0;
3566
e47c68e9 3567 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 3568
05394f39
CW
3569 old_write_domain = obj->base.write_domain;
3570 old_read_domains = obj->base.read_domains;
1c5d22f7 3571
e47c68e9 3572 /* Flush the CPU cache if it's still invalid. */
05394f39 3573 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2c22569b 3574 i915_gem_clflush_object(obj, false);
2ef7eeaa 3575
05394f39 3576 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
3577 }
3578
3579 /* It should now be out of any other write domains, and we can update
3580 * the domain values for our changes.
3581 */
05394f39 3582 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9
EA
3583
3584 /* If we're writing through the CPU, then the GPU read domains will
3585 * need to be invalidated at next use.
3586 */
3587 if (write) {
05394f39
CW
3588 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3589 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e47c68e9 3590 }
2ef7eeaa 3591
1c5d22f7
CW
3592 trace_i915_gem_object_change_domain(obj,
3593 old_read_domains,
3594 old_write_domain);
3595
2ef7eeaa
EA
3596 return 0;
3597}
3598
673a394b
EA
3599/* Throttle our rendering by waiting until the ring has completed our requests
3600 * emitted over 20 msec ago.
3601 *
b962442e
EA
3602 * Note that if we were to use the current jiffies each time around the loop,
3603 * we wouldn't escape the function with any frames outstanding if the time to
3604 * render a frame was over 20ms.
3605 *
673a394b
EA
3606 * This should get us reasonable parallelism between CPU and GPU but also
3607 * relatively low latency when blocking on a particular request to finish.
3608 */
40a5f0de 3609static int
f787a5f5 3610i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
40a5f0de 3611{
fac5e23e 3612 struct drm_i915_private *dev_priv = to_i915(dev);
f787a5f5 3613 struct drm_i915_file_private *file_priv = file->driver_priv;
d0bc54f2 3614 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
54fb2411 3615 struct drm_i915_gem_request *request, *target = NULL;
f787a5f5 3616 int ret;
93533c29 3617
308887aa
DV
3618 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3619 if (ret)
3620 return ret;
3621
f4457ae7
CW
3622 /* ABI: return -EIO if already wedged */
3623 if (i915_terminally_wedged(&dev_priv->gpu_error))
3624 return -EIO;
e110e8d6 3625
1c25595f 3626 spin_lock(&file_priv->mm.lock);
f787a5f5 3627 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
b962442e
EA
3628 if (time_after_eq(request->emitted_jiffies, recent_enough))
3629 break;
40a5f0de 3630
fcfa423c
JH
3631 /*
 3632 * Note that the request might not have been submitted yet,
 3633 * in which case emitted_jiffies will be zero.
3634 */
3635 if (!request->emitted_jiffies)
3636 continue;
3637
54fb2411 3638 target = request;
b962442e 3639 }
ff865885 3640 if (target)
e8a261ea 3641 i915_gem_request_get(target);
1c25595f 3642 spin_unlock(&file_priv->mm.lock);
40a5f0de 3643
54fb2411 3644 if (target == NULL)
f787a5f5 3645 return 0;
2bc43b5c 3646
776f3236 3647 ret = i915_wait_request(target, true, NULL, NULL);
e8a261ea 3648 i915_gem_request_put(target);
ff865885 3649
40a5f0de
EA
3650 return ret;
3651}
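/* Worked example for the throttle above (an addition): with client requests
 * emitted 30ms, 15ms and 5ms ago, and the per-file list kept oldest-first,
 * recent_enough is jiffies - 20ms. The walk stops at the 15ms-old request,
 * leaving target as the 30ms-old one (the newest request already outside the
 * 20ms window), and that request is then waited upon before returning.
 */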
3652
d23db88c 3653static bool
91b2db6f 3654i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
d23db88c
CW
3655{
3656 struct drm_i915_gem_object *obj = vma->obj;
3657
59bfa124
CW
3658 if (!drm_mm_node_allocated(&vma->node))
3659 return false;
3660
91b2db6f
CW
3661 if (vma->node.size < size)
3662 return true;
3663
3664 if (alignment && vma->node.start & (alignment - 1))
d23db88c
CW
3665 return true;
3666
3667 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3668 return true;
3669
3670 if (flags & PIN_OFFSET_BIAS &&
3671 vma->node.start < (flags & PIN_OFFSET_MASK))
3672 return true;
3673
506a8e87
CW
3674 if (flags & PIN_OFFSET_FIXED &&
3675 vma->node.start != (flags & PIN_OFFSET_MASK))
3676 return true;
3677
d23db88c
CW
3678 return false;
3679}
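/* Illustrative note (an addition): the alignment test in i915_vma_misplaced()
 * assumes alignment is a power of two, so e.g. with alignment == 4096
 *
 *	vma->node.start & (alignment - 1)
 *
 * is non-zero exactly when node.start is not 4KiB aligned.
 */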
3680
d0710abb
CW
3681void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3682{
3683 struct drm_i915_gem_object *obj = vma->obj;
a9f1481f 3684 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
d0710abb
CW
3685 bool mappable, fenceable;
3686 u32 fence_size, fence_alignment;
3687
a9f1481f 3688 fence_size = i915_gem_get_ggtt_size(dev_priv,
ad1a7d20 3689 obj->base.size,
3e510a8e 3690 i915_gem_object_get_tiling(obj));
a9f1481f 3691 fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
ad1a7d20 3692 obj->base.size,
3e510a8e 3693 i915_gem_object_get_tiling(obj),
ad1a7d20 3694 true);
d0710abb
CW
3695
3696 fenceable = (vma->node.size == fence_size &&
3697 (vma->node.start & (fence_alignment - 1)) == 0);
3698
3699 mappable = (vma->node.start + fence_size <=
a9f1481f 3700 dev_priv->ggtt.mappable_end);
d0710abb
CW
3701
3702 obj->map_and_fenceable = mappable && fenceable;
3703}
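/* Illustrative note (an addition): "fenceable" above means the node matches
 * the exact fence size and alignment required for a fence register, while
 * "mappable" means the whole fence region fits below ggtt.mappable_end, the
 * CPU-visible part of the aperture, roughly
 *
 *	vma->node.start + fence_size <= dev_priv->ggtt.mappable_end
 *
 * Only a VMA that is both may service a fenced GTT mmap.
 */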
3704
305bc234
CW
3705int __i915_vma_do_pin(struct i915_vma *vma,
3706 u64 size, u64 alignment, u64 flags)
673a394b 3707{
305bc234 3708 unsigned int bound = vma->flags;
673a394b
EA
3709 int ret;
3710
59bfa124 3711 GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
3272db53 3712 GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
d7f46fc4 3713
305bc234
CW
3714 if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
3715 ret = -EBUSY;
3716 goto err;
3717 }
ac0c6b5a 3718
de895082 3719 if ((bound & I915_VMA_BIND_MASK) == 0) {
59bfa124
CW
3720 ret = i915_vma_insert(vma, size, alignment, flags);
3721 if (ret)
3722 goto err;
fe14d5f4 3723 }
74898d7e 3724
59bfa124 3725 ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
3b16525c 3726 if (ret)
59bfa124 3727 goto err;
3b16525c 3728
3272db53 3729 if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
d0710abb 3730 __i915_vma_set_map_and_fenceable(vma);
ef79e17c 3731
3b16525c 3732 GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
673a394b 3733 return 0;
673a394b 3734
59bfa124
CW
3735err:
3736 __i915_vma_unpin(vma);
3737 return ret;
ec7adb6e
JL
3738}
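/* Illustrative sketch (an addition, not from the original file): a typical
 * pin/use/unpin cycle through the wrappers built on __i915_vma_do_pin(),
 * assuming the caller holds struct_mutex:
 *
 *	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
 *	if (ret)
 *		return ret;
 *	... access the binding through vma->node.start ...
 *	i915_vma_unpin(vma);
 */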
3739
058d88c4 3740struct i915_vma *
ec7adb6e
JL
3741i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3742 const struct i915_ggtt_view *view,
91b2db6f 3743 u64 size,
2ffffd0f
CW
3744 u64 alignment,
3745 u64 flags)
ec7adb6e 3746{
058d88c4 3747 struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base;
59bfa124
CW
3748 struct i915_vma *vma;
3749 int ret;
72e96d64 3750
058d88c4 3751 vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
59bfa124 3752 if (IS_ERR(vma))
058d88c4 3753 return vma;
59bfa124
CW
3754
3755 if (i915_vma_misplaced(vma, size, alignment, flags)) {
3756 if (flags & PIN_NONBLOCK &&
3757 (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
058d88c4 3758 return ERR_PTR(-ENOSPC);
59bfa124
CW
3759
3760 WARN(i915_vma_is_pinned(vma),
3761 "bo is already pinned in ggtt with incorrect alignment:"
3762 " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
3763 " obj->map_and_fenceable=%d\n",
3764 upper_32_bits(vma->node.start),
3765 lower_32_bits(vma->node.start),
3766 alignment,
3767 !!(flags & PIN_MAPPABLE),
3768 obj->map_and_fenceable);
3769 ret = i915_vma_unbind(vma);
3770 if (ret)
058d88c4 3771 return ERR_PTR(ret);
59bfa124
CW
3772 }
3773
058d88c4
CW
3774 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3775 if (ret)
3776 return ERR_PTR(ret);
ec7adb6e 3777
058d88c4 3778 return vma;
673a394b
EA
3779}
3780
edf6b76f 3781static __always_inline unsigned int __busy_read_flag(unsigned int id)
3fdc13c7
CW
3782{
3783 /* Note that we could alias engines in the execbuf API, but
 3784 * that would be very unwise as it would deny userspace
 3785 * fine control over engine selection. Ahem.
3786 *
3787 * This should be something like EXEC_MAX_ENGINE instead of
3788 * I915_NUM_ENGINES.
3789 */
3790 BUILD_BUG_ON(I915_NUM_ENGINES > 16);
3791 return 0x10000 << id;
3792}
3793
3794static __always_inline unsigned int __busy_write_id(unsigned int id)
3795{
70cb472c
CW
3796 /* The uABI guarantees an active writer is also amongst the read
3797 * engines. This would be true if we accessed the activity tracking
3798 * under the lock, but as we perform the lookup of the object and
3799 * its activity locklessly we can not guarantee that the last_write
3800 * being active implies that we have set the same engine flag from
3801 * last_read - hence we always set both read and write busy for
3802 * last_write.
3803 */
3804 return id | __busy_read_flag(id);
3fdc13c7
CW
3805}
3806
edf6b76f 3807static __always_inline unsigned int
3fdc13c7
CW
3808__busy_set_if_active(const struct i915_gem_active *active,
3809 unsigned int (*flag)(unsigned int id))
3810{
3811 /* For more discussion about the barriers and locking concerns,
3812 * see __i915_gem_active_get_rcu().
3813 */
3814 do {
3815 struct drm_i915_gem_request *request;
3816 unsigned int id;
3817
3818 request = rcu_dereference(active->request);
3819 if (!request || i915_gem_request_completed(request))
3820 return 0;
3821
3822 id = request->engine->exec_id;
3823
edf6b76f
CW
3824 /* Check that the pointer wasn't reassigned and overwritten.
3825 *
3826 * In __i915_gem_active_get_rcu(), we enforce ordering between
3827 * the first rcu pointer dereference (imposing a
3828 * read-dependency only on access through the pointer) and
3829 * the second lockless access through the memory barrier
3830 * following a successful atomic_inc_not_zero(). Here there
3831 * is no such barrier, and so we must manually insert an
3832 * explicit read barrier to ensure that the following
3833 * access occurs after all the loads through the first
3834 * pointer.
3835 *
3836 * It is worth comparing this sequence with
3837 * raw_write_seqcount_latch() which operates very similarly.
3838 * The challenge here is the visibility of the other CPU
3839 * writes to the reallocated request vs the local CPU ordering.
3840 * Before the other CPU can overwrite the request, it will
3841 * have updated our active->request and gone through a wmb.
3842 * During the read here, we want to make sure that the values
3843 * we see have not been overwritten as we do so - and we do
3844 * that by serialising the second pointer check with the writes
 3845 * on other CPUs.
3846 *
3847 * The corresponding write barrier is part of
3848 * rcu_assign_pointer().
3849 */
3850 smp_rmb();
3fdc13c7
CW
3851 if (request == rcu_access_pointer(active->request))
3852 return flag(id);
3853 } while (1);
3854}
3855
edf6b76f 3856static __always_inline unsigned int
3fdc13c7
CW
3857busy_check_reader(const struct i915_gem_active *active)
3858{
3859 return __busy_set_if_active(active, __busy_read_flag);
3860}
3861
edf6b76f 3862static __always_inline unsigned int
3fdc13c7
CW
3863busy_check_writer(const struct i915_gem_active *active)
3864{
3865 return __busy_set_if_active(active, __busy_write_id);
3866}
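/* Illustrative sketch (an addition): with the encoding above, the value
 * reported by the busy ioctl below can be split back into its two halves,
 * e.g. in userspace:
 *
 *	u32 write_id  = args.busy & 0xffff;
 *	u32 read_mask = args.busy >> 16;
 *
 * where write_id is the exec_id of the last writer (0 when there is none)
 * and bit N of read_mask is set while exec_id N has an outstanding read.
 */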
3867
673a394b
EA
3868int
3869i915_gem_busy_ioctl(struct drm_device *dev, void *data,
05394f39 3870 struct drm_file *file)
673a394b
EA
3871{
3872 struct drm_i915_gem_busy *args = data;
05394f39 3873 struct drm_i915_gem_object *obj;
3fdc13c7 3874 unsigned long active;
673a394b 3875
03ac0642 3876 obj = i915_gem_object_lookup(file, args->handle);
3fdc13c7
CW
3877 if (!obj)
3878 return -ENOENT;
d1b851fc 3879
426960be 3880 args->busy = 0;
3fdc13c7
CW
3881 active = __I915_BO_ACTIVE(obj);
3882 if (active) {
3883 int idx;
426960be 3884
3fdc13c7
CW
3885 /* Yes, the lookups are intentionally racy.
3886 *
3887 * First, we cannot simply rely on __I915_BO_ACTIVE. We have
3888 * to regard the value as stale and as our ABI guarantees
3889 * forward progress, we confirm the status of each active
3890 * request with the hardware.
3891 *
3892 * Even though we guard the pointer lookup by RCU, that only
3893 * guarantees that the pointer and its contents remain
3894 * dereferencable and does *not* mean that the request we
3895 * have is the same as the one being tracked by the object.
3896 *
3897 * Consider that we lookup the request just as it is being
3898 * retired and freed. We take a local copy of the pointer,
3899 * but before we add its engine into the busy set, the other
3900 * thread reallocates it and assigns it to a task on another
3901 * engine with a fresh and incomplete seqno.
3902 *
3903 * So after we lookup the engine's id, we double check that
3904 * the active request is the same and only then do we add it
3905 * into the busy set.
3906 */
3907 rcu_read_lock();
3908
3909 for_each_active(active, idx)
3910 args->busy |= busy_check_reader(&obj->last_read[idx]);
3911
3912 /* For ABI sanity, we only care that the write engine is in
70cb472c
CW
3913 * the set of read engines. This should be ensured by the
3914 * ordering of setting last_read/last_write in
3915 * i915_vma_move_to_active(), and then in reverse in retire.
3916 * However, for good measure, we always report the last_write
3917 * request as a busy read as well as being a busy write.
3fdc13c7
CW
3918 *
3919 * We don't care that the set of active read/write engines
3920 * may change during construction of the result, as it is
3921 * equally liable to change before userspace can inspect
3922 * the result.
3923 */
3924 args->busy |= busy_check_writer(&obj->last_write);
3925
3926 rcu_read_unlock();
426960be 3927 }
673a394b 3928
3fdc13c7
CW
3929 i915_gem_object_put_unlocked(obj);
3930 return 0;
673a394b
EA
3931}
3932
3933int
3934i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3935 struct drm_file *file_priv)
3936{
0206e353 3937 return i915_gem_ring_throttle(dev, file_priv);
673a394b
EA
3938}
3939
3ef94daa
CW
3940int
3941i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3942 struct drm_file *file_priv)
3943{
fac5e23e 3944 struct drm_i915_private *dev_priv = to_i915(dev);
3ef94daa 3945 struct drm_i915_gem_madvise *args = data;
05394f39 3946 struct drm_i915_gem_object *obj;
76c1dec1 3947 int ret;
3ef94daa
CW
3948
3949 switch (args->madv) {
3950 case I915_MADV_DONTNEED:
3951 case I915_MADV_WILLNEED:
3952 break;
3953 default:
3954 return -EINVAL;
3955 }
3956
1d7cfea1
CW
3957 ret = i915_mutex_lock_interruptible(dev);
3958 if (ret)
3959 return ret;
3960
03ac0642
CW
3961 obj = i915_gem_object_lookup(file_priv, args->handle);
3962 if (!obj) {
1d7cfea1
CW
3963 ret = -ENOENT;
3964 goto unlock;
3ef94daa 3965 }
3ef94daa 3966
656bfa3a 3967 if (obj->pages &&
3e510a8e 3968 i915_gem_object_is_tiled(obj) &&
656bfa3a
DV
3969 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3970 if (obj->madv == I915_MADV_WILLNEED)
3971 i915_gem_object_unpin_pages(obj);
3972 if (args->madv == I915_MADV_WILLNEED)
3973 i915_gem_object_pin_pages(obj);
3974 }
3975
05394f39
CW
3976 if (obj->madv != __I915_MADV_PURGED)
3977 obj->madv = args->madv;
3ef94daa 3978
6c085a72 3979 /* if the object is no longer attached, discard its backing storage */
be6a0376 3980 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
2d7ef395
CW
3981 i915_gem_object_truncate(obj);
3982
05394f39 3983 args->retained = obj->madv != __I915_MADV_PURGED;
bb6baf76 3984
f8c417cd 3985 i915_gem_object_put(obj);
1d7cfea1 3986unlock:
3ef94daa 3987 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3988 return ret;
3ef94daa
CW
3989}
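/* Illustrative userspace sketch (an addition, not part of the original file):
 * marking a cached buffer purgeable and checking whether it survived, given a
 * valid fd and GEM handle:
 *
 *	struct drm_i915_gem_madvise arg = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg);
 *	if (!arg.retained)
 *		... contents were discarded, repopulate before reuse ...
 */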
3990
37e680a1
CW
3991void i915_gem_object_init(struct drm_i915_gem_object *obj,
3992 const struct drm_i915_gem_object_ops *ops)
0327d6ba 3993{
b4716185
CW
3994 int i;
3995
35c20a60 3996 INIT_LIST_HEAD(&obj->global_list);
666796da 3997 for (i = 0; i < I915_NUM_ENGINES; i++)
fa545cbf
CW
3998 init_request_active(&obj->last_read[i],
3999 i915_gem_object_retire__read);
4000 init_request_active(&obj->last_write,
4001 i915_gem_object_retire__write);
4002 init_request_active(&obj->last_fence, NULL);
b25cb2f8 4003 INIT_LIST_HEAD(&obj->obj_exec_link);
2f633156 4004 INIT_LIST_HEAD(&obj->vma_list);
8d9d5744 4005 INIT_LIST_HEAD(&obj->batch_pool_link);
0327d6ba 4006
37e680a1
CW
4007 obj->ops = ops;
4008
0327d6ba
CW
4009 obj->fence_reg = I915_FENCE_REG_NONE;
4010 obj->madv = I915_MADV_WILLNEED;
0327d6ba 4011
f19ec8cb 4012 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
0327d6ba
CW
4013}
4014
37e680a1 4015static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
de472664 4016 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
37e680a1
CW
4017 .get_pages = i915_gem_object_get_pages_gtt,
4018 .put_pages = i915_gem_object_put_pages_gtt,
4019};
4020
d37cd8a8 4021struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
05394f39 4022 size_t size)
ac52bc56 4023{
c397b908 4024 struct drm_i915_gem_object *obj;
5949eac4 4025 struct address_space *mapping;
1a240d4d 4026 gfp_t mask;
fe3db79b 4027 int ret;
ac52bc56 4028
42dcedd4 4029 obj = i915_gem_object_alloc(dev);
c397b908 4030 if (obj == NULL)
fe3db79b 4031 return ERR_PTR(-ENOMEM);
673a394b 4032
fe3db79b
CW
4033 ret = drm_gem_object_init(dev, &obj->base, size);
4034 if (ret)
4035 goto fail;
673a394b 4036
bed1ea95
CW
4037 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4038 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4039 /* 965gm cannot relocate objects above 4GiB. */
4040 mask &= ~__GFP_HIGHMEM;
4041 mask |= __GFP_DMA32;
4042 }
4043
93c76a3d 4044 mapping = obj->base.filp->f_mapping;
bed1ea95 4045 mapping_set_gfp_mask(mapping, mask);
5949eac4 4046
37e680a1 4047 i915_gem_object_init(obj, &i915_gem_object_ops);
73aa808f 4048
c397b908
DV
4049 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4050 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 4051
3d29b842
ED
4052 if (HAS_LLC(dev)) {
4053 /* On some devices, we can have the GPU use the LLC (the CPU
a1871112
EA
4054 * cache) for about a 10% performance improvement
4055 * compared to uncached. Graphics requests other than
4056 * display scanout are coherent with the CPU in
4057 * accessing this cache. This means in this mode we
4058 * don't need to clflush on the CPU side, and on the
4059 * GPU side we only need to flush internal caches to
4060 * get data visible to the CPU.
4061 *
4062 * However, we maintain the display planes as UC, and so
4063 * need to rebind when first used as such.
4064 */
4065 obj->cache_level = I915_CACHE_LLC;
4066 } else
4067 obj->cache_level = I915_CACHE_NONE;
4068
d861e338
DV
4069 trace_i915_gem_object_create(obj);
4070
05394f39 4071 return obj;
fe3db79b
CW
4072
4073fail:
4074 i915_gem_object_free(obj);
4075
4076 return ERR_PTR(ret);
c397b908
DV
4077}
4078
340fbd8c
CW
4079static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4080{
4081 /* If we are the last user of the backing storage (be it shmemfs
4082 * pages or stolen etc), we know that the pages are going to be
4083 * immediately released. In this case, we can then skip copying
4084 * back the contents from the GPU.
4085 */
4086
4087 if (obj->madv != I915_MADV_WILLNEED)
4088 return false;
4089
4090 if (obj->base.filp == NULL)
4091 return true;
4092
4093 /* At first glance, this looks racy, but then again so would be
4094 * userspace racing mmap against close. However, the first external
4095 * reference to the filp can only be obtained through the
4096 * i915_gem_mmap_ioctl() which safeguards us against the user
4097 * acquiring such a reference whilst we are in the middle of
4098 * freeing the object.
4099 */
4100 return atomic_long_read(&obj->base.filp->f_count) == 1;
4101}
4102
1488fc08 4103void i915_gem_free_object(struct drm_gem_object *gem_obj)
673a394b 4104{
1488fc08 4105 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
05394f39 4106 struct drm_device *dev = obj->base.dev;
fac5e23e 4107 struct drm_i915_private *dev_priv = to_i915(dev);
07fe0b12 4108 struct i915_vma *vma, *next;
673a394b 4109
f65c9168
PZ
4110 intel_runtime_pm_get(dev_priv);
4111
26e12f89
CW
4112 trace_i915_gem_object_destroy(obj);
4113
b1f788c6
CW
4114 /* All file-owned VMA should have been released by this point through
4115 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4116 * However, the object may also be bound into the global GTT (e.g.
4117 * older GPUs without per-process support, or for direct access through
4118 * the GTT either for the user or for scanout). Those VMA still need to
 4119 * be unbound now.
4120 */
1c7f4bca 4121 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
3272db53 4122 GEM_BUG_ON(!i915_vma_is_ggtt(vma));
b1f788c6 4123 GEM_BUG_ON(i915_vma_is_active(vma));
3272db53 4124 vma->flags &= ~I915_VMA_PIN_MASK;
b1f788c6 4125 i915_vma_close(vma);
1488fc08 4126 }
15717de2 4127 GEM_BUG_ON(obj->bind_count);
1488fc08 4128
1d64ae71
BW
 4129 /* Stolen objects don't hold a ref, but do hold a pin count. Fix that up
4130 * before progressing. */
4131 if (obj->stolen)
4132 i915_gem_object_unpin_pages(obj);
4133
faf5bf0a 4134 WARN_ON(atomic_read(&obj->frontbuffer_bits));
a071fa00 4135
656bfa3a
DV
4136 if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4137 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
3e510a8e 4138 i915_gem_object_is_tiled(obj))
656bfa3a
DV
4139 i915_gem_object_unpin_pages(obj);
4140
401c29f6
BW
4141 if (WARN_ON(obj->pages_pin_count))
4142 obj->pages_pin_count = 0;
340fbd8c 4143 if (discard_backing_storage(obj))
5537252b 4144 obj->madv = I915_MADV_DONTNEED;
37e680a1 4145 i915_gem_object_put_pages(obj);
de151cf6 4146
9da3da66
CW
4147 BUG_ON(obj->pages);
4148
2f745ad3
CW
4149 if (obj->base.import_attach)
4150 drm_prime_gem_destroy(&obj->base, NULL);
de151cf6 4151
5cc9ed4b
CW
4152 if (obj->ops->release)
4153 obj->ops->release(obj);
4154
05394f39
CW
4155 drm_gem_object_release(&obj->base);
4156 i915_gem_info_remove_obj(dev_priv, obj->base.size);
c397b908 4157
05394f39 4158 kfree(obj->bit_17);
42dcedd4 4159 i915_gem_object_free(obj);
f65c9168
PZ
4160
4161 intel_runtime_pm_put(dev_priv);
673a394b
EA
4162}
4163
dcff85c8 4164int i915_gem_suspend(struct drm_device *dev)
29105ccc 4165{
fac5e23e 4166 struct drm_i915_private *dev_priv = to_i915(dev);
dcff85c8 4167 int ret;
28dfe52a 4168
54b4f68f
CW
4169 intel_suspend_gt_powersave(dev_priv);
4170
45c5f202 4171 mutex_lock(&dev->struct_mutex);
5ab57c70
CW
4172
4173 /* We have to flush all the executing contexts to main memory so
 4174 * that they can be saved in the hibernation image. To ensure the last
4175 * context image is coherent, we have to switch away from it. That
4176 * leaves the dev_priv->kernel_context still active when
4177 * we actually suspend, and its image in memory may not match the GPU
4178 * state. Fortunately, the kernel_context is disposable and we do
4179 * not rely on its state.
4180 */
4181 ret = i915_gem_switch_to_kernel_context(dev_priv);
4182 if (ret)
4183 goto err;
4184
dcff85c8 4185 ret = i915_gem_wait_for_idle(dev_priv, true);
f7403347 4186 if (ret)
45c5f202 4187 goto err;
f7403347 4188
c033666a 4189 i915_gem_retire_requests(dev_priv);
673a394b 4190
b2e862d0 4191 i915_gem_context_lost(dev_priv);
45c5f202
CW
4192 mutex_unlock(&dev->struct_mutex);
4193
737b1506 4194 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
67d97da3
CW
4195 cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4196 flush_delayed_work(&dev_priv->gt.idle_work);
29105ccc 4197
bdcf120b
CW
 4198 /* Assert that we successfully flushed all the work and
4199 * reset the GPU back to its idle, low power state.
4200 */
67d97da3 4201 WARN_ON(dev_priv->gt.awake);
bdcf120b 4202
673a394b 4203 return 0;
45c5f202
CW
4204
4205err:
4206 mutex_unlock(&dev->struct_mutex);
4207 return ret;
673a394b
EA
4208}
4209
5ab57c70
CW
4210void i915_gem_resume(struct drm_device *dev)
4211{
4212 struct drm_i915_private *dev_priv = to_i915(dev);
4213
4214 mutex_lock(&dev->struct_mutex);
4215 i915_gem_restore_gtt_mappings(dev);
4216
4217 /* As we didn't flush the kernel context before suspend, we cannot
4218 * guarantee that the context image is complete. So let's just reset
4219 * it and start again.
4220 */
4221 if (i915.enable_execlists)
4222 intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
4223
4224 mutex_unlock(&dev->struct_mutex);
4225}
4226
f691e2f4
DV
4227void i915_gem_init_swizzling(struct drm_device *dev)
4228{
fac5e23e 4229 struct drm_i915_private *dev_priv = to_i915(dev);
f691e2f4 4230
11782b02 4231 if (INTEL_INFO(dev)->gen < 5 ||
f691e2f4
DV
4232 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4233 return;
4234
4235 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4236 DISP_TILE_SURFACE_SWIZZLING);
4237
11782b02
DV
4238 if (IS_GEN5(dev))
4239 return;
4240
f691e2f4
DV
4241 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4242 if (IS_GEN6(dev))
6b26c86d 4243 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
8782e26c 4244 else if (IS_GEN7(dev))
6b26c86d 4245 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
31a5336e
BW
4246 else if (IS_GEN8(dev))
4247 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
8782e26c
BW
4248 else
4249 BUG();
f691e2f4 4250}
e21af88d 4251
81e7f200
VS
4252static void init_unused_ring(struct drm_device *dev, u32 base)
4253{
fac5e23e 4254 struct drm_i915_private *dev_priv = to_i915(dev);
81e7f200
VS
4255
4256 I915_WRITE(RING_CTL(base), 0);
4257 I915_WRITE(RING_HEAD(base), 0);
4258 I915_WRITE(RING_TAIL(base), 0);
4259 I915_WRITE(RING_START(base), 0);
4260}
4261
4262static void init_unused_rings(struct drm_device *dev)
4263{
4264 if (IS_I830(dev)) {
4265 init_unused_ring(dev, PRB1_BASE);
4266 init_unused_ring(dev, SRB0_BASE);
4267 init_unused_ring(dev, SRB1_BASE);
4268 init_unused_ring(dev, SRB2_BASE);
4269 init_unused_ring(dev, SRB3_BASE);
4270 } else if (IS_GEN2(dev)) {
4271 init_unused_ring(dev, SRB0_BASE);
4272 init_unused_ring(dev, SRB1_BASE);
4273 } else if (IS_GEN3(dev)) {
4274 init_unused_ring(dev, PRB1_BASE);
4275 init_unused_ring(dev, PRB2_BASE);
4276 }
4277}
4278
4fc7c971
BW
4279int
4280i915_gem_init_hw(struct drm_device *dev)
4281{
fac5e23e 4282 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4283 struct intel_engine_cs *engine;
d200cda6 4284 int ret;
4fc7c971 4285
5e4f5189
CW
4286 /* Double layer security blanket, see i915_gem_init() */
4287 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4288
3accaf7e 4289 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
05e21cc4 4290 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4fc7c971 4291
0bf21347
VS
4292 if (IS_HASWELL(dev))
4293 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4294 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
9435373e 4295
88a2b2a3 4296 if (HAS_PCH_NOP(dev)) {
6ba844b0
DV
4297 if (IS_IVYBRIDGE(dev)) {
4298 u32 temp = I915_READ(GEN7_MSG_CTL);
4299 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4300 I915_WRITE(GEN7_MSG_CTL, temp);
4301 } else if (INTEL_INFO(dev)->gen >= 7) {
4302 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4303 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4304 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4305 }
88a2b2a3
BW
4306 }
4307
4fc7c971
BW
4308 i915_gem_init_swizzling(dev);
4309
d5abdfda
DV
4310 /*
4311 * At least 830 can leave some of the unused rings
4312 * "active" (ie. head != tail) after resume which
4313 * will prevent c3 entry. Makes sure all unused rings
4314 * are totally idle.
4315 */
4316 init_unused_rings(dev);
4317
ed54c1a1 4318 BUG_ON(!dev_priv->kernel_context);
90638cc1 4319
4ad2fd88
JH
4320 ret = i915_ppgtt_init_hw(dev);
4321 if (ret) {
4322 DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4323 goto out;
4324 }
4325
4326 /* Need to do basic initialisation of all rings first: */
b4ac5afc 4327 for_each_engine(engine, dev_priv) {
e2f80391 4328 ret = engine->init_hw(engine);
35a57ffb 4329 if (ret)
5e4f5189 4330 goto out;
35a57ffb 4331 }
99433931 4332
0ccdacf6
PA
4333 intel_mocs_init_l3cc_table(dev);
4334
33a732f4 4335 /* We can't enable contexts until all firmware is loaded */
e556f7c1
DG
4336 ret = intel_guc_setup(dev);
4337 if (ret)
4338 goto out;
33a732f4 4339
5e4f5189
CW
4340out:
4341 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2fa48d8d 4342 return ret;
8187a2b7
ZN
4343}
4344
39df9190
CW
4345bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4346{
4347 if (INTEL_INFO(dev_priv)->gen < 6)
4348 return false;
4349
4350 /* TODO: make semaphores and Execlists play nicely together */
4351 if (i915.enable_execlists)
4352 return false;
4353
4354 if (value >= 0)
4355 return value;
4356
4357#ifdef CONFIG_INTEL_IOMMU
4358 /* Enable semaphores on SNB when IO remapping is off */
4359 if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4360 return false;
4361#endif
4362
4363 return true;
4364}
4365
1070a42b
CW
4366int i915_gem_init(struct drm_device *dev)
4367{
fac5e23e 4368 struct drm_i915_private *dev_priv = to_i915(dev);
1070a42b
CW
4369 int ret;
4370
1070a42b 4371 mutex_lock(&dev->struct_mutex);
d62b4892 4372
a83014d3 4373 if (!i915.enable_execlists) {
7e37f889 4374 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
454afebd 4375 } else {
117897f4 4376 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
a83014d3
OM
4377 }
4378
5e4f5189
CW
4379 /* This is just a security blanket to placate dragons.
4380 * On some systems, we very sporadically observe that the first TLBs
4381 * used by the CS may be stale, despite us poking the TLB reset. If
4382 * we hold the forcewake during initialisation these problems
4383 * just magically go away.
4384 */
4385 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4386
72778cb2 4387 i915_gem_init_userptr(dev_priv);
f6b9d5ca
CW
4388
4389 ret = i915_gem_init_ggtt(dev_priv);
4390 if (ret)
4391 goto out_unlock;
d62b4892 4392
2fa48d8d 4393 ret = i915_gem_context_init(dev);
7bcc3777
JN
4394 if (ret)
4395 goto out_unlock;
2fa48d8d 4396
8b3e2d36 4397 ret = intel_engines_init(dev);
35a57ffb 4398 if (ret)
7bcc3777 4399 goto out_unlock;
2fa48d8d 4400
1070a42b 4401 ret = i915_gem_init_hw(dev);
60990320 4402 if (ret == -EIO) {
7e21d648 4403 /* Allow engine initialisation to fail by marking the GPU as
60990320
CW
4404 * wedged. But we only want to do this where the GPU is angry,
4405 * for all other failure, such as an allocation failure, bail.
4406 */
4407 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
805de8f4 4408 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
60990320 4409 ret = 0;
1070a42b 4410 }
7bcc3777
JN
4411
4412out_unlock:
5e4f5189 4413 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
60990320 4414 mutex_unlock(&dev->struct_mutex);
1070a42b 4415
60990320 4416 return ret;
1070a42b
CW
4417}
4418
8187a2b7 4419void
117897f4 4420i915_gem_cleanup_engines(struct drm_device *dev)
8187a2b7 4421{
fac5e23e 4422 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4423 struct intel_engine_cs *engine;
8187a2b7 4424
b4ac5afc 4425 for_each_engine(engine, dev_priv)
117897f4 4426 dev_priv->gt.cleanup_engine(engine);
8187a2b7
ZN
4427}
4428
64193406 4429static void
666796da 4430init_engine_lists(struct intel_engine_cs *engine)
64193406 4431{
0bc40be8 4432 INIT_LIST_HEAD(&engine->request_list);
64193406
CW
4433}
4434
40ae4e16
ID
4435void
4436i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4437{
91c8a326 4438 struct drm_device *dev = &dev_priv->drm;
40ae4e16
ID
4439
4440 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4441 !IS_CHERRYVIEW(dev_priv))
4442 dev_priv->num_fence_regs = 32;
4443 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4444 IS_I945GM(dev_priv) || IS_G33(dev_priv))
4445 dev_priv->num_fence_regs = 16;
4446 else
4447 dev_priv->num_fence_regs = 8;
4448
c033666a 4449 if (intel_vgpu_active(dev_priv))
40ae4e16
ID
4450 dev_priv->num_fence_regs =
4451 I915_READ(vgtif_reg(avail_rs.fence_num));
4452
4453 /* Initialize fence registers to zero */
4454 i915_gem_restore_fences(dev);
4455
4456 i915_gem_detect_bit_6_swizzle(dev);
4457}
4458
673a394b 4459void
d64aa096 4460i915_gem_load_init(struct drm_device *dev)
673a394b 4461{
fac5e23e 4462 struct drm_i915_private *dev_priv = to_i915(dev);
42dcedd4
CW
4463 int i;
4464
efab6d8d 4465 dev_priv->objects =
42dcedd4
CW
4466 kmem_cache_create("i915_gem_object",
4467 sizeof(struct drm_i915_gem_object), 0,
4468 SLAB_HWCACHE_ALIGN,
4469 NULL);
e20d2ab7
CW
4470 dev_priv->vmas =
4471 kmem_cache_create("i915_gem_vma",
4472 sizeof(struct i915_vma), 0,
4473 SLAB_HWCACHE_ALIGN,
4474 NULL);
efab6d8d
CW
4475 dev_priv->requests =
4476 kmem_cache_create("i915_gem_request",
4477 sizeof(struct drm_i915_gem_request), 0,
0eafec6d
CW
4478 SLAB_HWCACHE_ALIGN |
4479 SLAB_RECLAIM_ACCOUNT |
4480 SLAB_DESTROY_BY_RCU,
efab6d8d 4481 NULL);
673a394b 4482
a33afea5 4483 INIT_LIST_HEAD(&dev_priv->context_list);
6c085a72
CW
4484 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4485 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
a09ba7fa 4486 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
666796da
TU
4487 for (i = 0; i < I915_NUM_ENGINES; i++)
4488 init_engine_lists(&dev_priv->engine[i]);
4b9de737 4489 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
007cc8ac 4490 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
67d97da3 4491 INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
673a394b 4492 i915_gem_retire_work_handler);
67d97da3 4493 INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
b29c19b6 4494 i915_gem_idle_work_handler);
1f15b76f 4495 init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
1f83fee0 4496 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
31169714 4497
72bfa19c
CW
4498 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4499
19b2dbde 4500 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
10ed13e4 4501
6b95a207 4502 init_waitqueue_head(&dev_priv->pending_flip_queue);
17250b71 4503
ce453d81
CW
4504 dev_priv->mm.interruptible = true;
4505
b5add959 4506 spin_lock_init(&dev_priv->fb_tracking.lock);
673a394b 4507}
71acb5eb 4508
d64aa096
ID
4509void i915_gem_load_cleanup(struct drm_device *dev)
4510{
4511 struct drm_i915_private *dev_priv = to_i915(dev);
4512
4513 kmem_cache_destroy(dev_priv->requests);
4514 kmem_cache_destroy(dev_priv->vmas);
4515 kmem_cache_destroy(dev_priv->objects);
0eafec6d
CW
4516
4517 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
4518 rcu_barrier();
d64aa096
ID
4519}
4520
461fb99c
CW
4521int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4522{
4523 struct drm_i915_gem_object *obj;
4524
4525 /* Called just before we write the hibernation image.
4526 *
4527 * We need to update the domain tracking to reflect that the CPU
4528 * will be accessing all the pages to create and restore from the
4529 * hibernation, and so upon restoration those pages will be in the
4530 * CPU domain.
4531 *
4532 * To make sure the hibernation image contains the latest state,
4533 * we update that state just before writing out the image.
4534 */
4535
4536 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
4537 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4538 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4539 }
4540
4541 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
4542 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4543 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4544 }
4545
4546 return 0;
4547}
4548
f787a5f5 4549void i915_gem_release(struct drm_device *dev, struct drm_file *file)
b962442e 4550{
f787a5f5 4551 struct drm_i915_file_private *file_priv = file->driver_priv;
15f7bbc7 4552 struct drm_i915_gem_request *request;
b962442e
EA
4553
4554 /* Clean up our request list when the client is going away, so that
4555 * later retire_requests won't dereference our soon-to-be-gone
4556 * file_priv.
4557 */
1c25595f 4558 spin_lock(&file_priv->mm.lock);
15f7bbc7 4559 list_for_each_entry(request, &file_priv->mm.request_list, client_list)
f787a5f5 4560 request->file_priv = NULL;
1c25595f 4561 spin_unlock(&file_priv->mm.lock);
b29c19b6 4562
2e1b8730 4563 if (!list_empty(&file_priv->rps.link)) {
8d3afd7d 4564 spin_lock(&to_i915(dev)->rps.client_lock);
2e1b8730 4565 list_del(&file_priv->rps.link);
8d3afd7d 4566 spin_unlock(&to_i915(dev)->rps.client_lock);
1854d5ca 4567 }
b29c19b6
CW
4568}
4569
4570int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4571{
4572 struct drm_i915_file_private *file_priv;
e422b888 4573 int ret;
b29c19b6
CW
4574
4575 DRM_DEBUG_DRIVER("\n");
4576
4577 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4578 if (!file_priv)
4579 return -ENOMEM;
4580
4581 file->driver_priv = file_priv;
f19ec8cb 4582 file_priv->dev_priv = to_i915(dev);
ab0e7ff9 4583 file_priv->file = file;
2e1b8730 4584 INIT_LIST_HEAD(&file_priv->rps.link);
b29c19b6
CW
4585
4586 spin_lock_init(&file_priv->mm.lock);
4587 INIT_LIST_HEAD(&file_priv->mm.request_list);
b29c19b6 4588
c80ff16e 4589 file_priv->bsd_engine = -1;
de1add36 4590
e422b888
BW
4591 ret = i915_gem_context_open(dev, file);
4592 if (ret)
4593 kfree(file_priv);
b29c19b6 4594
e422b888 4595 return ret;
b29c19b6
CW
4596}
4597
b680c37a
DV
4598/**
4599 * i915_gem_track_fb - update frontbuffer tracking
d9072a3e
GT
4600 * @old: current GEM buffer for the frontbuffer slots
4601 * @new: new GEM buffer for the frontbuffer slots
4602 * @frontbuffer_bits: bitmask of frontbuffer slots
b680c37a
DV
4603 *
4604 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4605 * from @old and setting them in @new. Both @old and @new can be NULL.
4606 */
a071fa00
DV
4607void i915_gem_track_fb(struct drm_i915_gem_object *old,
4608 struct drm_i915_gem_object *new,
4609 unsigned frontbuffer_bits)
4610{
faf5bf0a
CW
 4611 /* Control of individual bits within the mask is guarded by
4612 * the owning plane->mutex, i.e. we can never see concurrent
4613 * manipulation of individual bits. But since the bitfield as a whole
4614 * is updated using RMW, we need to use atomics in order to update
4615 * the bits.
4616 */
4617 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4618 sizeof(atomic_t) * BITS_PER_BYTE);
4619
a071fa00 4620 if (old) {
faf5bf0a
CW
4621 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4622 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
a071fa00
DV
4623 }
4624
4625 if (new) {
faf5bf0a
CW
4626 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4627 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
a071fa00
DV
4628 }
4629}
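/* Illustrative sketch (an addition): flipping a plane from old_obj to new_obj
 * with the helper above, where PLANE_BIT stands for that plane's frontbuffer
 * bit:
 *
 *	i915_gem_track_fb(old_obj, new_obj, PLANE_BIT);
 *
 * This clears PLANE_BIT in old_obj->frontbuffer_bits and sets it in
 * new_obj->frontbuffer_bits, using atomics since other planes may be
 * updating their own bits concurrently.
 */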
4630
033908ae
DG
4631/* Like i915_gem_object_get_page(), but mark the returned page dirty */
4632struct page *
4633i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
4634{
4635 struct page *page;
4636
4637 /* Only default objects have per-page dirty tracking */
b9bcd14a 4638 if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
033908ae
DG
4639 return NULL;
4640
4641 page = i915_gem_object_get_page(obj, n);
4642 set_page_dirty(page);
4643 return page;
4644}
4645
ea70299d
DG
4646/* Allocate a new GEM object and fill it with the supplied data */
4647struct drm_i915_gem_object *
4648i915_gem_object_create_from_data(struct drm_device *dev,
4649 const void *data, size_t size)
4650{
4651 struct drm_i915_gem_object *obj;
4652 struct sg_table *sg;
4653 size_t bytes;
4654 int ret;
4655
d37cd8a8 4656 obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
fe3db79b 4657 if (IS_ERR(obj))
ea70299d
DG
4658 return obj;
4659
4660 ret = i915_gem_object_set_to_cpu_domain(obj, true);
4661 if (ret)
4662 goto fail;
4663
4664 ret = i915_gem_object_get_pages(obj);
4665 if (ret)
4666 goto fail;
4667
4668 i915_gem_object_pin_pages(obj);
4669 sg = obj->pages;
4670 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
9e7d18c0 4671 obj->dirty = 1; /* Backing store is now out of date */
ea70299d
DG
4672 i915_gem_object_unpin_pages(obj);
4673
4674 if (WARN_ON(bytes != size)) {
 4675 DRM_ERROR("Incomplete copy, wrote %zu of %zu\n", bytes, size);
4676 ret = -EFAULT;
4677 goto fail;
4678 }
4679
4680 return obj;
4681
4682fail:
f8c417cd 4683 i915_gem_object_put(obj);
ea70299d
DG
4684 return ERR_PTR(ret);
4685}