1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28#include "drmP.h"
29#include "drm.h"
30#include "i915_drm.h"
31#include "i915_drv.h"
32#include <linux/swap.h>
33#include <linux/pci.h>
34
35#define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
36
37static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
38static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
39static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
40static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
41 int write);
42static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
43 uint64_t offset,
44 uint64_t size);
45static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
46static int i915_gem_object_get_pages(struct drm_gem_object *obj);
47static void i915_gem_object_put_pages(struct drm_gem_object *obj);
48static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
49static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
50 unsigned alignment);
51static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
52static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
53static int i915_gem_evict_something(struct drm_device *dev);
54static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
55 struct drm_i915_gem_pwrite *args,
56 struct drm_file *file_priv);
57
58int i915_gem_do_init(struct drm_device *dev, unsigned long start,
59 unsigned long end)
60{
61 drm_i915_private_t *dev_priv = dev->dev_private;
62
63 if (start >= end ||
64 (start & (PAGE_SIZE - 1)) != 0 ||
65 (end & (PAGE_SIZE - 1)) != 0) {
66 return -EINVAL;
67 }
68
69 drm_mm_init(&dev_priv->mm.gtt_space, start,
70 end - start);
71
72 dev->gtt_total = (uint32_t) (end - start);
73
74 return 0;
75}
76
77int
78i915_gem_init_ioctl(struct drm_device *dev, void *data,
79 struct drm_file *file_priv)
80{
81 struct drm_i915_gem_init *args = data;
82 int ret;
83
84 mutex_lock(&dev->struct_mutex);
85 ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
86 mutex_unlock(&dev->struct_mutex);
87
88 return ret;
89}
90
91int
92i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
93 struct drm_file *file_priv)
94{
95 struct drm_i915_gem_get_aperture *args = data;
96
97 if (!(dev->driver->driver_features & DRIVER_GEM))
98 return -ENODEV;
99
100 args->aper_size = dev->gtt_total;
101 args->aper_available_size = (args->aper_size -
102 atomic_read(&dev->pin_memory));
103
104 return 0;
105}
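/*
 * Illustrative only (not part of this file): a minimal userspace sketch of
 * the ioctl above, assuming libdrm's drmIoctl(), the uapi structs from
 * i915_drm.h, and an already-open DRM fd. aper_available_size is aper_size
 * minus the memory currently pinned by the kernel.
 */
#if 0
	struct drm_i915_gem_get_aperture aperture;

	memset(&aperture, 0, sizeof(aperture));
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
		printf("GTT: %llu total, %llu available\n",
		       (unsigned long long)aperture.aper_size,
		       (unsigned long long)aperture.aper_available_size);
#endif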
106
107
108/**
109 * Creates a new mm object and returns a handle to it.
110 */
111int
112i915_gem_create_ioctl(struct drm_device *dev, void *data,
113 struct drm_file *file_priv)
114{
115 struct drm_i915_gem_create *args = data;
116 struct drm_gem_object *obj;
117 int handle, ret;
118
119 args->size = roundup(args->size, PAGE_SIZE);
120
121 /* Allocate the new object */
122 obj = drm_gem_object_alloc(dev, args->size);
123 if (obj == NULL)
124 return -ENOMEM;
125
126 ret = drm_gem_handle_create(file_priv, obj, &handle);
127 mutex_lock(&dev->struct_mutex);
128 drm_gem_object_handle_unreference(obj);
129 mutex_unlock(&dev->struct_mutex);
130
131 if (ret)
132 return ret;
133
134 args->handle = handle;
135
136 return 0;
137}
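/*
 * Illustrative only (not part of this file): how userspace typically reaches
 * the create ioctl above, assuming libdrm's drmIoctl(), the uapi structs from
 * i915_drm.h, and an already-open DRM fd. The requested size is rounded up to
 * PAGE_SIZE by the kernel; use_handle() is a hypothetical consumer.
 */
#if 0
	struct drm_i915_gem_create create;

	memset(&create, 0, sizeof(create));
	create.size = 4096;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0)
		use_handle(create.handle);	/* hypothetical */
#endif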
138
139static inline int
140fast_shmem_read(struct page **pages,
141 loff_t page_base, int page_offset,
142 char __user *data,
143 int length)
144{
145 char __iomem *vaddr;
146 int ret;
147
148 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
149 if (vaddr == NULL)
150 return -ENOMEM;
151 ret = __copy_to_user_inatomic(data, vaddr + page_offset, length);
152 kunmap_atomic(vaddr, KM_USER0);
153
154 return ret;
155}
156
157static inline int
158slow_shmem_copy(struct page *dst_page,
159 int dst_offset,
160 struct page *src_page,
161 int src_offset,
162 int length)
163{
164 char *dst_vaddr, *src_vaddr;
165
166 dst_vaddr = kmap_atomic(dst_page, KM_USER0);
167 if (dst_vaddr == NULL)
168 return -ENOMEM;
169
170 src_vaddr = kmap_atomic(src_page, KM_USER1);
171 if (src_vaddr == NULL) {
172 kunmap_atomic(dst_vaddr, KM_USER0);
173 return -ENOMEM;
174 }
175
176 memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
177
178 kunmap_atomic(src_vaddr, KM_USER1);
179 kunmap_atomic(dst_vaddr, KM_USER0);
180
181 return 0;
182}
183
184/**
185 * This is the fast shmem pread path, which attempts to copy_to_user directly
186 * from the backing pages of the object to the user's address space. On a
187 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
188 */
189static int
190i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
191 struct drm_i915_gem_pread *args,
192 struct drm_file *file_priv)
193{
194 struct drm_i915_gem_object *obj_priv = obj->driver_private;
195 ssize_t remain;
196 loff_t offset, page_base;
197 char __user *user_data;
198 int page_offset, page_length;
199 int ret;
200
201 user_data = (char __user *) (uintptr_t) args->data_ptr;
202 remain = args->size;
203
204 mutex_lock(&dev->struct_mutex);
205
206 ret = i915_gem_object_get_pages(obj);
207 if (ret != 0)
208 goto fail_unlock;
209
210 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
211 args->size);
212 if (ret != 0)
213 goto fail_put_pages;
214
215 obj_priv = obj->driver_private;
216 offset = args->offset;
217
218 while (remain > 0) {
219 /* Operation in this page
220 *
221 * page_base = page offset within aperture
222 * page_offset = offset within page
223 * page_length = bytes to copy for this page
224 */
225 page_base = (offset & ~(PAGE_SIZE-1));
226 page_offset = offset & (PAGE_SIZE-1);
227 page_length = remain;
228 if ((page_offset + remain) > PAGE_SIZE)
229 page_length = PAGE_SIZE - page_offset;
230
231 ret = fast_shmem_read(obj_priv->pages,
232 page_base, page_offset,
233 user_data, page_length);
234 if (ret)
235 goto fail_put_pages;
236
237 remain -= page_length;
238 user_data += page_length;
239 offset += page_length;
240 }
241
242fail_put_pages:
243 i915_gem_object_put_pages(obj);
244fail_unlock:
245 mutex_unlock(&dev->struct_mutex);
246
247 return ret;
248}
249
250/**
251 * This is the fallback shmem pread path, which pins the destination user
252 * pages with get_user_pages() before taking the struct_mutex, so we can
253 * copy out of the object's backing pages via kmap_atomic() while holding
254 * the mutex without taking page faults.
255 */
256static int
257i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
258 struct drm_i915_gem_pread *args,
259 struct drm_file *file_priv)
260{
261 struct drm_i915_gem_object *obj_priv = obj->driver_private;
262 struct mm_struct *mm = current->mm;
263 struct page **user_pages;
264 ssize_t remain;
265 loff_t offset, pinned_pages, i;
266 loff_t first_data_page, last_data_page, num_pages;
267 int shmem_page_index, shmem_page_offset;
268 int data_page_index, data_page_offset;
269 int page_length;
270 int ret;
271 uint64_t data_ptr = args->data_ptr;
272
273 remain = args->size;
274
275 /* Pin the user pages containing the data. We can't fault while
276 * holding the struct mutex, yet we want to hold it while
277 * dereferencing the user data.
278 */
279 first_data_page = data_ptr / PAGE_SIZE;
280 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
281 num_pages = last_data_page - first_data_page + 1;
282
283 user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
284 if (user_pages == NULL)
285 return -ENOMEM;
286
287 down_read(&mm->mmap_sem);
288 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
289 num_pages, 0, 0, user_pages, NULL);
290 up_read(&mm->mmap_sem);
291 if (pinned_pages < num_pages) {
292 ret = -EFAULT;
293 goto fail_put_user_pages;
294 }
295
296 mutex_lock(&dev->struct_mutex);
297
298 ret = i915_gem_object_get_pages(obj);
299 if (ret != 0)
300 goto fail_unlock;
301
302 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
303 args->size);
304 if (ret != 0)
305 goto fail_put_pages;
306
307 obj_priv = obj->driver_private;
308 offset = args->offset;
309
310 while (remain > 0) {
311 /* Operation in this page
312 *
313 * shmem_page_index = page number within shmem file
314 * shmem_page_offset = offset within page in shmem file
315 * data_page_index = page number in get_user_pages return
316 * data_page_offset = offset within data_page_index page.
317 * page_length = bytes to copy for this page
318 */
319 shmem_page_index = offset / PAGE_SIZE;
320 shmem_page_offset = offset & ~PAGE_MASK;
321 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
322 data_page_offset = data_ptr & ~PAGE_MASK;
323
324 page_length = remain;
325 if ((shmem_page_offset + page_length) > PAGE_SIZE)
326 page_length = PAGE_SIZE - shmem_page_offset;
327 if ((data_page_offset + page_length) > PAGE_SIZE)
328 page_length = PAGE_SIZE - data_page_offset;
329
330 ret = slow_shmem_copy(user_pages[data_page_index],
331 data_page_offset,
332 obj_priv->pages[shmem_page_index],
333 shmem_page_offset,
334 page_length);
335 if (ret)
336 goto fail_put_pages;
337
338 remain -= page_length;
339 data_ptr += page_length;
340 offset += page_length;
341 }
342
343fail_put_pages:
344 i915_gem_object_put_pages(obj);
345fail_unlock:
346 mutex_unlock(&dev->struct_mutex);
347fail_put_user_pages:
348 for (i = 0; i < pinned_pages; i++) {
349 SetPageDirty(user_pages[i]);
350 page_cache_release(user_pages[i]);
351 }
352 kfree(user_pages);
353
354 return ret;
355}
356
357/**
358 * Reads data from the object referenced by handle.
359 *
360 * On error, the contents of *data are undefined.
361 */
362int
363i915_gem_pread_ioctl(struct drm_device *dev, void *data,
364 struct drm_file *file_priv)
365{
366 struct drm_i915_gem_pread *args = data;
367 struct drm_gem_object *obj;
368 struct drm_i915_gem_object *obj_priv;
369 int ret;
370
371 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
372 if (obj == NULL)
373 return -EBADF;
374 obj_priv = obj->driver_private;
375
376 /* Bounds check source.
377 *
378 * XXX: This could use review for overflow issues...
379 */
380 if (args->offset > obj->size || args->size > obj->size ||
381 args->offset + args->size > obj->size) {
382 drm_gem_object_unreference(obj);
383 return -EINVAL;
384 }
385
386 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
387 if (ret != 0)
388 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
389
390 drm_gem_object_unreference(obj);
391
392 return ret;
393}
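/*
 * Illustrative only (not part of this file): a userspace pread sketch,
 * assuming libdrm's drmIoctl(), the uapi structs from i915_drm.h, and an
 * existing fd/handle. The kernel tries the fast (atomic kmap) path first
 * and falls back to the page-pinning slow path on a fault, as above.
 */
#if 0
	struct drm_i915_gem_pread pread_arg;
	char buf[4096];

	memset(&pread_arg, 0, sizeof(pread_arg));
	pread_arg.handle = handle;
	pread_arg.offset = 0;
	pread_arg.size = sizeof(buf);
	pread_arg.data_ptr = (uintptr_t)buf;
	drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread_arg);
#endif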
394
395/* This is the fast write path which cannot handle
396 * page faults in the source data
397 */
398
399static inline int
400fast_user_write(struct io_mapping *mapping,
401 loff_t page_base, int page_offset,
402 char __user *user_data,
403 int length)
404{
405 char *vaddr_atomic;
406 unsigned long unwritten;
407
408 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
409 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
410 user_data, length);
411 io_mapping_unmap_atomic(vaddr_atomic);
412 if (unwritten)
413 return -EFAULT;
414 return 0;
415}
416
417/* Here's the write path which can sleep for
418 * page faults
419 */
420
421static inline int
422slow_kernel_write(struct io_mapping *mapping,
423 loff_t gtt_base, int gtt_offset,
424 struct page *user_page, int user_offset,
425 int length)
426{
427 char *src_vaddr, *dst_vaddr;
428 unsigned long unwritten;
429
430 dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
431 src_vaddr = kmap_atomic(user_page, KM_USER1);
432 unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
433 src_vaddr + user_offset,
434 length);
435 kunmap_atomic(src_vaddr, KM_USER1);
436 io_mapping_unmap_atomic(dst_vaddr);
437 if (unwritten)
438 return -EFAULT;
439 return 0;
440}
441
442static inline int
443fast_shmem_write(struct page **pages,
444 loff_t page_base, int page_offset,
445 char __user *data,
446 int length)
447{
448 char __iomem *vaddr;
449 unsigned long unwritten;
450
451 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
452 if (vaddr == NULL)
453 return -ENOMEM;
454 unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
455 kunmap_atomic(vaddr, KM_USER0);
456
457 if (unwritten)
458 return -EFAULT;
459 return 0;
460}
461
462/**
463 * This is the fast pwrite path, where we copy the data directly from the
464 * user into the GTT, uncached.
465 */
466static int
467i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
468 struct drm_i915_gem_pwrite *args,
469 struct drm_file *file_priv)
470{
471 struct drm_i915_gem_object *obj_priv = obj->driver_private;
472 drm_i915_private_t *dev_priv = dev->dev_private;
473 ssize_t remain;
474 loff_t offset, page_base;
475 char __user *user_data;
476 int page_offset, page_length;
477 int ret;
478
479 user_data = (char __user *) (uintptr_t) args->data_ptr;
480 remain = args->size;
481 if (!access_ok(VERIFY_READ, user_data, remain))
482 return -EFAULT;
483
484
485 mutex_lock(&dev->struct_mutex);
486 ret = i915_gem_object_pin(obj, 0);
487 if (ret) {
488 mutex_unlock(&dev->struct_mutex);
489 return ret;
490 }
491 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
492 if (ret)
493 goto fail;
494
495 obj_priv = obj->driver_private;
496 offset = obj_priv->gtt_offset + args->offset;
497
498 while (remain > 0) {
499 /* Operation in this page
500 *
501 * page_base = page offset within aperture
502 * page_offset = offset within page
503 * page_length = bytes to copy for this page
504 */
505 page_base = (offset & ~(PAGE_SIZE-1));
506 page_offset = offset & (PAGE_SIZE-1);
507 page_length = remain;
508 if ((page_offset + remain) > PAGE_SIZE)
509 page_length = PAGE_SIZE - page_offset;
510
511 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
512 page_offset, user_data, page_length);
513
514 /* If we get a fault while copying data, then (presumably) our
515 * source page isn't available. Return the error and we'll
516 * retry in the slow path.
517 */
518 if (ret)
519 goto fail;
520
521 remain -= page_length;
522 user_data += page_length;
523 offset += page_length;
524 }
525
526fail:
527 i915_gem_object_unpin(obj);
528 mutex_unlock(&dev->struct_mutex);
529
530 return ret;
531}
532
533/**
534 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
535 * the memory and maps it using kmap_atomic for copying.
536 *
537 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
538 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
539 */
540static int
541i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
542 struct drm_i915_gem_pwrite *args,
543 struct drm_file *file_priv)
544{
545 struct drm_i915_gem_object *obj_priv = obj->driver_private;
546 drm_i915_private_t *dev_priv = dev->dev_private;
547 ssize_t remain;
548 loff_t gtt_page_base, offset;
549 loff_t first_data_page, last_data_page, num_pages;
550 loff_t pinned_pages, i;
551 struct page **user_pages;
552 struct mm_struct *mm = current->mm;
553 int gtt_page_offset, data_page_offset, data_page_index, page_length;
554 int ret;
555 uint64_t data_ptr = args->data_ptr;
556
557 remain = args->size;
558
559 /* Pin the user pages containing the data. We can't fault while
560 * holding the struct mutex, and all of the pwrite implementations
561 * want to hold it while dereferencing the user data.
562 */
563 first_data_page = data_ptr / PAGE_SIZE;
564 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
565 num_pages = last_data_page - first_data_page + 1;
566
567 user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
568 if (user_pages == NULL)
569 return -ENOMEM;
570
571 down_read(&mm->mmap_sem);
572 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
573 num_pages, 0, 0, user_pages, NULL);
574 up_read(&mm->mmap_sem);
575 if (pinned_pages < num_pages) {
576 ret = -EFAULT;
577 goto out_unpin_pages;
578 }
579
580 mutex_lock(&dev->struct_mutex);
581 ret = i915_gem_object_pin(obj, 0);
582 if (ret)
583 goto out_unlock;
584
585 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
586 if (ret)
587 goto out_unpin_object;
588
589 obj_priv = obj->driver_private;
590 offset = obj_priv->gtt_offset + args->offset;
591
592 while (remain > 0) {
593 /* Operation in this page
594 *
595 * gtt_page_base = page offset within aperture
596 * gtt_page_offset = offset within page in aperture
597 * data_page_index = page number in get_user_pages return
598 * data_page_offset = offset within data_page_index page.
599 * page_length = bytes to copy for this page
600 */
601 gtt_page_base = offset & PAGE_MASK;
602 gtt_page_offset = offset & ~PAGE_MASK;
603 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
604 data_page_offset = data_ptr & ~PAGE_MASK;
605
606 page_length = remain;
607 if ((gtt_page_offset + page_length) > PAGE_SIZE)
608 page_length = PAGE_SIZE - gtt_page_offset;
609 if ((data_page_offset + page_length) > PAGE_SIZE)
610 page_length = PAGE_SIZE - data_page_offset;
611
612 ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
613 gtt_page_base, gtt_page_offset,
614 user_pages[data_page_index],
615 data_page_offset,
616 page_length);
617
618 /* This copy should not fault, since the source pages are pinned
619 * and mapped with kmap_atomic. If it reports an error anyway,
620 * just bail out; there is no slower path left to retry.
621 */
622 if (ret)
623 goto out_unpin_object;
624
625 remain -= page_length;
626 offset += page_length;
627 data_ptr += page_length;
628 }
629
630out_unpin_object:
631 i915_gem_object_unpin(obj);
632out_unlock:
633 mutex_unlock(&dev->struct_mutex);
634out_unpin_pages:
635 for (i = 0; i < pinned_pages; i++)
636 page_cache_release(user_pages[i]);
637 kfree(user_pages);
638
639 return ret;
640}
641
642/**
643 * This is the fast shmem pwrite path, which attempts to directly
644 * copy_from_user into the kmapped pages backing the object.
645 */
646static int
647i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
648 struct drm_i915_gem_pwrite *args,
649 struct drm_file *file_priv)
650{
651 struct drm_i915_gem_object *obj_priv = obj->driver_private;
652 ssize_t remain;
653 loff_t offset, page_base;
654 char __user *user_data;
655 int page_offset, page_length;
656 int ret;
657
658 user_data = (char __user *) (uintptr_t) args->data_ptr;
659 remain = args->size;
660
661 mutex_lock(&dev->struct_mutex);
662
663 ret = i915_gem_object_get_pages(obj);
664 if (ret != 0)
665 goto fail_unlock;
666
667 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
668 if (ret != 0)
669 goto fail_put_pages;
670
671 obj_priv = obj->driver_private;
672 offset = args->offset;
673 obj_priv->dirty = 1;
674
675 while (remain > 0) {
676 /* Operation in this page
677 *
678 * page_base = page offset within aperture
679 * page_offset = offset within page
680 * page_length = bytes to copy for this page
681 */
682 page_base = (offset & ~(PAGE_SIZE-1));
683 page_offset = offset & (PAGE_SIZE-1);
684 page_length = remain;
685 if ((page_offset + remain) > PAGE_SIZE)
686 page_length = PAGE_SIZE - page_offset;
687
688 ret = fast_shmem_write(obj_priv->pages,
689 page_base, page_offset,
690 user_data, page_length);
691 if (ret)
692 goto fail_put_pages;
693
694 remain -= page_length;
695 user_data += page_length;
696 offset += page_length;
697 }
698
699fail_put_pages:
700 i915_gem_object_put_pages(obj);
701fail_unlock:
702 mutex_unlock(&dev->struct_mutex);
703
704 return ret;
705}
706
707/**
708 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
709 * the memory and maps it using kmap_atomic for copying.
710 *
711 * This avoids taking mmap_sem for faulting on the user's address while the
712 * struct_mutex is held.
713 */
714static int
715i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
716 struct drm_i915_gem_pwrite *args,
717 struct drm_file *file_priv)
718{
719 struct drm_i915_gem_object *obj_priv = obj->driver_private;
720 struct mm_struct *mm = current->mm;
721 struct page **user_pages;
722 ssize_t remain;
723 loff_t offset, pinned_pages, i;
724 loff_t first_data_page, last_data_page, num_pages;
725 int shmem_page_index, shmem_page_offset;
726 int data_page_index, data_page_offset;
727 int page_length;
728 int ret;
729 uint64_t data_ptr = args->data_ptr;
730
731 remain = args->size;
732
733 /* Pin the user pages containing the data. We can't fault while
734 * holding the struct mutex, and all of the pwrite implementations
735 * want to hold it while dereferencing the user data.
736 */
737 first_data_page = data_ptr / PAGE_SIZE;
738 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
739 num_pages = last_data_page - first_data_page + 1;
740
741 user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
742 if (user_pages == NULL)
743 return -ENOMEM;
744
745 down_read(&mm->mmap_sem);
746 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
747 num_pages, 0, 0, user_pages, NULL);
748 up_read(&mm->mmap_sem);
749 if (pinned_pages < num_pages) {
750 ret = -EFAULT;
751 goto fail_put_user_pages;
752 }
753
754 mutex_lock(&dev->struct_mutex);
755
756 ret = i915_gem_object_get_pages(obj);
757 if (ret != 0)
758 goto fail_unlock;
759
760 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
761 if (ret != 0)
762 goto fail_put_pages;
763
764 obj_priv = obj->driver_private;
765 offset = args->offset;
766 obj_priv->dirty = 1;
767
768 while (remain > 0) {
769 /* Operation in this page
770 *
771 * shmem_page_index = page number within shmem file
772 * shmem_page_offset = offset within page in shmem file
773 * data_page_index = page number in get_user_pages return
774 * data_page_offset = offset within data_page_index page.
775 * page_length = bytes to copy for this page
776 */
777 shmem_page_index = offset / PAGE_SIZE;
778 shmem_page_offset = offset & ~PAGE_MASK;
779 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
780 data_page_offset = data_ptr & ~PAGE_MASK;
781
782 page_length = remain;
783 if ((shmem_page_offset + page_length) > PAGE_SIZE)
784 page_length = PAGE_SIZE - shmem_page_offset;
785 if ((data_page_offset + page_length) > PAGE_SIZE)
786 page_length = PAGE_SIZE - data_page_offset;
787
788 ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
789 shmem_page_offset,
790 user_pages[data_page_index],
791 data_page_offset,
792 page_length);
793 if (ret)
794 goto fail_put_pages;
795
796 remain -= page_length;
797 data_ptr += page_length;
798 offset += page_length;
799 }
800
801fail_put_pages:
802 i915_gem_object_put_pages(obj);
803fail_unlock:
804 mutex_unlock(&dev->struct_mutex);
805fail_put_user_pages:
806 for (i = 0; i < pinned_pages; i++)
807 page_cache_release(user_pages[i]);
808 kfree(user_pages);
809
810 return ret;
811}
812
813/**
814 * Writes data to the object referenced by handle.
815 *
816 * On error, the contents of the buffer that were to be modified are undefined.
817 */
818int
819i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
820 struct drm_file *file_priv)
821{
822 struct drm_i915_gem_pwrite *args = data;
823 struct drm_gem_object *obj;
824 struct drm_i915_gem_object *obj_priv;
825 int ret = 0;
826
827 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
828 if (obj == NULL)
829 return -EBADF;
830 obj_priv = obj->driver_private;
831
832 /* Bounds check destination.
833 *
834 * XXX: This could use review for overflow issues...
835 */
836 if (args->offset > obj->size || args->size > obj->size ||
837 args->offset + args->size > obj->size) {
838 drm_gem_object_unreference(obj);
839 return -EINVAL;
840 }
841
842 /* We can only do the GTT pwrite on untiled buffers, as otherwise
843 * it would end up going through the fenced access, and we'll get
844 * different detiling behavior between reading and writing.
845 * pread/pwrite currently are reading and writing from the CPU
846 * perspective, requiring manual detiling by the client.
847 */
848 if (obj_priv->phys_obj)
849 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
850 else if (obj_priv->tiling_mode == I915_TILING_NONE &&
851 dev->gtt_total != 0) {
852 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
853 if (ret == -EFAULT) {
854 ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
855 file_priv);
856 }
857 } else {
858 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
859 if (ret == -EFAULT) {
860 ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
861 file_priv);
862 }
863 }
864
865#if WATCH_PWRITE
866 if (ret)
867 DRM_INFO("pwrite failed %d\n", ret);
868#endif
869
870 drm_gem_object_unreference(obj);
871
872 return ret;
873}
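/*
 * Illustrative only (not part of this file): a userspace pwrite sketch,
 * assuming libdrm's drmIoctl(), the uapi structs from i915_drm.h, and an
 * existing fd/handle. Which kernel path services it (phys, GTT fast/slow,
 * or shmem fast/slow) follows the selection logic in the ioctl above.
 */
#if 0
	struct drm_i915_gem_pwrite pwrite_arg;
	static const char payload[] = "hello";

	memset(&pwrite_arg, 0, sizeof(pwrite_arg));
	pwrite_arg.handle = handle;
	pwrite_arg.offset = 0;
	pwrite_arg.size = sizeof(payload);
	pwrite_arg.data_ptr = (uintptr_t)payload;
	drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite_arg);
#endif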
874
875/**
876 * Called when user space prepares to use an object with the CPU, either
877 * through the mmap ioctl's mapping or a GTT mapping.
878 */
879int
880i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
881 struct drm_file *file_priv)
882{
883 struct drm_i915_gem_set_domain *args = data;
884 struct drm_gem_object *obj;
885 uint32_t read_domains = args->read_domains;
886 uint32_t write_domain = args->write_domain;
887 int ret;
888
889 if (!(dev->driver->driver_features & DRIVER_GEM))
890 return -ENODEV;
891
892 /* Only handle setting domains to types used by the CPU. */
893 if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
894 return -EINVAL;
895
896 if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
897 return -EINVAL;
898
899 /* Having something in the write domain implies it's in the read
900 * domain, and only that read domain. Enforce that in the request.
901 */
902 if (write_domain != 0 && read_domains != write_domain)
903 return -EINVAL;
904
905 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
906 if (obj == NULL)
907 return -EBADF;
908
909 mutex_lock(&dev->struct_mutex);
910#if WATCH_BUF
911 DRM_INFO("set_domain_ioctl %p(%d), %08x %08x\n",
912 obj, obj->size, read_domains, write_domain);
913#endif
914 if (read_domains & I915_GEM_DOMAIN_GTT) {
915 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
916
917 /* Silently promote "you're not bound, there was nothing to do"
918 * to success, since the client was just asking us to
919 * make sure everything was done.
920 */
921 if (ret == -EINVAL)
922 ret = 0;
923 } else {
924 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
925 }
926
927 drm_gem_object_unreference(obj);
928 mutex_unlock(&dev->struct_mutex);
929 return ret;
930}
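/*
 * Illustrative only (not part of this file): a userspace sketch of the
 * set_domain rules enforced above, assuming libdrm's drmIoctl() and an
 * existing fd/handle. A non-zero write_domain must equal read_domains,
 * and only the CPU and GTT domains are accepted here.
 */
#if 0
	struct drm_i915_gem_set_domain sd;

	memset(&sd, 0, sizeof(sd));
	sd.handle = handle;
	sd.read_domains = I915_GEM_DOMAIN_CPU;
	sd.write_domain = I915_GEM_DOMAIN_CPU;	/* implies the CPU read domain */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
#endif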
931
932/**
933 * Called when user space has done writes to this buffer
934 */
935int
936i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
937 struct drm_file *file_priv)
938{
939 struct drm_i915_gem_sw_finish *args = data;
940 struct drm_gem_object *obj;
941 struct drm_i915_gem_object *obj_priv;
942 int ret = 0;
943
944 if (!(dev->driver->driver_features & DRIVER_GEM))
945 return -ENODEV;
946
947 mutex_lock(&dev->struct_mutex);
948 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
949 if (obj == NULL) {
950 mutex_unlock(&dev->struct_mutex);
951 return -EBADF;
952 }
953
954#if WATCH_BUF
955 DRM_INFO("%s: sw_finish %d (%p %d)\n",
956 __func__, args->handle, obj, obj->size);
957#endif
958 obj_priv = obj->driver_private;
959
960 /* Pinned buffers may be scanout, so flush the cache */
961 if (obj_priv->pin_count)
962 i915_gem_object_flush_cpu_write_domain(obj);
963
964 drm_gem_object_unreference(obj);
965 mutex_unlock(&dev->struct_mutex);
966 return ret;
967}
968
969/**
970 * Maps the contents of an object, returning the address it is mapped
971 * into.
972 *
973 * While the mapping holds a reference on the contents of the object, it doesn't
974 * imply a ref on the object itself.
975 */
976int
977i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
978 struct drm_file *file_priv)
979{
980 struct drm_i915_gem_mmap *args = data;
981 struct drm_gem_object *obj;
982 loff_t offset;
983 unsigned long addr;
984
985 if (!(dev->driver->driver_features & DRIVER_GEM))
986 return -ENODEV;
987
988 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
989 if (obj == NULL)
990 return -EBADF;
991
992 offset = args->offset;
993
994 down_write(&current->mm->mmap_sem);
995 addr = do_mmap(obj->filp, 0, args->size,
996 PROT_READ | PROT_WRITE, MAP_SHARED,
997 args->offset);
998 up_write(&current->mm->mmap_sem);
999 mutex_lock(&dev->struct_mutex);
1000 drm_gem_object_unreference(obj);
1001 mutex_unlock(&dev->struct_mutex);
1002 if (IS_ERR((void *)addr))
1003 return addr;
1004
1005 args->addr_ptr = (uint64_t) addr;
1006
1007 return 0;
1008}
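/*
 * Illustrative only (not part of this file): mapping an object's shmem
 * backing store through the ioctl above, assuming libdrm's drmIoctl() and
 * an existing fd/handle/obj_size. The returned addr_ptr is an ordinary CPU
 * mapping of the object's pages.
 */
#if 0
	struct drm_i915_gem_mmap mmap_arg;

	memset(&mmap_arg, 0, sizeof(mmap_arg));
	mmap_arg.handle = handle;
	mmap_arg.offset = 0;
	mmap_arg.size = obj_size;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) == 0)
		memset((void *)(uintptr_t)mmap_arg.addr_ptr, 0, obj_size);
#endif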
1009
1010/**
1011 * i915_gem_fault - fault a page into the GTT
1012 * @vma: VMA in question
1013 * @vmf: fault info
1014 *
1015 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1016 * from userspace. The fault handler takes care of binding the object to
1017 * the GTT (if needed), allocating and programming a fence register (again,
1018 * only if needed based on whether the old reg is still valid or the object
1019 * is tiled) and inserting a new PTE into the faulting process.
1020 *
1021 * Note that the faulting process may involve evicting existing objects
1022 * from the GTT and/or fence registers to make room. So performance may
1023 * suffer if the GTT working set is large or there are few fence registers
1024 * left.
1025 */
1026int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1027{
1028 struct drm_gem_object *obj = vma->vm_private_data;
1029 struct drm_device *dev = obj->dev;
1030 struct drm_i915_private *dev_priv = dev->dev_private;
1031 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1032 pgoff_t page_offset;
1033 unsigned long pfn;
1034 int ret = 0;
1035 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1036
1037 /* We don't use vmf->pgoff since that has the fake offset */
1038 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1039 PAGE_SHIFT;
1040
1041 /* Now bind it into the GTT if needed */
1042 mutex_lock(&dev->struct_mutex);
1043 if (!obj_priv->gtt_space) {
1044 ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
1045 if (ret) {
1046 mutex_unlock(&dev->struct_mutex);
1047 return VM_FAULT_SIGBUS;
1048 }
1049 list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
1050 }
1051
1052 /* Need a new fence register? */
1053 if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
1054 obj_priv->tiling_mode != I915_TILING_NONE) {
1055 ret = i915_gem_object_get_fence_reg(obj, write);
1056 if (ret) {
1057 mutex_unlock(&dev->struct_mutex);
1058 return VM_FAULT_SIGBUS;
1059 }
1060 }
1061
1062 pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1063 page_offset;
1064
1065 /* Finally, remap it using the new GTT offset */
1066 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1067
1068 mutex_unlock(&dev->struct_mutex);
1069
1070 switch (ret) {
1071 case -ENOMEM:
1072 case -EAGAIN:
1073 return VM_FAULT_OOM;
1074 case -EFAULT:
1075 case -EINVAL:
1076 return VM_FAULT_SIGBUS;
1077 default:
1078 return VM_FAULT_NOPAGE;
1079 }
1080}
1081
1082/**
1083 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1084 * @obj: obj in question
1085 *
1086 * GEM memory mapping works by handing back to userspace a fake mmap offset
1087 * it can use in a subsequent mmap(2) call. The DRM core code then looks
1088 * up the object based on the offset and sets up the various memory mapping
1089 * structures.
1090 *
1091 * This routine allocates and attaches a fake offset for @obj.
1092 */
1093static int
1094i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1095{
1096 struct drm_device *dev = obj->dev;
1097 struct drm_gem_mm *mm = dev->mm_private;
1098 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1099 struct drm_map_list *list;
1100 struct drm_local_map *map;
1101 int ret = 0;
1102
1103 /* Set the object up for mmap'ing */
1104 list = &obj->map_list;
1105 list->map = drm_calloc(1, sizeof(struct drm_map_list),
1106 DRM_MEM_DRIVER);
1107 if (!list->map)
1108 return -ENOMEM;
1109
1110 map = list->map;
1111 map->type = _DRM_GEM;
1112 map->size = obj->size;
1113 map->handle = obj;
1114
1115 /* Get a DRM GEM mmap offset allocated... */
1116 list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1117 obj->size / PAGE_SIZE, 0, 0);
1118 if (!list->file_offset_node) {
1119 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1120 ret = -ENOMEM;
1121 goto out_free_list;
1122 }
1123
1124 list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1125 obj->size / PAGE_SIZE, 0);
1126 if (!list->file_offset_node) {
1127 ret = -ENOMEM;
1128 goto out_free_list;
1129 }
1130
1131 list->hash.key = list->file_offset_node->start;
1132 if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1133 DRM_ERROR("failed to add to map hash\n");
1134 goto out_free_mm;
1135 }
1136
1137 /* By now we should be all set, any drm_mmap request on the offset
1138 * below will get to our mmap & fault handler */
1139 obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1140
1141 return 0;
1142
1143out_free_mm:
1144 drm_mm_put_block(list->file_offset_node);
1145out_free_list:
1146 drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);
1147
1148 return ret;
1149}
1150
1151static void
1152i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1153{
1154 struct drm_device *dev = obj->dev;
1155 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1156 struct drm_gem_mm *mm = dev->mm_private;
1157 struct drm_map_list *list;
1158
1159 list = &obj->map_list;
1160 drm_ht_remove_item(&mm->offset_hash, &list->hash);
1161
1162 if (list->file_offset_node) {
1163 drm_mm_put_block(list->file_offset_node);
1164 list->file_offset_node = NULL;
1165 }
1166
1167 if (list->map) {
1168 drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);
1169 list->map = NULL;
1170 }
1171
1172 obj_priv->mmap_offset = 0;
1173}
1174
1175/**
1176 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1177 * @obj: object to check
1178 *
1179 * Return the required GTT alignment for an object, taking into account
1180 * potential fence register mapping if needed.
1181 */
1182static uint32_t
1183i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1184{
1185 struct drm_device *dev = obj->dev;
1186 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1187 int start, i;
1188
1189 /*
1190 * Minimum alignment is 4k (GTT page size), but might be greater
1191 * if a fence register is needed for the object.
1192 */
1193 if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1194 return 4096;
1195
1196 /*
1197 * Previous chips need to be aligned to the size of the smallest
1198 * fence register that can contain the object.
1199 */
1200 if (IS_I9XX(dev))
1201 start = 1024*1024;
1202 else
1203 start = 512*1024;
1204
1205 for (i = start; i < obj->size; i <<= 1)
1206 ;
1207
1208 return i;
1209}
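/*
 * Illustrative only: the doubling loop above rounds up to the smallest
 * pre-965 fence size that covers the object. A worked example (assumed
 * object size) for a 3 MiB tiled object on a 9xx part:
 */
#if 0
	uint32_t align = 1024*1024;		/* 9xx fences start at 1 MiB */

	while (align < 3*1024*1024)		/* 1 MiB -> 2 MiB -> 4 MiB */
		align <<= 1;
	/* align == 4 MiB, matching i915_gem_get_gtt_alignment() */
#endif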
1210
1211/**
1212 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1213 * @dev: DRM device
1214 * @data: GTT mapping ioctl data
1215 * @file_priv: GEM object info
1216 *
1217 * Simply returns the fake offset to userspace so it can mmap it.
1218 * The mmap call will end up in drm_gem_mmap(), which will set things
1219 * up so we can get faults in the handler above.
1220 *
1221 * The fault handler will take care of binding the object into the GTT
1222 * (since it may have been evicted to make room for something), allocating
1223 * a fence register, and mapping the appropriate aperture address into
1224 * userspace.
1225 */
1226int
1227i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1228 struct drm_file *file_priv)
1229{
1230 struct drm_i915_gem_mmap_gtt *args = data;
1231 struct drm_i915_private *dev_priv = dev->dev_private;
1232 struct drm_gem_object *obj;
1233 struct drm_i915_gem_object *obj_priv;
1234 int ret;
1235
1236 if (!(dev->driver->driver_features & DRIVER_GEM))
1237 return -ENODEV;
1238
1239 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1240 if (obj == NULL)
1241 return -EBADF;
1242
1243 mutex_lock(&dev->struct_mutex);
1244
1245 obj_priv = obj->driver_private;
1246
1247 if (!obj_priv->mmap_offset) {
1248 ret = i915_gem_create_mmap_offset(obj);
1249 if (ret) {
1250 drm_gem_object_unreference(obj);
1251 mutex_unlock(&dev->struct_mutex);
1252 return ret;
1253 }
1254 }
1255
1256 args->offset = obj_priv->mmap_offset;
1257
1258 obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);
1259
1260 /* Make sure the alignment is correct for fence regs etc */
1261 if (obj_priv->agp_mem &&
1262 (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
1263 drm_gem_object_unreference(obj);
1264 mutex_unlock(&dev->struct_mutex);
1265 return -EINVAL;
1266 }
1267
1268 /*
1269 * Pull it into the GTT so that we have a page list (makes the
1270 * initial fault faster and any subsequent flushing possible).
1271 */
1272 if (!obj_priv->agp_mem) {
1273 ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
1274 if (ret) {
1275 drm_gem_object_unreference(obj);
1276 mutex_unlock(&dev->struct_mutex);
1277 return ret;
1278 }
1279 list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
1280 }
1281
1282 drm_gem_object_unreference(obj);
1283 mutex_unlock(&dev->struct_mutex);
1284
1285 return 0;
1286}
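/*
 * Illustrative only (not part of this file): consuming the fake offset
 * returned above, assuming libdrm's drmIoctl() and an existing
 * fd/handle/obj_size. mmap() on the DRM fd at that offset lands in
 * drm_gem_mmap()/i915_gem_fault(), which bind the object and insert
 * GTT PTEs on demand.
 */
#if 0
	struct drm_i915_gem_mmap_gtt mmap_gtt;
	void *ptr;

	memset(&mmap_gtt, 0, sizeof(mmap_gtt));
	mmap_gtt.handle = handle;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_gtt) == 0)
		ptr = mmap(NULL, obj_size, PROT_READ | PROT_WRITE,
			   MAP_SHARED, fd, mmap_gtt.offset);
#endif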
1287
1288static void
1289i915_gem_object_put_pages(struct drm_gem_object *obj)
1290{
1291 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1292 int page_count = obj->size / PAGE_SIZE;
1293 int i;
1294
1295 BUG_ON(obj_priv->pages_refcount == 0);
1296
1297 if (--obj_priv->pages_refcount != 0)
1298 return;
1299
1300 for (i = 0; i < page_count; i++)
1301 if (obj_priv->pages[i] != NULL) {
1302 if (obj_priv->dirty)
1303 set_page_dirty(obj_priv->pages[i]);
1304 mark_page_accessed(obj_priv->pages[i]);
1305 page_cache_release(obj_priv->pages[i]);
1306 }
1307 obj_priv->dirty = 0;
1308
1309 drm_free(obj_priv->pages,
1310 page_count * sizeof(struct page *),
1311 DRM_MEM_DRIVER);
1312 obj_priv->pages = NULL;
1313}
1314
1315static void
1316i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
1317{
1318 struct drm_device *dev = obj->dev;
1319 drm_i915_private_t *dev_priv = dev->dev_private;
1320 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1321
1322 /* Add a reference if we're newly entering the active list. */
1323 if (!obj_priv->active) {
1324 drm_gem_object_reference(obj);
1325 obj_priv->active = 1;
1326 }
1327 /* Move from whatever list we were on to the tail of execution. */
1328 list_move_tail(&obj_priv->list,
1329 &dev_priv->mm.active_list);
1330 obj_priv->last_rendering_seqno = seqno;
1331}
1332
1333static void
1334i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1335{
1336 struct drm_device *dev = obj->dev;
1337 drm_i915_private_t *dev_priv = dev->dev_private;
1338 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1339
1340 BUG_ON(!obj_priv->active);
1341 list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1342 obj_priv->last_rendering_seqno = 0;
1343}
1344
1345static void
1346i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1347{
1348 struct drm_device *dev = obj->dev;
1349 drm_i915_private_t *dev_priv = dev->dev_private;
1350 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1351
1352 i915_verify_inactive(dev, __FILE__, __LINE__);
1353 if (obj_priv->pin_count != 0)
1354 list_del_init(&obj_priv->list);
1355 else
1356 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1357
1358 obj_priv->last_rendering_seqno = 0;
1359 if (obj_priv->active) {
1360 obj_priv->active = 0;
1361 drm_gem_object_unreference(obj);
1362 }
1363 i915_verify_inactive(dev, __FILE__, __LINE__);
1364}
1365
1366/**
1367 * Creates a new sequence number, emitting a write of it to the status page
1368 * plus an interrupt, which will trigger i915_user_interrupt_handler.
1369 *
1370 * Must be called with struct_mutex held.
1371 *
1372 * Returned sequence numbers are nonzero on success.
1373 */
1374static uint32_t
1375i915_add_request(struct drm_device *dev, uint32_t flush_domains)
1376{
1377 drm_i915_private_t *dev_priv = dev->dev_private;
1378 struct drm_i915_gem_request *request;
1379 uint32_t seqno;
1380 int was_empty;
1381 RING_LOCALS;
1382
1383 request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
1384 if (request == NULL)
1385 return 0;
1386
1387 /* Grab the seqno we're going to make this request be, and bump the
1388 * next (skipping 0 so it can be the reserved no-seqno value).
1389 */
1390 seqno = dev_priv->mm.next_gem_seqno;
1391 dev_priv->mm.next_gem_seqno++;
1392 if (dev_priv->mm.next_gem_seqno == 0)
1393 dev_priv->mm.next_gem_seqno++;
1394
1395 BEGIN_LP_RING(4);
1396 OUT_RING(MI_STORE_DWORD_INDEX);
1397 OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1398 OUT_RING(seqno);
1399
1400 OUT_RING(MI_USER_INTERRUPT);
1401 ADVANCE_LP_RING();
1402
1403 DRM_DEBUG("%d\n", seqno);
1404
1405 request->seqno = seqno;
1406 request->emitted_jiffies = jiffies;
1407 was_empty = list_empty(&dev_priv->mm.request_list);
1408 list_add_tail(&request->list, &dev_priv->mm.request_list);
1409
1410 /* Associate any objects on the flushing list matching the write
1411 * domain we're flushing with our flush.
1412 */
1413 if (flush_domains != 0) {
1414 struct drm_i915_gem_object *obj_priv, *next;
1415
1416 list_for_each_entry_safe(obj_priv, next,
1417 &dev_priv->mm.flushing_list, list) {
1418 struct drm_gem_object *obj = obj_priv->obj;
1419
1420 if ((obj->write_domain & flush_domains) ==
1421 obj->write_domain) {
1422 obj->write_domain = 0;
1423 i915_gem_object_move_to_active(obj, seqno);
1424 }
1425 }
1426
1427 }
1428
1429 if (was_empty && !dev_priv->mm.suspended)
1430 schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
1431 return seqno;
1432}
1433
1434/**
1435 * Command execution barrier
1436 *
1437 * Ensures that all commands in the ring are finished
1438 * before signalling the CPU
1439 */
1440static uint32_t
1441i915_retire_commands(struct drm_device *dev)
1442{
1443 drm_i915_private_t *dev_priv = dev->dev_private;
1444 uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
1445 uint32_t flush_domains = 0;
1446 RING_LOCALS;
1447
1448 /* The sampler always gets flushed on i965 (sigh) */
1449 if (IS_I965G(dev))
1450 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1451 BEGIN_LP_RING(2);
1452 OUT_RING(cmd);
1453 OUT_RING(0); /* noop */
1454 ADVANCE_LP_RING();
1455 return flush_domains;
1456}
1457
1458/**
1459 * Moves buffers associated only with the given active seqno from the active
1460 * to inactive list, potentially freeing them.
1461 */
1462static void
1463i915_gem_retire_request(struct drm_device *dev,
1464 struct drm_i915_gem_request *request)
1465{
1466 drm_i915_private_t *dev_priv = dev->dev_private;
1467
1468 /* Move any buffers on the active list that are no longer referenced
1469 * by the ringbuffer to the flushing/inactive lists as appropriate.
1470 */
1471 while (!list_empty(&dev_priv->mm.active_list)) {
1472 struct drm_gem_object *obj;
1473 struct drm_i915_gem_object *obj_priv;
1474
1475 obj_priv = list_first_entry(&dev_priv->mm.active_list,
1476 struct drm_i915_gem_object,
1477 list);
1478 obj = obj_priv->obj;
1479
1480 /* If the seqno being retired doesn't match the oldest in the
1481 * list, then the oldest in the list must still be newer than
1482 * this seqno.
1483 */
1484 if (obj_priv->last_rendering_seqno != request->seqno)
1485 return;
1486
1487#if WATCH_LRU
1488 DRM_INFO("%s: retire %d moves to inactive list %p\n",
1489 __func__, request->seqno, obj);
1490#endif
1491
1492 if (obj->write_domain != 0)
1493 i915_gem_object_move_to_flushing(obj);
1494 else
1495 i915_gem_object_move_to_inactive(obj);
1496 }
1497}
1498
1499/**
1500 * Returns true if seq1 is later than or equal to seq2.
1501 */
1502static int
1503i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1504{
1505 return (int32_t)(seq1 - seq2) >= 0;
1506}
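/*
 * Illustrative only: why the signed subtraction above survives 32-bit
 * wrap-around. With assumed seqno values just across the wrap point:
 */
#if 0
	uint32_t seq1 = 0x00000002, seq2 = 0xfffffffe;
	int later = (int32_t)(seq1 - seq2) >= 0;	/* 1: 2 counts as "after" 0xfffffffe */

	/* a plain seq1 >= seq2 comparison would wrongly report 0 here */
#endif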
1507
1508uint32_t
1509i915_get_gem_seqno(struct drm_device *dev)
1510{
1511 drm_i915_private_t *dev_priv = dev->dev_private;
1512
1513 return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
1514}
1515
1516/**
1517 * This function clears the request list as sequence numbers are passed.
1518 */
1519void
1520i915_gem_retire_requests(struct drm_device *dev)
1521{
1522 drm_i915_private_t *dev_priv = dev->dev_private;
1523 uint32_t seqno;
1524
1525 if (!dev_priv->hw_status_page)
1526 return;
1527
1528 seqno = i915_get_gem_seqno(dev);
1529
1530 while (!list_empty(&dev_priv->mm.request_list)) {
1531 struct drm_i915_gem_request *request;
1532 uint32_t retiring_seqno;
1533
1534 request = list_first_entry(&dev_priv->mm.request_list,
1535 struct drm_i915_gem_request,
1536 list);
1537 retiring_seqno = request->seqno;
1538
1539 if (i915_seqno_passed(seqno, retiring_seqno) ||
1540 dev_priv->mm.wedged) {
1541 i915_gem_retire_request(dev, request);
1542
1543 list_del(&request->list);
1544 drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
1545 } else
1546 break;
1547 }
1548}
1549
1550void
1551i915_gem_retire_work_handler(struct work_struct *work)
1552{
1553 drm_i915_private_t *dev_priv;
1554 struct drm_device *dev;
1555
1556 dev_priv = container_of(work, drm_i915_private_t,
1557 mm.retire_work.work);
1558 dev = dev_priv->dev;
1559
1560 mutex_lock(&dev->struct_mutex);
1561 i915_gem_retire_requests(dev);
1562 if (!dev_priv->mm.suspended &&
1563 !list_empty(&dev_priv->mm.request_list))
1564 schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
1565 mutex_unlock(&dev->struct_mutex);
1566}
1567
1568/**
1569 * Waits for a sequence number to be signaled, and cleans up the
1570 * request and object lists appropriately for that event.
1571 */
1572static int
1573i915_wait_request(struct drm_device *dev, uint32_t seqno)
1574{
1575 drm_i915_private_t *dev_priv = dev->dev_private;
1576 int ret = 0;
1577
1578 BUG_ON(seqno == 0);
1579
1580 if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
1581 dev_priv->mm.waiting_gem_seqno = seqno;
1582 i915_user_irq_get(dev);
1583 ret = wait_event_interruptible(dev_priv->irq_queue,
1584 i915_seqno_passed(i915_get_gem_seqno(dev),
1585 seqno) ||
1586 dev_priv->mm.wedged);
1587 i915_user_irq_put(dev);
1588 dev_priv->mm.waiting_gem_seqno = 0;
1589 }
1590 if (dev_priv->mm.wedged)
1591 ret = -EIO;
1592
1593 if (ret && ret != -ERESTARTSYS)
1594 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
1595 __func__, ret, seqno, i915_get_gem_seqno(dev));
1596
1597 /* Directly dispatch request retiring. While we have the work queue
1598 * to handle this, the waiter on a request often wants an associated
1599 * buffer to have made it to the inactive list, and we would need
1600 * a separate wait queue to handle that.
1601 */
1602 if (ret == 0)
1603 i915_gem_retire_requests(dev);
1604
1605 return ret;
1606}
1607
1608static void
1609i915_gem_flush(struct drm_device *dev,
1610 uint32_t invalidate_domains,
1611 uint32_t flush_domains)
1612{
1613 drm_i915_private_t *dev_priv = dev->dev_private;
1614 uint32_t cmd;
1615 RING_LOCALS;
1616
1617#if WATCH_EXEC
1618 DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
1619 invalidate_domains, flush_domains);
1620#endif
1621
1622 if (flush_domains & I915_GEM_DOMAIN_CPU)
1623 drm_agp_chipset_flush(dev);
1624
1625 if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
1626 I915_GEM_DOMAIN_GTT)) {
1627 /*
1628 * read/write caches:
1629 *
1630 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
1631 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
1632 * also flushed at 2d versus 3d pipeline switches.
1633 *
1634 * read-only caches:
1635 *
1636 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
1637 * MI_READ_FLUSH is set, and is always flushed on 965.
1638 *
1639 * I915_GEM_DOMAIN_COMMAND may not exist?
1640 *
1641 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
1642 * invalidated when MI_EXE_FLUSH is set.
1643 *
1644 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
1645 * invalidated with every MI_FLUSH.
1646 *
1647 * TLBs:
1648 *
1649 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
1650 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
1651 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
1652 * are flushed at any MI_FLUSH.
1653 */
1654
1655 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
1656 if ((invalidate_domains|flush_domains) &
1657 I915_GEM_DOMAIN_RENDER)
1658 cmd &= ~MI_NO_WRITE_FLUSH;
1659 if (!IS_I965G(dev)) {
1660 /*
1661 * On the 965, the sampler cache always gets flushed
1662 * and this bit is reserved.
1663 */
1664 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
1665 cmd |= MI_READ_FLUSH;
1666 }
1667 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
1668 cmd |= MI_EXE_FLUSH;
1669
1670#if WATCH_EXEC
1671 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
1672#endif
1673 BEGIN_LP_RING(2);
1674 OUT_RING(cmd);
1675 OUT_RING(0); /* noop */
1676 ADVANCE_LP_RING();
1677 }
1678}
1679
1680/**
1681 * Ensures that all rendering to the object has completed and the object is
1682 * safe to unbind from the GTT or access from the CPU.
1683 */
1684static int
1685i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1686{
1687 struct drm_device *dev = obj->dev;
1688 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1689 int ret;
1690
1691 /* This function only exists to support waiting for existing rendering,
1692 * not for emitting required flushes.
1693 */
1694 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
1695
1696 /* If there is rendering queued on the buffer being evicted, wait for
1697 * it.
1698 */
1699 if (obj_priv->active) {
1700#if WATCH_BUF
1701 DRM_INFO("%s: object %p wait for seqno %08x\n",
1702 __func__, obj, obj_priv->last_rendering_seqno);
1703#endif
1704 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
1705 if (ret != 0)
1706 return ret;
1707 }
1708
1709 return 0;
1710}
1711
1712/**
1713 * Unbinds an object from the GTT aperture.
1714 */
1715int
1716i915_gem_object_unbind(struct drm_gem_object *obj)
1717{
1718 struct drm_device *dev = obj->dev;
1719 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1720 loff_t offset;
1721 int ret = 0;
1722
1723#if WATCH_BUF
1724 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1725 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1726#endif
1727 if (obj_priv->gtt_space == NULL)
1728 return 0;
1729
1730 if (obj_priv->pin_count != 0) {
1731 DRM_ERROR("Attempting to unbind pinned buffer\n");
1732 return -EINVAL;
1733 }
1734
1735 /* Move the object to the CPU domain to ensure that
1736 * any possible CPU writes while it's not in the GTT
1737 * are flushed when we go to remap it. This will
1738 * also ensure that all pending GPU writes are finished
1739 * before we unbind.
1740 */
1741 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1742 if (ret) {
1743 if (ret != -ERESTARTSYS)
1744 DRM_ERROR("set_domain failed: %d\n", ret);
1745 return ret;
1746 }
1747
1748 if (obj_priv->agp_mem != NULL) {
1749 drm_unbind_agp(obj_priv->agp_mem);
1750 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1751 obj_priv->agp_mem = NULL;
1752 }
1753
1754 BUG_ON(obj_priv->active);
1755
1756 /* blow away mappings if mapped through GTT */
1757 offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT;
1758 if (dev->dev_mapping)
1759 unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1);
1760
1761 if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1762 i915_gem_clear_fence_reg(obj);
1763
1764 i915_gem_object_put_pages(obj);
1765
1766 if (obj_priv->gtt_space) {
1767 atomic_dec(&dev->gtt_count);
1768 atomic_sub(obj->size, &dev->gtt_memory);
1769
1770 drm_mm_put_block(obj_priv->gtt_space);
1771 obj_priv->gtt_space = NULL;
1772 }
1773
1774 /* Remove ourselves from the LRU list if present. */
1775 if (!list_empty(&obj_priv->list))
1776 list_del_init(&obj_priv->list);
1777
1778 return 0;
1779}
1780
1781static int
1782i915_gem_evict_something(struct drm_device *dev)
1783{
1784 drm_i915_private_t *dev_priv = dev->dev_private;
1785 struct drm_gem_object *obj;
1786 struct drm_i915_gem_object *obj_priv;
1787 int ret = 0;
1788
1789 for (;;) {
1790 /* If there's an inactive buffer available now, grab it
1791 * and be done.
1792 */
1793 if (!list_empty(&dev_priv->mm.inactive_list)) {
1794 obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
1795 struct drm_i915_gem_object,
1796 list);
1797 obj = obj_priv->obj;
1798 BUG_ON(obj_priv->pin_count != 0);
1799#if WATCH_LRU
1800 DRM_INFO("%s: evicting %p\n", __func__, obj);
1801#endif
1802 BUG_ON(obj_priv->active);
1803
1804 /* Wait on the rendering and unbind the buffer. */
1805 ret = i915_gem_object_unbind(obj);
1806 break;
1807 }
1808
1809 /* If we didn't get anything, but the ring is still processing
1810 * things, wait for one of those things to finish and hopefully
1811 * leave us a buffer to evict.
1812 */
1813 if (!list_empty(&dev_priv->mm.request_list)) {
1814 struct drm_i915_gem_request *request;
1815
1816 request = list_first_entry(&dev_priv->mm.request_list,
1817 struct drm_i915_gem_request,
1818 list);
1819
1820 ret = i915_wait_request(dev, request->seqno);
1821 if (ret)
1822 break;
1823
1824 /* if waiting caused an object to become inactive,
1825 * then loop around and wait for it. Otherwise, we
1826 * assume that waiting freed and unbound something,
1827 * so there should now be some space in the GTT
1828 */
1829 if (!list_empty(&dev_priv->mm.inactive_list))
1830 continue;
1831 break;
1832 }
1833
1834 /* If we didn't have anything on the request list but there
1835 * are buffers awaiting a flush, emit one and try again.
1836 * When we wait on it, those buffers waiting for that flush
1837 * will get moved to inactive.
1838 */
1839 if (!list_empty(&dev_priv->mm.flushing_list)) {
1840 obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
1841 struct drm_i915_gem_object,
1842 list);
1843 obj = obj_priv->obj;
1844
1845 i915_gem_flush(dev,
1846 obj->write_domain,
1847 obj->write_domain);
1848 i915_add_request(dev, obj->write_domain);
1849
1850 obj = NULL;
1851 continue;
1852 }
1853
1854 DRM_ERROR("inactive empty %d request empty %d "
1855 "flushing empty %d\n",
1856 list_empty(&dev_priv->mm.inactive_list),
1857 list_empty(&dev_priv->mm.request_list),
1858 list_empty(&dev_priv->mm.flushing_list));
1859 /* If we didn't do any of the above, there's nothing to be done
1860 * and we just can't fit it in.
1861 */
1862 return -ENOMEM;
1863 }
1864 return ret;
1865}
1866
ac94a962
KP
1867static int
1868i915_gem_evict_everything(struct drm_device *dev)
1869{
1870 int ret;
1871
1872 for (;;) {
1873 ret = i915_gem_evict_something(dev);
1874 if (ret != 0)
1875 break;
1876 }
15c35334
OA
1877 if (ret == -ENOMEM)
1878 return 0;
ac94a962
KP
1879 return ret;
1880}
1881
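/*
 * Pin the object's shmem-backed pages in memory: the pages are read in
 * via read_mapping_page() and cached in obj_priv->pages, with
 * pages_refcount allowing nested callers to share the array. Paired
 * with i915_gem_object_put_pages().
 */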
673a394b 1882static int
856fa198 1883i915_gem_object_get_pages(struct drm_gem_object *obj)
673a394b
EA
1884{
1885 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1886 int page_count, i;
1887 struct address_space *mapping;
1888 struct inode *inode;
1889 struct page *page;
1890 int ret;
1891
856fa198 1892 if (obj_priv->pages_refcount++ != 0)
673a394b
EA
1893 return 0;
1894
1895 /* Get the list of pages out of our struct file. They'll be pinned
1896 * at this point until we release them.
1897 */
1898 page_count = obj->size / PAGE_SIZE;
856fa198
EA
1899 BUG_ON(obj_priv->pages != NULL);
1900 obj_priv->pages = drm_calloc(page_count, sizeof(struct page *),
1901 DRM_MEM_DRIVER);
1902 if (obj_priv->pages == NULL) {
673a394b 1903 DRM_ERROR("Failed to allocate page list\n");
856fa198 1904 obj_priv->pages_refcount--;
673a394b
EA
1905 return -ENOMEM;
1906 }
1907
1908 inode = obj->filp->f_path.dentry->d_inode;
1909 mapping = inode->i_mapping;
1910 for (i = 0; i < page_count; i++) {
1911 page = read_mapping_page(mapping, i, NULL);
1912 if (IS_ERR(page)) {
1913 ret = PTR_ERR(page);
1914 DRM_ERROR("read_mapping_page failed: %d\n", ret);
856fa198 1915 i915_gem_object_put_pages(obj);
673a394b
EA
1916 return ret;
1917 }
856fa198 1918 obj_priv->pages[i] = page;
673a394b
EA
1919 }
1920 return 0;
1921}
1922
de151cf6
JB
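/*
 * Program a 965-class fence register: a 64-bit value carrying the end
 * address (last 4KB page of the object) in the upper dword, the start
 * address in the lower dword, the pitch in 128-byte units minus one,
 * an optional Y-tiling bit and the valid bit.
 */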
1923static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
1924{
1925 struct drm_gem_object *obj = reg->obj;
1926 struct drm_device *dev = obj->dev;
1927 drm_i915_private_t *dev_priv = dev->dev_private;
1928 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1929 int regnum = obj_priv->fence_reg;
1930 uint64_t val;
1931
1932 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
1933 0xfffff000) << 32;
1934 val |= obj_priv->gtt_offset & 0xfffff000;
1935 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
1936 if (obj_priv->tiling_mode == I915_TILING_Y)
1937 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
1938 val |= I965_FENCE_REG_VALID;
1939
1940 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
1941}
1942
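/*
 * Program a 915/945-class fence register: the object must be aligned
 * to its size and to the 1MB fence granularity, the pitch is encoded
 * as a power-of-two number of tile widths (512-byte X tiles, 128-byte
 * Y tiles on chips that have them), and registers 8-15 live at the
 * separate FENCE_REG_945_8 offset.
 */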
1943static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
1944{
1945 struct drm_gem_object *obj = reg->obj;
1946 struct drm_device *dev = obj->dev;
1947 drm_i915_private_t *dev_priv = dev->dev_private;
1948 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1949 int regnum = obj_priv->fence_reg;
0f973f27 1950 int tile_width;
dc529a4f 1951 uint32_t fence_reg, val;
de151cf6
JB
1952 uint32_t pitch_val;
1953
1954 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
1955 (obj_priv->gtt_offset & (obj->size - 1))) {
f06da264 1956 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
0f973f27 1957 __func__, obj_priv->gtt_offset, obj->size);
de151cf6
JB
1958 return;
1959 }
1960
0f973f27
JB
1961 if (obj_priv->tiling_mode == I915_TILING_Y &&
1962 HAS_128_BYTE_Y_TILING(dev))
1963 tile_width = 128;
de151cf6 1964 else
0f973f27
JB
1965 tile_width = 512;
1966
1967 /* Note: pitch better be a power of two tile widths */
1968 pitch_val = obj_priv->stride / tile_width;
1969 pitch_val = ffs(pitch_val) - 1;
de151cf6
JB
1970
1971 val = obj_priv->gtt_offset;
1972 if (obj_priv->tiling_mode == I915_TILING_Y)
1973 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
1974 val |= I915_FENCE_SIZE_BITS(obj->size);
1975 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
1976 val |= I830_FENCE_REG_VALID;
1977
dc529a4f
EA
1978 if (regnum < 8)
1979 fence_reg = FENCE_REG_830_0 + (regnum * 4);
1980 else
1981 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
1982 I915_WRITE(fence_reg, val);
de151cf6
JB
1983}
1984
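/*
 * Program an 830-class fence register: 512KB-aligned objects only,
 * pitch in 128-byte units minus one, and the fence size encoded in
 * bits 8-11 of the register.
 */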
1985static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
1986{
1987 struct drm_gem_object *obj = reg->obj;
1988 struct drm_device *dev = obj->dev;
1989 drm_i915_private_t *dev_priv = dev->dev_private;
1990 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1991 int regnum = obj_priv->fence_reg;
1992 uint32_t val;
1993 uint32_t pitch_val;
8d7773a3 1994 uint32_t fence_size_bits;
de151cf6 1995
8d7773a3 1996 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
de151cf6 1997 (obj_priv->gtt_offset & (obj->size - 1))) {
8d7773a3 1998 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
0f973f27 1999 __func__, obj_priv->gtt_offset);
de151cf6
JB
2000 return;
2001 }
2002
2003 pitch_val = (obj_priv->stride / 128) - 1;
8d7773a3 2004 WARN_ON(pitch_val & ~0x0000000f);
de151cf6
JB
2005 val = obj_priv->gtt_offset;
2006 if (obj_priv->tiling_mode == I915_TILING_Y)
2007 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
8d7773a3
DV
2008 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2009 WARN_ON(fence_size_bits & ~0x00000f00);
2010 val |= fence_size_bits;
de151cf6
JB
2011 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2012 val |= I830_FENCE_REG_VALID;
2013
2014 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2015
2016}
2017
2018/**
2019 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2020 * @obj: object to map through a fence reg
0f973f27 2021 * @write: object is about to be written
de151cf6
JB
2022 *
2023 * When mapping objects through the GTT, userspace wants to be able to write
2024 * to them without having to worry about swizzling if the object is tiled.
2025 *
2026 * This function walks the fence regs looking for a free one for @obj,
2027 * stealing one if it can't find any.
2028 *
2029 * It then sets up the reg based on the object's properties: address, pitch
2030 * and tiling format.
2031 */
d9ddcb96 2032static int
0f973f27 2033i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write)
de151cf6
JB
2034{
2035 struct drm_device *dev = obj->dev;
79e53945 2036 struct drm_i915_private *dev_priv = dev->dev_private;
de151cf6
JB
2037 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2038 struct drm_i915_fence_reg *reg = NULL;
fc7170ba
CW
2039 struct drm_i915_gem_object *old_obj_priv = NULL;
2040 int i, ret, avail;
de151cf6
JB
2041
2042 switch (obj_priv->tiling_mode) {
2043 case I915_TILING_NONE:
2044 WARN(1, "allocating a fence for non-tiled object?\n");
2045 break;
2046 case I915_TILING_X:
0f973f27
JB
2047 if (!obj_priv->stride)
2048 return -EINVAL;
2049 WARN((obj_priv->stride & (512 - 1)),
2050 "object 0x%08x is X tiled but has non-512B pitch\n",
2051 obj_priv->gtt_offset);
de151cf6
JB
2052 break;
2053 case I915_TILING_Y:
0f973f27
JB
2054 if (!obj_priv->stride)
2055 return -EINVAL;
2056 WARN((obj_priv->stride & (128 - 1)),
2057 "object 0x%08x is Y tiled but has non-128B pitch\n",
2058 obj_priv->gtt_offset);
de151cf6
JB
2059 break;
2060 }
2061
2062 /* First try to find a free reg */
9b2412f9 2063try_again:
fc7170ba 2064 avail = 0;
de151cf6
JB
2065 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2066 reg = &dev_priv->fence_regs[i];
2067 if (!reg->obj)
2068 break;
fc7170ba
CW
2069
2070 old_obj_priv = reg->obj->driver_private;
2071 if (!old_obj_priv->pin_count)
2072 avail++;
de151cf6
JB
2073 }
2074
2075 /* None available, try to steal one or wait for a user to finish */
2076 if (i == dev_priv->num_fence_regs) {
d7619c4b 2077 uint32_t seqno = dev_priv->mm.next_gem_seqno;
de151cf6
JB
2078 loff_t offset;
2079
fc7170ba
CW
2080 if (avail == 0)
2081 return -ENOMEM;
2082
de151cf6
JB
2083 for (i = dev_priv->fence_reg_start;
2084 i < dev_priv->num_fence_regs; i++) {
d7619c4b
CW
2085 uint32_t this_seqno;
2086
de151cf6
JB
2087 reg = &dev_priv->fence_regs[i];
2088 old_obj_priv = reg->obj->driver_private;
d7619c4b
CW
2089
2090 if (old_obj_priv->pin_count)
2091 continue;
2092
2093 /* i915 uses fences for GPU access to tiled buffers */
2094 if (IS_I965G(dev) || !old_obj_priv->active)
de151cf6 2095 break;
d7619c4b
CW
2096
2097 /* find the seqno of the first available fence */
2098 this_seqno = old_obj_priv->last_rendering_seqno;
2099 if (this_seqno != 0 &&
2100 reg->obj->write_domain == 0 &&
2101 i915_seqno_passed(seqno, this_seqno))
2102 seqno = this_seqno;
de151cf6
JB
2103 }
2104
2105 /*
2106 * Now things get ugly... we have to wait for one of the
2107 * objects to finish before trying again.
2108 */
2109 if (i == dev_priv->num_fence_regs) {
d7619c4b
CW
2110 if (seqno == dev_priv->mm.next_gem_seqno) {
2111 i915_gem_flush(dev,
2112 I915_GEM_GPU_DOMAINS,
2113 I915_GEM_GPU_DOMAINS);
2114 seqno = i915_add_request(dev,
2115 I915_GEM_GPU_DOMAINS);
2116 if (seqno == 0)
2117 return -ENOMEM;
de151cf6 2118 }
d7619c4b
CW
2119
2120 ret = i915_wait_request(dev, seqno);
2121 if (ret)
2122 return ret;
de151cf6
JB
2123 goto try_again;
2124 }
2125
d7619c4b
CW
2126 BUG_ON(old_obj_priv->active ||
2127 (reg->obj->write_domain & I915_GEM_GPU_DOMAINS));
2128
de151cf6
JB
2129 /*
2130 * Zap this virtual mapping so we can set up a fence again
2131 * for this object next time we need it.
2132 */
2133 offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT;
79e53945
JB
2134 if (dev->dev_mapping)
2135 unmap_mapping_range(dev->dev_mapping, offset,
2136 reg->obj->size, 1);
de151cf6
JB
2137 old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
2138 }
2139
2140 obj_priv->fence_reg = i;
2141 reg->obj = obj;
2142
2143 if (IS_I965G(dev))
2144 i965_write_fence_reg(reg);
2145 else if (IS_I9XX(dev))
2146 i915_write_fence_reg(reg);
2147 else
2148 i830_write_fence_reg(reg);
d9ddcb96
EA
2149
2150 return 0;
de151cf6
JB
2151}
2152
2153/**
2154 * i915_gem_clear_fence_reg - clear out fence register info
2155 * @obj: object to clear
2156 *
2157 * Zeroes out the fence register itself and clears out the associated
2158 * data structures in dev_priv and obj_priv.
2159 */
2160static void
2161i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2162{
2163 struct drm_device *dev = obj->dev;
79e53945 2164 drm_i915_private_t *dev_priv = dev->dev_private;
de151cf6
JB
2165 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2166
2167 if (IS_I965G(dev))
2168 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
dc529a4f
EA
2169 else {
2170 uint32_t fence_reg;
2171
2172 if (obj_priv->fence_reg < 8)
2173 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2174 else
2175 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2176 8) * 4;
2177
2178 I915_WRITE(fence_reg, 0);
2179 }
de151cf6
JB
2180
2181 dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
2182 obj_priv->fence_reg = I915_FENCE_REG_NONE;
2183}
2184
673a394b
EA
2185/**
2186 * Finds free space in the GTT aperture and binds the object there.
2187 */
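/*
 * The search/evict loop below retries drm_mm_search_free() and, on
 * failure, evicts one buffer at a time via i915_gem_evict_something()
 * until the object fits or the LRU lists run dry. Once space is found,
 * the backing pages are fetched and bound through the AGP interface at
 * the chosen GTT offset.
 */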
2188static int
2189i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2190{
2191 struct drm_device *dev = obj->dev;
2192 drm_i915_private_t *dev_priv = dev->dev_private;
2193 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2194 struct drm_mm_node *free_space;
2195 int page_count, ret;
2196
9bb2d6f9
EA
2197 if (dev_priv->mm.suspended)
2198 return -EBUSY;
673a394b 2199 if (alignment == 0)
0f973f27 2200 alignment = i915_gem_get_gtt_alignment(obj);
8d7773a3 2201 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
673a394b
EA
2202 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2203 return -EINVAL;
2204 }
2205
2206 search_free:
2207 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2208 obj->size, alignment, 0);
2209 if (free_space != NULL) {
2210 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2211 alignment);
2212 if (obj_priv->gtt_space != NULL) {
2213 obj_priv->gtt_space->private = obj;
2214 obj_priv->gtt_offset = obj_priv->gtt_space->start;
2215 }
2216 }
2217 if (obj_priv->gtt_space == NULL) {
2218 /* If the gtt is empty and we're still having trouble
2219 * fitting our object in, we're out of memory.
2220 */
2221#if WATCH_LRU
2222 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2223#endif
2224 if (list_empty(&dev_priv->mm.inactive_list) &&
2225 list_empty(&dev_priv->mm.flushing_list) &&
2226 list_empty(&dev_priv->mm.active_list)) {
2227 DRM_ERROR("GTT full, but LRU list empty\n");
2228 return -ENOMEM;
2229 }
2230
2231 ret = i915_gem_evict_something(dev);
2232 if (ret != 0) {
ac94a962
KP
2233 if (ret != -ERESTARTSYS)
2234 DRM_ERROR("Failed to evict a buffer %d\n", ret);
673a394b
EA
2235 return ret;
2236 }
2237 goto search_free;
2238 }
2239
2240#if WATCH_BUF
2241 DRM_INFO("Binding object of size %d at 0x%08x\n",
2242 obj->size, obj_priv->gtt_offset);
2243#endif
856fa198 2244 ret = i915_gem_object_get_pages(obj);
673a394b
EA
2245 if (ret) {
2246 drm_mm_put_block(obj_priv->gtt_space);
2247 obj_priv->gtt_space = NULL;
2248 return ret;
2249 }
2250
2251 page_count = obj->size / PAGE_SIZE;
2252 /* Create an AGP memory structure pointing at our pages, and bind it
2253 * into the GTT.
2254 */
2255 obj_priv->agp_mem = drm_agp_bind_pages(dev,
856fa198 2256 obj_priv->pages,
673a394b 2257 page_count,
ba1eb1d8
KP
2258 obj_priv->gtt_offset,
2259 obj_priv->agp_type);
673a394b 2260 if (obj_priv->agp_mem == NULL) {
856fa198 2261 i915_gem_object_put_pages(obj);
673a394b
EA
2262 drm_mm_put_block(obj_priv->gtt_space);
2263 obj_priv->gtt_space = NULL;
2264 return -ENOMEM;
2265 }
2266 atomic_inc(&dev->gtt_count);
2267 atomic_add(obj->size, &dev->gtt_memory);
2268
2269 /* Assert that the object is not currently in any GPU domain. As it
2270 * wasn't in the GTT, there shouldn't be any way it could have been in
2271 * a GPU cache
2272 */
2273 BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
2274 BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
2275
2276 return 0;
2277}
2278
2279void
2280i915_gem_clflush_object(struct drm_gem_object *obj)
2281{
2282 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2283
2284 /* If we don't have a page list set up, then we're not pinned
2285 * to GPU, and we can ignore the cache flush because it'll happen
2286 * again at bind time.
2287 */
856fa198 2288 if (obj_priv->pages == NULL)
673a394b
EA
2289 return;
2290
856fa198 2291 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
673a394b
EA
2292}
2293
e47c68e9
EA
2294/** Flushes any GPU write domain for the object if it's dirty. */
2295static void
2296i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2297{
2298 struct drm_device *dev = obj->dev;
2299 uint32_t seqno;
2300
2301 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2302 return;
2303
2304 /* Queue the GPU write cache flushing we need. */
2305 i915_gem_flush(dev, 0, obj->write_domain);
2306 seqno = i915_add_request(dev, obj->write_domain);
2307 obj->write_domain = 0;
2308 i915_gem_object_move_to_active(obj, seqno);
2309}
2310
2311/** Flushes the GTT write domain for the object if it's dirty. */
2312static void
2313i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2314{
2315 if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2316 return;
2317
2318 /* No actual flushing is required for the GTT write domain. Writes
2319 * to it immediately go to main memory as far as we know, so there's
2320 * no chipset flush. It also doesn't land in render cache.
2321 */
2322 obj->write_domain = 0;
2323}
2324
2325/** Flushes the CPU write domain for the object if it's dirty. */
2326static void
2327i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2328{
2329 struct drm_device *dev = obj->dev;
2330
2331 if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2332 return;
2333
2334 i915_gem_clflush_object(obj);
2335 drm_agp_chipset_flush(dev);
2336 obj->write_domain = 0;
2337}
2338
2ef7eeaa
EA
2339/**
2340 * Moves a single object to the GTT read, and possibly write domain.
2341 *
2342 * This function returns when the move is complete, including waiting on
2343 * flushes to occur.
2344 */
79e53945 2345int
2ef7eeaa
EA
2346i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2347{
2ef7eeaa 2348 struct drm_i915_gem_object *obj_priv = obj->driver_private;
e47c68e9 2349 int ret;
2ef7eeaa 2350
02354392
EA
2351 /* Not valid to be called on unbound objects. */
2352 if (obj_priv->gtt_space == NULL)
2353 return -EINVAL;
2354
e47c68e9
EA
2355 i915_gem_object_flush_gpu_write_domain(obj);
2356 /* Wait on any GPU rendering and flushing to occur. */
2357 ret = i915_gem_object_wait_rendering(obj);
2358 if (ret != 0)
2359 return ret;
2360
2361 /* If we're writing through the GTT domain, then CPU and GPU caches
2362 * will need to be invalidated at next use.
2ef7eeaa 2363 */
e47c68e9
EA
2364 if (write)
2365 obj->read_domains &= I915_GEM_DOMAIN_GTT;
2ef7eeaa 2366
e47c68e9 2367 i915_gem_object_flush_cpu_write_domain(obj);
2ef7eeaa 2368
e47c68e9
EA
2369 /* It should now be out of any other write domains, and we can update
2370 * the domain values for our changes.
2371 */
2372 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2373 obj->read_domains |= I915_GEM_DOMAIN_GTT;
2374 if (write) {
2375 obj->write_domain = I915_GEM_DOMAIN_GTT;
2376 obj_priv->dirty = 1;
2ef7eeaa
EA
2377 }
2378
e47c68e9
EA
2379 return 0;
2380}
2381
2382/**
2383 * Moves a single object to the CPU read, and possibly write domain.
2384 *
2385 * This function returns when the move is complete, including waiting on
2386 * flushes to occur.
2387 */
2388static int
2389i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2390{
e47c68e9
EA
2391 int ret;
2392
2393 i915_gem_object_flush_gpu_write_domain(obj);
2ef7eeaa 2394 /* Wait on any GPU rendering and flushing to occur. */
e47c68e9
EA
2395 ret = i915_gem_object_wait_rendering(obj);
2396 if (ret != 0)
2397 return ret;
2ef7eeaa 2398
e47c68e9 2399 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 2400
e47c68e9
EA
2401 /* If we have a partially-valid cache of the object in the CPU,
2402 * finish invalidating it and free the per-page flags.
2ef7eeaa 2403 */
e47c68e9 2404 i915_gem_object_set_to_full_cpu_read_domain(obj);
2ef7eeaa 2405
e47c68e9
EA
2406 /* Flush the CPU cache if it's still invalid. */
2407 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2ef7eeaa 2408 i915_gem_clflush_object(obj);
2ef7eeaa 2409
e47c68e9 2410 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
2411 }
2412
2413 /* It should now be out of any other write domains, and we can update
2414 * the domain values for our changes.
2415 */
e47c68e9
EA
2416 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2417
2418 /* If we're writing through the CPU, then the GPU read domains will
2419 * need to be invalidated at next use.
2420 */
2421 if (write) {
2422 obj->read_domains &= I915_GEM_DOMAIN_CPU;
2423 obj->write_domain = I915_GEM_DOMAIN_CPU;
2424 }
2ef7eeaa
EA
2425
2426 return 0;
2427}
2428
673a394b
EA
2429/*
2430 * Set the next domain for the specified object. This
 2431 * may not actually perform the necessary flushing/invalidating though,
2432 * as that may want to be batched with other set_domain operations
2433 *
2434 * This is (we hope) the only really tricky part of gem. The goal
2435 * is fairly simple -- track which caches hold bits of the object
2436 * and make sure they remain coherent. A few concrete examples may
2437 * help to explain how it works. For shorthand, we use the notation
 2438 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
2439 * a pair of read and write domain masks.
2440 *
2441 * Case 1: the batch buffer
2442 *
2443 * 1. Allocated
2444 * 2. Written by CPU
2445 * 3. Mapped to GTT
2446 * 4. Read by GPU
2447 * 5. Unmapped from GTT
2448 * 6. Freed
2449 *
2450 * Let's take these a step at a time
2451 *
2452 * 1. Allocated
2453 * Pages allocated from the kernel may still have
2454 * cache contents, so we set them to (CPU, CPU) always.
2455 * 2. Written by CPU (using pwrite)
2456 * The pwrite function calls set_domain (CPU, CPU) and
2457 * this function does nothing (as nothing changes)
2458 * 3. Mapped by GTT
2459 * This function asserts that the object is not
2460 * currently in any GPU-based read or write domains
2461 * 4. Read by GPU
2462 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
2463 * As write_domain is zero, this function adds in the
2464 * current read domains (CPU+COMMAND, 0).
2465 * flush_domains is set to CPU.
2466 * invalidate_domains is set to COMMAND
2467 * clflush is run to get data out of the CPU caches
2468 * then i915_dev_set_domain calls i915_gem_flush to
2469 * emit an MI_FLUSH and drm_agp_chipset_flush
2470 * 5. Unmapped from GTT
2471 * i915_gem_object_unbind calls set_domain (CPU, CPU)
2472 * flush_domains and invalidate_domains end up both zero
2473 * so no flushing/invalidating happens
2474 * 6. Freed
2475 * yay, done
2476 *
2477 * Case 2: The shared render buffer
2478 *
2479 * 1. Allocated
2480 * 2. Mapped to GTT
2481 * 3. Read/written by GPU
2482 * 4. set_domain to (CPU,CPU)
2483 * 5. Read/written by CPU
2484 * 6. Read/written by GPU
2485 *
2486 * 1. Allocated
2487 * Same as last example, (CPU, CPU)
2488 * 2. Mapped to GTT
2489 * Nothing changes (assertions find that it is not in the GPU)
2490 * 3. Read/written by GPU
2491 * execbuffer calls set_domain (RENDER, RENDER)
2492 * flush_domains gets CPU
2493 * invalidate_domains gets GPU
2494 * clflush (obj)
2495 * MI_FLUSH and drm_agp_chipset_flush
2496 * 4. set_domain (CPU, CPU)
2497 * flush_domains gets GPU
2498 * invalidate_domains gets CPU
2499 * wait_rendering (obj) to make sure all drawing is complete.
2500 * This will include an MI_FLUSH to get the data from GPU
2501 * to memory
2502 * clflush (obj) to invalidate the CPU cache
2503 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
2504 * 5. Read/written by CPU
2505 * cache lines are loaded and dirtied
 2506 * 6. Read/written by GPU
2507 * Same as last GPU access
2508 *
2509 * Case 3: The constant buffer
2510 *
2511 * 1. Allocated
2512 * 2. Written by CPU
2513 * 3. Read by GPU
2514 * 4. Updated (written) by CPU again
2515 * 5. Read by GPU
2516 *
2517 * 1. Allocated
2518 * (CPU, CPU)
2519 * 2. Written by CPU
2520 * (CPU, CPU)
2521 * 3. Read by GPU
2522 * (CPU+RENDER, 0)
2523 * flush_domains = CPU
2524 * invalidate_domains = RENDER
2525 * clflush (obj)
2526 * MI_FLUSH
2527 * drm_agp_chipset_flush
2528 * 4. Updated (written) by CPU again
2529 * (CPU, CPU)
2530 * flush_domains = 0 (no previous write domain)
2531 * invalidate_domains = 0 (no new read domains)
2532 * 5. Read by GPU
2533 * (CPU+RENDER, 0)
2534 * flush_domains = CPU
2535 * invalidate_domains = RENDER
2536 * clflush (obj)
2537 * MI_FLUSH
2538 * drm_agp_chipset_flush
2539 */
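/*
 * Compact illustration of the bookkeeping below (values are
 * illustrative): an object currently at (CPU, CPU) that a batch wants
 * to read as RENDER ends up contributing its old CPU write domain to
 * flush_domains and RENDER to invalidate_domains, gets clflushed here,
 * and relies on i915_gem_execbuffer() to emit the accumulated
 * MI_FLUSH/chipset flush afterwards.
 */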
c0d90829 2540static void
8b0e378a 2541i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
673a394b
EA
2542{
2543 struct drm_device *dev = obj->dev;
2544 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2545 uint32_t invalidate_domains = 0;
2546 uint32_t flush_domains = 0;
e47c68e9 2547
8b0e378a
EA
2548 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2549 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
673a394b
EA
2550
2551#if WATCH_BUF
2552 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
2553 __func__, obj,
8b0e378a
EA
2554 obj->read_domains, obj->pending_read_domains,
2555 obj->write_domain, obj->pending_write_domain);
673a394b
EA
2556#endif
2557 /*
2558 * If the object isn't moving to a new write domain,
2559 * let the object stay in multiple read domains
2560 */
8b0e378a
EA
2561 if (obj->pending_write_domain == 0)
2562 obj->pending_read_domains |= obj->read_domains;
673a394b
EA
2563 else
2564 obj_priv->dirty = 1;
2565
2566 /*
2567 * Flush the current write domain if
2568 * the new read domains don't match. Invalidate
2569 * any read domains which differ from the old
2570 * write domain
2571 */
8b0e378a
EA
2572 if (obj->write_domain &&
2573 obj->write_domain != obj->pending_read_domains) {
673a394b 2574 flush_domains |= obj->write_domain;
8b0e378a
EA
2575 invalidate_domains |=
2576 obj->pending_read_domains & ~obj->write_domain;
673a394b
EA
2577 }
2578 /*
2579 * Invalidate any read caches which may have
2580 * stale data. That is, any new read domains.
2581 */
8b0e378a 2582 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
673a394b
EA
2583 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
2584#if WATCH_BUF
2585 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
2586 __func__, flush_domains, invalidate_domains);
2587#endif
673a394b
EA
2588 i915_gem_clflush_object(obj);
2589 }
2590
efbeed96
EA
2591 /* The actual obj->write_domain will be updated with
2592 * pending_write_domain after we emit the accumulated flush for all
2593 * of our domain changes in execbuffers (which clears objects'
2594 * write_domains). So if we have a current write domain that we
2595 * aren't changing, set pending_write_domain to that.
2596 */
2597 if (flush_domains == 0 && obj->pending_write_domain == 0)
2598 obj->pending_write_domain = obj->write_domain;
8b0e378a 2599 obj->read_domains = obj->pending_read_domains;
673a394b
EA
2600
2601 dev->invalidate_domains |= invalidate_domains;
2602 dev->flush_domains |= flush_domains;
2603#if WATCH_BUF
2604 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
2605 __func__,
2606 obj->read_domains, obj->write_domain,
2607 dev->invalidate_domains, dev->flush_domains);
2608#endif
673a394b
EA
2609}
2610
2611/**
e47c68e9 2612 * Moves the object from a partially CPU read to a full one.
673a394b 2613 *
e47c68e9
EA
2614 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
2615 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
673a394b 2616 */
e47c68e9
EA
2617static void
2618i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
673a394b
EA
2619{
2620 struct drm_i915_gem_object *obj_priv = obj->driver_private;
673a394b 2621
e47c68e9
EA
2622 if (!obj_priv->page_cpu_valid)
2623 return;
2624
2625 /* If we're partially in the CPU read domain, finish moving it in.
2626 */
2627 if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
2628 int i;
2629
2630 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
2631 if (obj_priv->page_cpu_valid[i])
2632 continue;
856fa198 2633 drm_clflush_pages(obj_priv->pages + i, 1);
e47c68e9 2634 }
e47c68e9
EA
2635 }
2636
2637 /* Free the page_cpu_valid mappings which are now stale, whether
2638 * or not we've got I915_GEM_DOMAIN_CPU.
2639 */
2640 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
2641 DRM_MEM_DRIVER);
2642 obj_priv->page_cpu_valid = NULL;
2643}
2644
2645/**
2646 * Set the CPU read domain on a range of the object.
2647 *
2648 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
2649 * not entirely valid. The page_cpu_valid member of the object flags which
2650 * pages have been flushed, and will be respected by
2651 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
2652 * of the whole object.
2653 *
2654 * This function returns when the move is complete, including waiting on
2655 * flushes to occur.
2656 */
2657static int
2658i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
2659 uint64_t offset, uint64_t size)
2660{
2661 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2662 int i, ret;
673a394b 2663
e47c68e9
EA
2664 if (offset == 0 && size == obj->size)
2665 return i915_gem_object_set_to_cpu_domain(obj, 0);
673a394b 2666
e47c68e9
EA
2667 i915_gem_object_flush_gpu_write_domain(obj);
2668 /* Wait on any GPU rendering and flushing to occur. */
6a47baa6 2669 ret = i915_gem_object_wait_rendering(obj);
e47c68e9 2670 if (ret != 0)
6a47baa6 2671 return ret;
e47c68e9
EA
2672 i915_gem_object_flush_gtt_write_domain(obj);
2673
2674 /* If we're already fully in the CPU read domain, we're done. */
2675 if (obj_priv->page_cpu_valid == NULL &&
2676 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
2677 return 0;
673a394b 2678
e47c68e9
EA
2679 /* Otherwise, create/clear the per-page CPU read domain flag if we're
2680 * newly adding I915_GEM_DOMAIN_CPU
2681 */
673a394b
EA
2682 if (obj_priv->page_cpu_valid == NULL) {
2683 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
2684 DRM_MEM_DRIVER);
e47c68e9
EA
2685 if (obj_priv->page_cpu_valid == NULL)
2686 return -ENOMEM;
2687 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
2688 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
673a394b
EA
2689
2690 /* Flush the cache on any pages that are still invalid from the CPU's
2691 * perspective.
2692 */
e47c68e9
EA
2693 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
2694 i++) {
673a394b
EA
2695 if (obj_priv->page_cpu_valid[i])
2696 continue;
2697
856fa198 2698 drm_clflush_pages(obj_priv->pages + i, 1);
673a394b
EA
2699
2700 obj_priv->page_cpu_valid[i] = 1;
2701 }
2702
e47c68e9
EA
2703 /* It should now be out of any other write domains, and we can update
2704 * the domain values for our changes.
2705 */
2706 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2707
2708 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2709
673a394b
EA
2710 return 0;
2711}
2712
673a394b
EA
2713/**
2714 * Pin an object to the GTT and evaluate the relocations landing in it.
2715 */
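/*
 * For each relocation entry we look up the target object, check that
 * it is bound and that the relocation offset is inside the buffer and
 * 4-byte aligned, then write (target gtt_offset + delta) through an
 * atomic GTT mapping. presumed_offset is refreshed so userspace can
 * skip unchanged relocations on the next execbuffer.
 */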
2716static int
2717i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
2718 struct drm_file *file_priv,
40a5f0de
EA
2719 struct drm_i915_gem_exec_object *entry,
2720 struct drm_i915_gem_relocation_entry *relocs)
673a394b
EA
2721{
2722 struct drm_device *dev = obj->dev;
0839ccb8 2723 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b
EA
2724 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2725 int i, ret;
0839ccb8 2726 void __iomem *reloc_page;
673a394b
EA
2727
2728 /* Choose the GTT offset for our buffer and put it there. */
2729 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
2730 if (ret)
2731 return ret;
2732
2733 entry->offset = obj_priv->gtt_offset;
2734
673a394b
EA
2735 /* Apply the relocations, using the GTT aperture to avoid cache
2736 * flushing requirements.
2737 */
2738 for (i = 0; i < entry->relocation_count; i++) {
40a5f0de 2739 struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
673a394b
EA
2740 struct drm_gem_object *target_obj;
2741 struct drm_i915_gem_object *target_obj_priv;
3043c60c
EA
2742 uint32_t reloc_val, reloc_offset;
2743 uint32_t __iomem *reloc_entry;
673a394b 2744
673a394b 2745 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
40a5f0de 2746 reloc->target_handle);
673a394b
EA
2747 if (target_obj == NULL) {
2748 i915_gem_object_unpin(obj);
2749 return -EBADF;
2750 }
2751 target_obj_priv = target_obj->driver_private;
2752
2753 /* The target buffer should have appeared before us in the
2754 * exec_object list, so it should have a GTT space bound by now.
2755 */
2756 if (target_obj_priv->gtt_space == NULL) {
2757 DRM_ERROR("No GTT space found for object %d\n",
40a5f0de 2758 reloc->target_handle);
673a394b
EA
2759 drm_gem_object_unreference(target_obj);
2760 i915_gem_object_unpin(obj);
2761 return -EINVAL;
2762 }
2763
40a5f0de 2764 if (reloc->offset > obj->size - 4) {
673a394b
EA
2765 DRM_ERROR("Relocation beyond object bounds: "
2766 "obj %p target %d offset %d size %d.\n",
40a5f0de
EA
2767 obj, reloc->target_handle,
2768 (int) reloc->offset, (int) obj->size);
673a394b
EA
2769 drm_gem_object_unreference(target_obj);
2770 i915_gem_object_unpin(obj);
2771 return -EINVAL;
2772 }
40a5f0de 2773 if (reloc->offset & 3) {
673a394b
EA
2774 DRM_ERROR("Relocation not 4-byte aligned: "
2775 "obj %p target %d offset %d.\n",
40a5f0de
EA
2776 obj, reloc->target_handle,
2777 (int) reloc->offset);
673a394b
EA
2778 drm_gem_object_unreference(target_obj);
2779 i915_gem_object_unpin(obj);
2780 return -EINVAL;
2781 }
2782
40a5f0de
EA
2783 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
2784 reloc->read_domains & I915_GEM_DOMAIN_CPU) {
e47c68e9
EA
2785 DRM_ERROR("reloc with read/write CPU domains: "
2786 "obj %p target %d offset %d "
2787 "read %08x write %08x",
40a5f0de
EA
2788 obj, reloc->target_handle,
2789 (int) reloc->offset,
2790 reloc->read_domains,
2791 reloc->write_domain);
491152b8
CW
2792 drm_gem_object_unreference(target_obj);
2793 i915_gem_object_unpin(obj);
e47c68e9
EA
2794 return -EINVAL;
2795 }
2796
40a5f0de
EA
2797 if (reloc->write_domain && target_obj->pending_write_domain &&
2798 reloc->write_domain != target_obj->pending_write_domain) {
673a394b
EA
2799 DRM_ERROR("Write domain conflict: "
2800 "obj %p target %d offset %d "
2801 "new %08x old %08x\n",
40a5f0de
EA
2802 obj, reloc->target_handle,
2803 (int) reloc->offset,
2804 reloc->write_domain,
673a394b
EA
2805 target_obj->pending_write_domain);
2806 drm_gem_object_unreference(target_obj);
2807 i915_gem_object_unpin(obj);
2808 return -EINVAL;
2809 }
2810
2811#if WATCH_RELOC
2812 DRM_INFO("%s: obj %p offset %08x target %d "
2813 "read %08x write %08x gtt %08x "
2814 "presumed %08x delta %08x\n",
2815 __func__,
2816 obj,
40a5f0de
EA
2817 (int) reloc->offset,
2818 (int) reloc->target_handle,
2819 (int) reloc->read_domains,
2820 (int) reloc->write_domain,
673a394b 2821 (int) target_obj_priv->gtt_offset,
40a5f0de
EA
2822 (int) reloc->presumed_offset,
2823 reloc->delta);
673a394b
EA
2824#endif
2825
40a5f0de
EA
2826 target_obj->pending_read_domains |= reloc->read_domains;
2827 target_obj->pending_write_domain |= reloc->write_domain;
673a394b
EA
2828
2829 /* If the relocation already has the right value in it, no
2830 * more work needs to be done.
2831 */
40a5f0de 2832 if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
673a394b
EA
2833 drm_gem_object_unreference(target_obj);
2834 continue;
2835 }
2836
2ef7eeaa
EA
2837 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
2838 if (ret != 0) {
2839 drm_gem_object_unreference(target_obj);
2840 i915_gem_object_unpin(obj);
 2841 return ret;
673a394b
EA
2842 }
2843
2844 /* Map the page containing the relocation we're going to
2845 * perform.
2846 */
40a5f0de 2847 reloc_offset = obj_priv->gtt_offset + reloc->offset;
0839ccb8
KP
2848 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
2849 (reloc_offset &
2850 ~(PAGE_SIZE - 1)));
3043c60c 2851 reloc_entry = (uint32_t __iomem *)(reloc_page +
0839ccb8 2852 (reloc_offset & (PAGE_SIZE - 1)));
40a5f0de 2853 reloc_val = target_obj_priv->gtt_offset + reloc->delta;
673a394b
EA
2854
2855#if WATCH_BUF
2856 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
40a5f0de 2857 obj, (unsigned int) reloc->offset,
673a394b
EA
2858 readl(reloc_entry), reloc_val);
2859#endif
2860 writel(reloc_val, reloc_entry);
0839ccb8 2861 io_mapping_unmap_atomic(reloc_page);
673a394b 2862
40a5f0de
EA
2863 /* The updated presumed offset for this entry will be
2864 * copied back out to the user.
673a394b 2865 */
40a5f0de 2866 reloc->presumed_offset = target_obj_priv->gtt_offset;
673a394b
EA
2867
2868 drm_gem_object_unreference(target_obj);
2869 }
2870
673a394b
EA
2871#if WATCH_BUF
2872 if (0)
2873 i915_gem_dump_object(obj, 128, __func__, ~0);
2874#endif
2875 return 0;
2876}
2877
2878/** Dispatch a batchbuffer to the ring
2879 */
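/*
 * The batch is emitted once per cliprect (or once if there are none).
 * 830/845 use the old MI_BATCH_BUFFER command with an explicit end
 * address; everything newer uses MI_BATCH_BUFFER_START, with the i965
 * variant of the non-secure bit where required.
 */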
2880static int
2881i915_dispatch_gem_execbuffer(struct drm_device *dev,
2882 struct drm_i915_gem_execbuffer *exec,
201361a5 2883 struct drm_clip_rect *cliprects,
673a394b
EA
2884 uint64_t exec_offset)
2885{
2886 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b
EA
2887 int nbox = exec->num_cliprects;
2888 int i = 0, count;
2889 uint32_t exec_start, exec_len;
2890 RING_LOCALS;
2891
2892 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
2893 exec_len = (uint32_t) exec->batch_len;
2894
2895 if ((exec_start | exec_len) & 0x7) {
2896 DRM_ERROR("alignment\n");
2897 return -EINVAL;
2898 }
2899
2900 if (!exec_start)
2901 return -EINVAL;
2902
2903 count = nbox ? nbox : 1;
2904
2905 for (i = 0; i < count; i++) {
2906 if (i < nbox) {
201361a5 2907 int ret = i915_emit_box(dev, cliprects, i,
673a394b
EA
2908 exec->DR1, exec->DR4);
2909 if (ret)
2910 return ret;
2911 }
2912
2913 if (IS_I830(dev) || IS_845G(dev)) {
2914 BEGIN_LP_RING(4);
2915 OUT_RING(MI_BATCH_BUFFER);
2916 OUT_RING(exec_start | MI_BATCH_NON_SECURE);
2917 OUT_RING(exec_start + exec_len - 4);
2918 OUT_RING(0);
2919 ADVANCE_LP_RING();
2920 } else {
2921 BEGIN_LP_RING(2);
2922 if (IS_I965G(dev)) {
2923 OUT_RING(MI_BATCH_BUFFER_START |
2924 (2 << 6) |
2925 MI_BATCH_NON_SECURE_I965);
2926 OUT_RING(exec_start);
2927 } else {
2928 OUT_RING(MI_BATCH_BUFFER_START |
2929 (2 << 6));
2930 OUT_RING(exec_start | MI_BATCH_NON_SECURE);
2931 }
2932 ADVANCE_LP_RING();
2933 }
2934 }
2935
2936 /* XXX breadcrumb */
2937 return 0;
2938}
2939
2940/* Throttle our rendering by waiting until the ring has completed our requests
2941 * emitted over 20 msec ago.
2942 *
2943 * This should get us reasonable parallelism between CPU and GPU but also
2944 * relatively low latency when blocking on a particular request to finish.
2945 */
2946static int
2947i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
2948{
2949 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
2950 int ret = 0;
2951 uint32_t seqno;
2952
2953 mutex_lock(&dev->struct_mutex);
2954 seqno = i915_file_priv->mm.last_gem_throttle_seqno;
2955 i915_file_priv->mm.last_gem_throttle_seqno =
2956 i915_file_priv->mm.last_gem_seqno;
2957 if (seqno)
2958 ret = i915_wait_request(dev, seqno);
2959 mutex_unlock(&dev->struct_mutex);
2960 return ret;
2961}
2962
40a5f0de
EA
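/*
 * Gather every buffer's relocation entries from userspace into one
 * contiguous kernel array (with an overflow check on the total count)
 * so the relocation pass can run without further user copies.
 */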
2963static int
2964i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
2965 uint32_t buffer_count,
2966 struct drm_i915_gem_relocation_entry **relocs)
2967{
2968 uint32_t reloc_count = 0, reloc_index = 0, i;
2969 int ret;
2970
2971 *relocs = NULL;
2972 for (i = 0; i < buffer_count; i++) {
2973 if (reloc_count + exec_list[i].relocation_count < reloc_count)
2974 return -EINVAL;
2975 reloc_count += exec_list[i].relocation_count;
2976 }
2977
2978 *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER);
2979 if (*relocs == NULL)
2980 return -ENOMEM;
2981
2982 for (i = 0; i < buffer_count; i++) {
2983 struct drm_i915_gem_relocation_entry __user *user_relocs;
2984
2985 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
2986
2987 ret = copy_from_user(&(*relocs)[reloc_index],
2988 user_relocs,
2989 exec_list[i].relocation_count *
2990 sizeof(**relocs));
2991 if (ret != 0) {
2992 drm_free(*relocs, reloc_count * sizeof(**relocs),
2993 DRM_MEM_DRIVER);
2994 *relocs = NULL;
2995 return ret;
2996 }
2997
2998 reloc_index += exec_list[i].relocation_count;
2999 }
3000
3001 return ret;
3002}
3003
3004static int
3005i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list,
3006 uint32_t buffer_count,
3007 struct drm_i915_gem_relocation_entry *relocs)
3008{
3009 uint32_t reloc_count = 0, i;
 3010 int ret = 0;
3011
3012 for (i = 0; i < buffer_count; i++) {
3013 struct drm_i915_gem_relocation_entry __user *user_relocs;
3014
3015 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3016
3017 if (ret == 0) {
3018 ret = copy_to_user(user_relocs,
3019 &relocs[reloc_count],
3020 exec_list[i].relocation_count *
3021 sizeof(*relocs));
3022 }
3023
3024 reloc_count += exec_list[i].relocation_count;
3025 }
3026
3027 drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER);
3028
3029 return ret;
3030}
3031
673a394b
EA
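/*
 * Main execbuffer path: copy in the exec list, cliprects and
 * relocations; look up and pin/relocate every object, retrying once
 * after evicting everything if the GTT is full; accumulate and emit
 * the required domain flushes; dispatch the batch; emit a request
 * seqno and move the objects to the active list; finally copy the
 * updated offsets and relocations back to userspace.
 */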
3032int
3033i915_gem_execbuffer(struct drm_device *dev, void *data,
3034 struct drm_file *file_priv)
3035{
3036 drm_i915_private_t *dev_priv = dev->dev_private;
3037 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3038 struct drm_i915_gem_execbuffer *args = data;
3039 struct drm_i915_gem_exec_object *exec_list = NULL;
3040 struct drm_gem_object **object_list = NULL;
3041 struct drm_gem_object *batch_obj;
b70d11da 3042 struct drm_i915_gem_object *obj_priv;
201361a5 3043 struct drm_clip_rect *cliprects = NULL;
40a5f0de
EA
3044 struct drm_i915_gem_relocation_entry *relocs;
3045 int ret, ret2, i, pinned = 0;
673a394b 3046 uint64_t exec_offset;
40a5f0de 3047 uint32_t seqno, flush_domains, reloc_index;
ac94a962 3048 int pin_tries;
673a394b
EA
3049
3050#if WATCH_EXEC
3051 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3052 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3053#endif
3054
4f481ed2
EA
3055 if (args->buffer_count < 1) {
3056 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3057 return -EINVAL;
3058 }
673a394b
EA
3059 /* Copy in the exec list from userland */
3060 exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count,
3061 DRM_MEM_DRIVER);
3062 object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
3063 DRM_MEM_DRIVER);
3064 if (exec_list == NULL || object_list == NULL) {
3065 DRM_ERROR("Failed to allocate exec or object list "
3066 "for %d buffers\n",
3067 args->buffer_count);
3068 ret = -ENOMEM;
3069 goto pre_mutex_err;
3070 }
3071 ret = copy_from_user(exec_list,
3072 (struct drm_i915_relocation_entry __user *)
3073 (uintptr_t) args->buffers_ptr,
3074 sizeof(*exec_list) * args->buffer_count);
3075 if (ret != 0) {
3076 DRM_ERROR("copy %d exec entries failed %d\n",
3077 args->buffer_count, ret);
3078 goto pre_mutex_err;
3079 }
3080
201361a5
EA
3081 if (args->num_cliprects != 0) {
3082 cliprects = drm_calloc(args->num_cliprects, sizeof(*cliprects),
3083 DRM_MEM_DRIVER);
 3084 if (cliprects == NULL) {
 ret = -ENOMEM;
 3085 goto pre_mutex_err;
 }
3086
3087 ret = copy_from_user(cliprects,
3088 (struct drm_clip_rect __user *)
3089 (uintptr_t) args->cliprects_ptr,
3090 sizeof(*cliprects) * args->num_cliprects);
3091 if (ret != 0) {
3092 DRM_ERROR("copy %d cliprects failed: %d\n",
3093 args->num_cliprects, ret);
3094 goto pre_mutex_err;
3095 }
3096 }
3097
40a5f0de
EA
3098 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3099 &relocs);
3100 if (ret != 0)
3101 goto pre_mutex_err;
3102
673a394b
EA
3103 mutex_lock(&dev->struct_mutex);
3104
3105 i915_verify_inactive(dev, __FILE__, __LINE__);
3106
3107 if (dev_priv->mm.wedged) {
3108 DRM_ERROR("Execbuf while wedged\n");
3109 mutex_unlock(&dev->struct_mutex);
a198bc80
CW
3110 ret = -EIO;
3111 goto pre_mutex_err;
673a394b
EA
3112 }
3113
3114 if (dev_priv->mm.suspended) {
3115 DRM_ERROR("Execbuf while VT-switched.\n");
3116 mutex_unlock(&dev->struct_mutex);
a198bc80
CW
3117 ret = -EBUSY;
3118 goto pre_mutex_err;
673a394b
EA
3119 }
3120
ac94a962 3121 /* Look up object handles */
673a394b
EA
3122 for (i = 0; i < args->buffer_count; i++) {
3123 object_list[i] = drm_gem_object_lookup(dev, file_priv,
3124 exec_list[i].handle);
3125 if (object_list[i] == NULL) {
3126 DRM_ERROR("Invalid object handle %d at index %d\n",
3127 exec_list[i].handle, i);
3128 ret = -EBADF;
3129 goto err;
3130 }
b70d11da
KH
3131
3132 obj_priv = object_list[i]->driver_private;
3133 if (obj_priv->in_execbuffer) {
3134 DRM_ERROR("Object %p appears more than once in object list\n",
3135 object_list[i]);
3136 ret = -EBADF;
3137 goto err;
3138 }
3139 obj_priv->in_execbuffer = true;
ac94a962 3140 }
673a394b 3141
ac94a962
KP
3142 /* Pin and relocate */
3143 for (pin_tries = 0; ; pin_tries++) {
3144 ret = 0;
40a5f0de
EA
3145 reloc_index = 0;
3146
ac94a962
KP
3147 for (i = 0; i < args->buffer_count; i++) {
3148 object_list[i]->pending_read_domains = 0;
3149 object_list[i]->pending_write_domain = 0;
3150 ret = i915_gem_object_pin_and_relocate(object_list[i],
3151 file_priv,
40a5f0de
EA
3152 &exec_list[i],
3153 &relocs[reloc_index]);
ac94a962
KP
3154 if (ret)
3155 break;
3156 pinned = i + 1;
40a5f0de 3157 reloc_index += exec_list[i].relocation_count;
ac94a962
KP
3158 }
3159 /* success */
3160 if (ret == 0)
3161 break;
3162
3163 /* error other than GTT full, or we've already tried again */
3164 if (ret != -ENOMEM || pin_tries >= 1) {
f1acec93
EA
3165 if (ret != -ERESTARTSYS)
3166 DRM_ERROR("Failed to pin buffers %d\n", ret);
673a394b
EA
3167 goto err;
3168 }
ac94a962
KP
3169
3170 /* unpin all of our buffers */
3171 for (i = 0; i < pinned; i++)
3172 i915_gem_object_unpin(object_list[i]);
b1177636 3173 pinned = 0;
ac94a962
KP
3174
3175 /* evict everyone we can from the aperture */
3176 ret = i915_gem_evict_everything(dev);
3177 if (ret)
3178 goto err;
673a394b
EA
3179 }
3180
3181 /* Set the pending read domains for the batch buffer to COMMAND */
3182 batch_obj = object_list[args->buffer_count-1];
3183 batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
3184 batch_obj->pending_write_domain = 0;
3185
3186 i915_verify_inactive(dev, __FILE__, __LINE__);
3187
646f0f6e
KP
3188 /* Zero the global flush/invalidate flags. These
3189 * will be modified as new domains are computed
3190 * for each object
3191 */
3192 dev->invalidate_domains = 0;
3193 dev->flush_domains = 0;
3194
673a394b
EA
3195 for (i = 0; i < args->buffer_count; i++) {
3196 struct drm_gem_object *obj = object_list[i];
673a394b 3197
646f0f6e 3198 /* Compute new gpu domains and update invalidate/flush */
8b0e378a 3199 i915_gem_object_set_to_gpu_domain(obj);
673a394b
EA
3200 }
3201
3202 i915_verify_inactive(dev, __FILE__, __LINE__);
3203
646f0f6e
KP
3204 if (dev->invalidate_domains | dev->flush_domains) {
3205#if WATCH_EXEC
3206 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3207 __func__,
3208 dev->invalidate_domains,
3209 dev->flush_domains);
3210#endif
3211 i915_gem_flush(dev,
3212 dev->invalidate_domains,
3213 dev->flush_domains);
3214 if (dev->flush_domains)
3215 (void)i915_add_request(dev, dev->flush_domains);
3216 }
673a394b 3217
efbeed96
EA
3218 for (i = 0; i < args->buffer_count; i++) {
3219 struct drm_gem_object *obj = object_list[i];
3220
3221 obj->write_domain = obj->pending_write_domain;
3222 }
3223
673a394b
EA
3224 i915_verify_inactive(dev, __FILE__, __LINE__);
3225
3226#if WATCH_COHERENCY
3227 for (i = 0; i < args->buffer_count; i++) {
3228 i915_gem_object_check_coherency(object_list[i],
3229 exec_list[i].handle);
3230 }
3231#endif
3232
3233 exec_offset = exec_list[args->buffer_count - 1].offset;
3234
3235#if WATCH_EXEC
3236 i915_gem_dump_object(object_list[args->buffer_count - 1],
3237 args->batch_len,
3238 __func__,
3239 ~0);
3240#endif
3241
673a394b 3242 /* Exec the batchbuffer */
201361a5 3243 ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset);
673a394b
EA
3244 if (ret) {
3245 DRM_ERROR("dispatch failed %d\n", ret);
3246 goto err;
3247 }
3248
3249 /*
3250 * Ensure that the commands in the batch buffer are
3251 * finished before the interrupt fires
3252 */
3253 flush_domains = i915_retire_commands(dev);
3254
3255 i915_verify_inactive(dev, __FILE__, __LINE__);
3256
3257 /*
3258 * Get a seqno representing the execution of the current buffer,
3259 * which we can wait on. We would like to mitigate these interrupts,
3260 * likely by only creating seqnos occasionally (so that we have
3261 * *some* interrupts representing completion of buffers that we can
3262 * wait on when trying to clear up gtt space).
3263 */
3264 seqno = i915_add_request(dev, flush_domains);
3265 BUG_ON(seqno == 0);
3266 i915_file_priv->mm.last_gem_seqno = seqno;
3267 for (i = 0; i < args->buffer_count; i++) {
3268 struct drm_gem_object *obj = object_list[i];
673a394b 3269
ce44b0ea 3270 i915_gem_object_move_to_active(obj, seqno);
673a394b
EA
3271#if WATCH_LRU
3272 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
3273#endif
3274 }
3275#if WATCH_LRU
3276 i915_dump_lru(dev, __func__);
3277#endif
3278
3279 i915_verify_inactive(dev, __FILE__, __LINE__);
3280
673a394b 3281err:
aad87dff
JL
3282 for (i = 0; i < pinned; i++)
3283 i915_gem_object_unpin(object_list[i]);
3284
b70d11da
KH
3285 for (i = 0; i < args->buffer_count; i++) {
3286 if (object_list[i]) {
3287 obj_priv = object_list[i]->driver_private;
3288 obj_priv->in_execbuffer = false;
3289 }
aad87dff 3290 drm_gem_object_unreference(object_list[i]);
b70d11da 3291 }
673a394b 3292
673a394b
EA
3293 mutex_unlock(&dev->struct_mutex);
3294
a35f2e2b
RD
3295 if (!ret) {
3296 /* Copy the new buffer offsets back to the user's exec list. */
3297 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
3298 (uintptr_t) args->buffers_ptr,
3299 exec_list,
3300 sizeof(*exec_list) * args->buffer_count);
3301 if (ret)
3302 DRM_ERROR("failed to copy %d exec entries "
3303 "back to user (%d)\n",
3304 args->buffer_count, ret);
3305 }
3306
40a5f0de
EA
3307 /* Copy the updated relocations out regardless of current error
3308 * state. Failure to update the relocs would mean that the next
3309 * time userland calls execbuf, it would do so with presumed offset
3310 * state that didn't match the actual object state.
3311 */
3312 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3313 relocs);
3314 if (ret2 != 0) {
3315 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3316
3317 if (ret == 0)
3318 ret = ret2;
3319 }
3320
673a394b
EA
3321pre_mutex_err:
3322 drm_free(object_list, sizeof(*object_list) * args->buffer_count,
3323 DRM_MEM_DRIVER);
3324 drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
3325 DRM_MEM_DRIVER);
201361a5
EA
3326 drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects,
3327 DRM_MEM_DRIVER);
673a394b
EA
3328
3329 return ret;
3330}
3331
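/*
 * Pin an object into the GTT at the requested alignment, binding it
 * first if necessary; pre-965 chips also get a fence register here if
 * the object is tiled. A pinned object is taken off the inactive list
 * so eviction will not touch it. Illustrative usage only (a sketch,
 * not taken from a real caller), with dev->struct_mutex held:
 *
 *	ret = i915_gem_object_pin(obj, 4096);
 *	if (ret == 0) {
 *		... access the object via obj_priv->gtt_offset ...
 *		i915_gem_object_unpin(obj);
 *	}
 */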
3332int
3333i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
3334{
3335 struct drm_device *dev = obj->dev;
3336 struct drm_i915_gem_object *obj_priv = obj->driver_private;
3337 int ret;
3338
3339 i915_verify_inactive(dev, __FILE__, __LINE__);
3340 if (obj_priv->gtt_space == NULL) {
3341 ret = i915_gem_object_bind_to_gtt(obj, alignment);
3342 if (ret != 0) {
9bb2d6f9 3343 if (ret != -EBUSY && ret != -ERESTARTSYS)
0fce81e3 3344 DRM_ERROR("Failure to bind: %d\n", ret);
673a394b
EA
3345 return ret;
3346 }
22c344e9
CW
3347 }
3348 /*
3349 * Pre-965 chips need a fence register set up in order to
3350 * properly handle tiled surfaces.
3351 */
3352 if (!IS_I965G(dev) &&
3353 obj_priv->fence_reg == I915_FENCE_REG_NONE &&
3354 obj_priv->tiling_mode != I915_TILING_NONE) {
3355 ret = i915_gem_object_get_fence_reg(obj, true);
3356 if (ret != 0) {
3357 if (ret != -EBUSY && ret != -ERESTARTSYS)
3358 DRM_ERROR("Failure to install fence: %d\n",
3359 ret);
3360 return ret;
3361 }
673a394b
EA
3362 }
3363 obj_priv->pin_count++;
3364
3365 /* If the object is not active and not pending a flush,
3366 * remove it from the inactive list
3367 */
3368 if (obj_priv->pin_count == 1) {
3369 atomic_inc(&dev->pin_count);
3370 atomic_add(obj->size, &dev->pin_memory);
3371 if (!obj_priv->active &&
3372 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
3373 I915_GEM_DOMAIN_GTT)) == 0 &&
3374 !list_empty(&obj_priv->list))
3375 list_del_init(&obj_priv->list);
3376 }
3377 i915_verify_inactive(dev, __FILE__, __LINE__);
3378
3379 return 0;
3380}
3381
3382void
3383i915_gem_object_unpin(struct drm_gem_object *obj)
3384{
3385 struct drm_device *dev = obj->dev;
3386 drm_i915_private_t *dev_priv = dev->dev_private;
3387 struct drm_i915_gem_object *obj_priv = obj->driver_private;
3388
3389 i915_verify_inactive(dev, __FILE__, __LINE__);
3390 obj_priv->pin_count--;
3391 BUG_ON(obj_priv->pin_count < 0);
3392 BUG_ON(obj_priv->gtt_space == NULL);
3393
3394 /* If the object is no longer pinned, and is
3395 * neither active nor being flushed, then stick it on
3396 * the inactive list
3397 */
3398 if (obj_priv->pin_count == 0) {
3399 if (!obj_priv->active &&
3400 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
3401 I915_GEM_DOMAIN_GTT)) == 0)
3402 list_move_tail(&obj_priv->list,
3403 &dev_priv->mm.inactive_list);
3404 atomic_dec(&dev->pin_count);
3405 atomic_sub(obj->size, &dev->pin_memory);
3406 }
3407 i915_verify_inactive(dev, __FILE__, __LINE__);
3408}
3409
3410int
3411i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3412 struct drm_file *file_priv)
3413{
3414 struct drm_i915_gem_pin *args = data;
3415 struct drm_gem_object *obj;
3416 struct drm_i915_gem_object *obj_priv;
3417 int ret;
3418
3419 mutex_lock(&dev->struct_mutex);
3420
3421 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3422 if (obj == NULL) {
3423 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
3424 args->handle);
3425 mutex_unlock(&dev->struct_mutex);
3426 return -EBADF;
3427 }
3428 obj_priv = obj->driver_private;
3429
79e53945
JB
3430 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
3431 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3432 args->handle);
96dec61d 3433 drm_gem_object_unreference(obj);
673a394b 3434 mutex_unlock(&dev->struct_mutex);
79e53945
JB
3435 return -EINVAL;
3436 }
3437
3438 obj_priv->user_pin_count++;
3439 obj_priv->pin_filp = file_priv;
3440 if (obj_priv->user_pin_count == 1) {
3441 ret = i915_gem_object_pin(obj, args->alignment);
3442 if (ret != 0) {
3443 drm_gem_object_unreference(obj);
3444 mutex_unlock(&dev->struct_mutex);
3445 return ret;
3446 }
673a394b
EA
3447 }
3448
3449 /* XXX - flush the CPU caches for pinned objects
3450 * as the X server doesn't manage domains yet
3451 */
e47c68e9 3452 i915_gem_object_flush_cpu_write_domain(obj);
673a394b
EA
3453 args->offset = obj_priv->gtt_offset;
3454 drm_gem_object_unreference(obj);
3455 mutex_unlock(&dev->struct_mutex);
3456
3457 return 0;
3458}
3459
3460int
3461i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3462 struct drm_file *file_priv)
3463{
3464 struct drm_i915_gem_pin *args = data;
3465 struct drm_gem_object *obj;
79e53945 3466 struct drm_i915_gem_object *obj_priv;
673a394b
EA
3467
3468 mutex_lock(&dev->struct_mutex);
3469
3470 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3471 if (obj == NULL) {
3472 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
3473 args->handle);
3474 mutex_unlock(&dev->struct_mutex);
3475 return -EBADF;
3476 }
3477
79e53945
JB
3478 obj_priv = obj->driver_private;
3479 if (obj_priv->pin_filp != file_priv) {
3480 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3481 args->handle);
3482 drm_gem_object_unreference(obj);
3483 mutex_unlock(&dev->struct_mutex);
3484 return -EINVAL;
3485 }
3486 obj_priv->user_pin_count--;
3487 if (obj_priv->user_pin_count == 0) {
3488 obj_priv->pin_filp = NULL;
3489 i915_gem_object_unpin(obj);
3490 }
673a394b
EA
3491
3492 drm_gem_object_unreference(obj);
3493 mutex_unlock(&dev->struct_mutex);
3494 return 0;
3495}
3496
3497int
3498i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3499 struct drm_file *file_priv)
3500{
3501 struct drm_i915_gem_busy *args = data;
3502 struct drm_gem_object *obj;
3503 struct drm_i915_gem_object *obj_priv;
3504
3505 mutex_lock(&dev->struct_mutex);
3506 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3507 if (obj == NULL) {
3508 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
3509 args->handle);
3510 mutex_unlock(&dev->struct_mutex);
3511 return -EBADF;
3512 }
3513
f21289b3
EA
3514 /* Update the active list for the hardware's current position.
3515 * Otherwise this only updates on a delayed timer or when irqs are
3516 * actually unmasked, and our working set ends up being larger than
3517 * required.
3518 */
3519 i915_gem_retire_requests(dev);
3520
673a394b 3521 obj_priv = obj->driver_private;
c4de0a5d
EA
3522 /* Don't count being on the flushing list against the object being
3523 * done. Otherwise, a buffer left on the flushing list but not getting
3524 * flushed (because nobody's flushing that domain) won't ever return
3525 * unbusy and get reused by libdrm's bo cache. The other expected
3526 * consumer of this interface, OpenGL's occlusion queries, also specs
3527 * that the objects get unbusy "eventually" without any interference.
3528 */
3529 args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0;
673a394b
EA
3530
3531 drm_gem_object_unreference(obj);
3532 mutex_unlock(&dev->struct_mutex);
3533 return 0;
3534}
3535
3536int
3537i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3538 struct drm_file *file_priv)
3539{
3540 return i915_gem_ring_throttle(dev, file_priv);
3541}
3542
3543int i915_gem_init_object(struct drm_gem_object *obj)
3544{
3545 struct drm_i915_gem_object *obj_priv;
3546
3547 obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
3548 if (obj_priv == NULL)
3549 return -ENOMEM;
3550
3551 /*
3552 * We've just allocated pages from the kernel,
3553 * so they've just been written by the CPU with
3554 * zeros. They'll need to be clflushed before we
3555 * use them with the GPU.
3556 */
3557 obj->write_domain = I915_GEM_DOMAIN_CPU;
3558 obj->read_domains = I915_GEM_DOMAIN_CPU;
3559
ba1eb1d8
KP
3560 obj_priv->agp_type = AGP_USER_MEMORY;
3561
673a394b
EA
3562 obj->driver_private = obj_priv;
3563 obj_priv->obj = obj;
de151cf6 3564 obj_priv->fence_reg = I915_FENCE_REG_NONE;
673a394b 3565 INIT_LIST_HEAD(&obj_priv->list);
de151cf6 3566
673a394b
EA
3567 return 0;
3568}
3569
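/* Final teardown of a GEM object: drop any leftover pins, detach a
 * physically contiguous backing object if one is attached, unbind from the
 * GTT, release the fake mmap offset and free the driver-private state.
 */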
3570void i915_gem_free_object(struct drm_gem_object *obj)
3571{
de151cf6 3572 struct drm_device *dev = obj->dev;
673a394b
EA
3573 struct drm_i915_gem_object *obj_priv = obj->driver_private;
3574
3575 while (obj_priv->pin_count > 0)
3576 i915_gem_object_unpin(obj);
3577
71acb5eb
DA
3578 if (obj_priv->phys_obj)
3579 i915_gem_detach_phys_object(dev, obj);
3580
673a394b
EA
3581 i915_gem_object_unbind(obj);
3582
ab00b3e5 3583 i915_gem_free_mmap_offset(obj);
de151cf6 3584
673a394b
EA
3585 drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
3586 drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
3587}
3588
673a394b
EA
3589/** Unbinds all objects that are on the given buffer list. */
3590static int
3591i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
3592{
3593 struct drm_gem_object *obj;
3594 struct drm_i915_gem_object *obj_priv;
3595 int ret;
3596
3597 while (!list_empty(head)) {
3598 obj_priv = list_first_entry(head,
3599 struct drm_i915_gem_object,
3600 list);
3601 obj = obj_priv->obj;
3602
3603 if (obj_priv->pin_count != 0) {
3604 DRM_ERROR("Pinned object in unbind list\n");
3605 mutex_unlock(&dev->struct_mutex);
3606 return -EINVAL;
3607 }
3608
3609 ret = i915_gem_object_unbind(obj);
3610 if (ret != 0) {
3611 DRM_ERROR("Error unbinding object: %d\n",
3612 ret);
3613 mutex_unlock(&dev->struct_mutex);
3614 return ret;
3615 }
3616 }
3617
3618
3619 return 0;
3620}
3621
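/* Quiesce the GPU: block further execbufs, flush all outstanding GPU
 * writes, wait for the resulting request to retire (flagging the chip as
 * wedged if it never does), move every buffer to the inactive list, evict
 * it from the GTT and finally tear down the ring buffer.
 */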
5669fcac 3622int
673a394b
EA
3623i915_gem_idle(struct drm_device *dev)
3624{
3625 drm_i915_private_t *dev_priv = dev->dev_private;
3626 uint32_t seqno, cur_seqno, last_seqno;
3627 int stuck, ret;
3628
6dbe2772
KP
3629 mutex_lock(&dev->struct_mutex);
3630
3631 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) {
3632 mutex_unlock(&dev->struct_mutex);
673a394b 3633 return 0;
6dbe2772 3634 }
673a394b
EA
3635
3636 /* Hack! Don't let anybody do execbuf while we don't control the chip.
3637 * We need to replace this with a semaphore, or something.
3638 */
3639 dev_priv->mm.suspended = 1;
3640
6dbe2772
KP
3641 /* Cancel the retire work handler and wait for it to finish if it is running.
3642 */
3643 mutex_unlock(&dev->struct_mutex);
3644 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3645 mutex_lock(&dev->struct_mutex);
3646
673a394b
EA
3647 i915_kernel_lost_context(dev);
3648
3649 /* Flush the GPU along with all non-CPU write domains
3650 */
3651 i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
3652 ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
de151cf6 3653 seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
673a394b
EA
3654
3655 if (seqno == 0) {
3656 mutex_unlock(&dev->struct_mutex);
3657 return -ENOMEM;
3658 }
3659
3660 dev_priv->mm.waiting_gem_seqno = seqno;
3661 last_seqno = 0;
3662 stuck = 0;
3663 for (;;) {
3664 cur_seqno = i915_get_gem_seqno(dev);
3665 if (i915_seqno_passed(cur_seqno, seqno))
3666 break;
3667 if (last_seqno == cur_seqno) {
3668 if (stuck++ > 100) {
3669 DRM_ERROR("hardware wedged\n");
3670 dev_priv->mm.wedged = 1;
3671 DRM_WAKEUP(&dev_priv->irq_queue);
3672 break;
3673 }
3674 }
3675 msleep(10);
3676 last_seqno = cur_seqno;
3677 }
3678 dev_priv->mm.waiting_gem_seqno = 0;
3679
3680 i915_gem_retire_requests(dev);
3681
28dfe52a
EA
3682 if (!dev_priv->mm.wedged) {
3683 /* Active and flushing should now be empty as we've
3684 * waited for a sequence higher than any pending execbuffer
3685 */
3686 WARN_ON(!list_empty(&dev_priv->mm.active_list));
3687 WARN_ON(!list_empty(&dev_priv->mm.flushing_list));
3688 /* Request should now be empty as we've also waited
3689 * for the last request in the list
3690 */
3691 WARN_ON(!list_empty(&dev_priv->mm.request_list));
3692 }
673a394b 3693
28dfe52a
EA
3694 /* Empty the active and flushing lists to inactive. If there's
3695 * anything left at this point, it means that we're wedged and
3696 * nothing good's going to happen by leaving them there. So strip
3697 * the GPU domains and just stuff them onto inactive.
673a394b 3698 */
28dfe52a
EA
3699 while (!list_empty(&dev_priv->mm.active_list)) {
3700 struct drm_i915_gem_object *obj_priv;
673a394b 3701
28dfe52a
EA
3702 obj_priv = list_first_entry(&dev_priv->mm.active_list,
3703 struct drm_i915_gem_object,
3704 list);
3705 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
3706 i915_gem_object_move_to_inactive(obj_priv->obj);
3707 }
3708
3709 while (!list_empty(&dev_priv->mm.flushing_list)) {
3710 struct drm_i915_gem_object *obj_priv;
3711
151903d5 3712 obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
28dfe52a
EA
3713 struct drm_i915_gem_object,
3714 list);
3715 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
3716 i915_gem_object_move_to_inactive(obj_priv->obj);
3717 }
3718
3719
3720 /* Move all inactive buffers out of the GTT. */
673a394b 3721 ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
28dfe52a 3722 WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
6dbe2772
KP
3723 if (ret) {
3724 mutex_unlock(&dev->struct_mutex);
673a394b 3725 return ret;
6dbe2772 3726 }
673a394b 3727
6dbe2772
KP
3728 i915_gem_cleanup_ringbuffer(dev);
3729 mutex_unlock(&dev->struct_mutex);
3730
673a394b
EA
3731 return 0;
3732}
3733
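/* Allocate and pin a page for the hardware status page on chipsets that
 * read it from graphics memory, map it for the CPU and point HWS_PGA at
 * its GTT offset.  Chipsets that use a physical-address status page were
 * already set up at driver load time.
 */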
3734static int
3735i915_gem_init_hws(struct drm_device *dev)
3736{
3737 drm_i915_private_t *dev_priv = dev->dev_private;
3738 struct drm_gem_object *obj;
3739 struct drm_i915_gem_object *obj_priv;
3740 int ret;
3741
3742 /* If we need a physical address for the status page, it's already
3743 * initialized at driver load time.
3744 */
3745 if (!I915_NEED_GFX_HWS(dev))
3746 return 0;
3747
3748 obj = drm_gem_object_alloc(dev, 4096);
3749 if (obj == NULL) {
3750 DRM_ERROR("Failed to allocate status page\n");
3751 return -ENOMEM;
3752 }
3753 obj_priv = obj->driver_private;
ba1eb1d8 3754 obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
673a394b
EA
3755
3756 ret = i915_gem_object_pin(obj, 4096);
3757 if (ret != 0) {
3758 drm_gem_object_unreference(obj);
3759 return ret;
3760 }
3761
3762 dev_priv->status_gfx_addr = obj_priv->gtt_offset;
673a394b 3763
856fa198 3764 dev_priv->hw_status_page = kmap(obj_priv->pages[0]);
ba1eb1d8 3765 if (dev_priv->hw_status_page == NULL) {
673a394b
EA
3766 DRM_ERROR("Failed to map status page.\n");
3767 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
3eb2ee77 3768 i915_gem_object_unpin(obj);
673a394b
EA
3769 drm_gem_object_unreference(obj);
3770 return -EINVAL;
3771 }
3772 dev_priv->hws_obj = obj;
673a394b
EA
3773 memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
3774 I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
ba1eb1d8 3775 I915_READ(HWS_PGA); /* posting read */
673a394b
EA
3776 DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
3777
3778 return 0;
3779}
3780
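/* Undo i915_gem_init_hws(): unmap, unpin and release the status page
 * object, then park HWS_PGA at a harmless high address.
 */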
85a7bb98
CW
3781static void
3782i915_gem_cleanup_hws(struct drm_device *dev)
3783{
3784 drm_i915_private_t *dev_priv = dev->dev_private;
bab2d1f6
CW
3785 struct drm_gem_object *obj;
3786 struct drm_i915_gem_object *obj_priv;
85a7bb98
CW
3787
3788 if (dev_priv->hws_obj == NULL)
3789 return;
3790
bab2d1f6
CW
3791 obj = dev_priv->hws_obj;
3792 obj_priv = obj->driver_private;
3793
856fa198 3794 kunmap(obj_priv->pages[0]);
85a7bb98
CW
3795 i915_gem_object_unpin(obj);
3796 drm_gem_object_unreference(obj);
3797 dev_priv->hws_obj = NULL;
bab2d1f6 3798
85a7bb98
CW
3799 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
3800 dev_priv->hw_status_page = NULL;
3801
3802 /* Write high address into HWS_PGA when disabling. */
3803 I915_WRITE(HWS_PGA, 0x1ffff000);
3804}
3805
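/* Bring up the render ring: set up the status page, allocate and pin a
 * 128KB ring object, map it write-combined through the GTT, program the
 * PRB0 registers and verify that the ring head actually reset to zero.
 */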
79e53945 3806int
673a394b
EA
3807i915_gem_init_ringbuffer(struct drm_device *dev)
3808{
3809 drm_i915_private_t *dev_priv = dev->dev_private;
3810 struct drm_gem_object *obj;
3811 struct drm_i915_gem_object *obj_priv;
79e53945 3812 drm_i915_ring_buffer_t *ring = &dev_priv->ring;
673a394b 3813 int ret;
50aa253d 3814 u32 head;
673a394b
EA
3815
3816 ret = i915_gem_init_hws(dev);
3817 if (ret != 0)
3818 return ret;
3819
3820 obj = drm_gem_object_alloc(dev, 128 * 1024);
3821 if (obj == NULL) {
3822 DRM_ERROR("Failed to allocate ringbuffer\n");
85a7bb98 3823 i915_gem_cleanup_hws(dev);
673a394b
EA
3824 return -ENOMEM;
3825 }
3826 obj_priv = obj->driver_private;
3827
3828 ret = i915_gem_object_pin(obj, 4096);
3829 if (ret != 0) {
3830 drm_gem_object_unreference(obj);
85a7bb98 3831 i915_gem_cleanup_hws(dev);
673a394b
EA
3832 return ret;
3833 }
3834
3835 /* Set up the kernel mapping for the ring. */
79e53945
JB
3836 ring->Size = obj->size;
3837 ring->tail_mask = obj->size - 1;
673a394b 3838
79e53945
JB
3839 ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
3840 ring->map.size = obj->size;
3841 ring->map.type = 0;
3842 ring->map.flags = 0;
3843 ring->map.mtrr = 0;
673a394b 3844
79e53945
JB
3845 drm_core_ioremap_wc(&ring->map, dev);
3846 if (ring->map.handle == NULL) {
673a394b
EA
3847 DRM_ERROR("Failed to map ringbuffer.\n");
3848 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
47ed185a 3849 i915_gem_object_unpin(obj);
673a394b 3850 drm_gem_object_unreference(obj);
85a7bb98 3851 i915_gem_cleanup_hws(dev);
673a394b
EA
3852 return -EINVAL;
3853 }
79e53945
JB
3854 ring->ring_obj = obj;
3855 ring->virtual_start = ring->map.handle;
673a394b
EA
3856
3857 /* Stop the ring if it's running. */
3858 I915_WRITE(PRB0_CTL, 0);
673a394b 3859 I915_WRITE(PRB0_TAIL, 0);
50aa253d 3860 I915_WRITE(PRB0_HEAD, 0);
673a394b
EA
3861
3862 /* Initialize the ring. */
3863 I915_WRITE(PRB0_START, obj_priv->gtt_offset);
50aa253d
KP
3864 head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
3865
3866 /* G45 ring initialization fails to reset head to zero */
3867 if (head != 0) {
3868 DRM_ERROR("Ring head not reset to zero "
3869 "ctl %08x head %08x tail %08x start %08x\n",
3870 I915_READ(PRB0_CTL),
3871 I915_READ(PRB0_HEAD),
3872 I915_READ(PRB0_TAIL),
3873 I915_READ(PRB0_START));
3874 I915_WRITE(PRB0_HEAD, 0);
3875
3876 DRM_ERROR("Ring head forced to zero "
3877 "ctl %08x head %08x tail %08x start %08x\n",
3878 I915_READ(PRB0_CTL),
3879 I915_READ(PRB0_HEAD),
3880 I915_READ(PRB0_TAIL),
3881 I915_READ(PRB0_START));
3882 }
3883
673a394b
EA
3884 I915_WRITE(PRB0_CTL,
3885 ((obj->size - 4096) & RING_NR_PAGES) |
3886 RING_NO_REPORT |
3887 RING_VALID);
3888
50aa253d
KP
3889 head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
3890
3891 /* If the head is still not zero, the ring is dead */
3892 if (head != 0) {
3893 DRM_ERROR("Ring initialization failed "
3894 "ctl %08x head %08x tail %08x start %08x\n",
3895 I915_READ(PRB0_CTL),
3896 I915_READ(PRB0_HEAD),
3897 I915_READ(PRB0_TAIL),
3898 I915_READ(PRB0_START));
3899 return -EIO;
3900 }
3901
673a394b 3902 /* Update our cache of the ring state */
79e53945
JB
3903 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3904 i915_kernel_lost_context(dev);
3905 else {
3906 ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
3907 ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
3908 ring->space = ring->head - (ring->tail + 8);
3909 if (ring->space < 0)
3910 ring->space += ring->Size;
3911 }
673a394b
EA
3912
3913 return 0;
3914}
3915
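/* Release the ring mapping and ring object set up by
 * i915_gem_init_ringbuffer(), then tear down the hardware status page.
 */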
79e53945 3916void
673a394b
EA
3917i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3918{
3919 drm_i915_private_t *dev_priv = dev->dev_private;
3920
3921 if (dev_priv->ring.ring_obj == NULL)
3922 return;
3923
3924 drm_core_ioremapfree(&dev_priv->ring.map, dev);
3925
3926 i915_gem_object_unpin(dev_priv->ring.ring_obj);
3927 drm_gem_object_unreference(dev_priv->ring.ring_obj);
3928 dev_priv->ring.ring_obj = NULL;
3929 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
3930
85a7bb98 3931 i915_gem_cleanup_hws(dev);
673a394b
EA
3932}
3933
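/* Called when the DRM master re-acquires the VT (UMS only; under KMS the
 * driver never suspends GEM this way): clear the wedged flag if it was
 * set, reinitialize the ring buffer and re-enable command submission and
 * interrupts.
 */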
3934int
3935i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3936 struct drm_file *file_priv)
3937{
3938 drm_i915_private_t *dev_priv = dev->dev_private;
3939 int ret;
3940
79e53945
JB
3941 if (drm_core_check_feature(dev, DRIVER_MODESET))
3942 return 0;
3943
673a394b
EA
3944 if (dev_priv->mm.wedged) {
3945 DRM_ERROR("Reenabling wedged hardware, good luck\n");
3946 dev_priv->mm.wedged = 0;
3947 }
3948
673a394b 3949 mutex_lock(&dev->struct_mutex);
9bb2d6f9
EA
3950 dev_priv->mm.suspended = 0;
3951
3952 ret = i915_gem_init_ringbuffer(dev);
3953 if (ret != 0) {
3954 mutex_unlock(&dev->struct_mutex); /* don't leak struct_mutex on failure */
 return ret;
 }
3955
673a394b
EA
3956 BUG_ON(!list_empty(&dev_priv->mm.active_list));
3957 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
3958 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
3959 BUG_ON(!list_empty(&dev_priv->mm.request_list));
673a394b 3960 mutex_unlock(&dev->struct_mutex);
dbb19d30
KH
3961
3962 drm_irq_install(dev);
3963
673a394b
EA
3964 return 0;
3965}
3966
3967int
3968i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3969 struct drm_file *file_priv)
3970{
3971 int ret;
3972
79e53945
JB
3973 if (drm_core_check_feature(dev, DRIVER_MODESET))
3974 return 0;
3975
673a394b 3976 ret = i915_gem_idle(dev);
dbb19d30
KH
3977 drm_irq_uninstall(dev);
3978
6dbe2772 3979 return ret;
673a394b
EA
3980}
3981
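/* Last close of the DRM device under UMS: idle the GPU so nothing is left
 * running once user space is gone.  Under KMS the driver keeps the GPU
 * initialized, so this is a no-op.
 */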
3982void
3983i915_gem_lastclose(struct drm_device *dev)
3984{
3985 int ret;
673a394b 3986
e806b495
EA
3987 if (drm_core_check_feature(dev, DRIVER_MODESET))
3988 return;
3989
6dbe2772
KP
3990 ret = i915_gem_idle(dev);
3991 if (ret)
3992 DRM_ERROR("failed to idle hardware: %d\n", ret);
673a394b
EA
3993}
3994
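/* One-time GEM setup at driver load: initialize the memory-manager lists
 * and the delayed retire work handler, skip the fence registers that old
 * X servers claim for the front, back and depth buffers, size the fence
 * register pool for the chipset, and detect the bit-6 swizzling mode.
 */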
3995void
3996i915_gem_load(struct drm_device *dev)
3997{
3998 drm_i915_private_t *dev_priv = dev->dev_private;
3999
4000 INIT_LIST_HEAD(&dev_priv->mm.active_list);
4001 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4002 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4003 INIT_LIST_HEAD(&dev_priv->mm.request_list);
4004 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4005 i915_gem_retire_work_handler);
4006 dev_priv->mm.next_gem_seqno = 1;
4007
de151cf6
JB
4008 /* Old X drivers will take 0-2 for front, back, depth buffers */
4009 dev_priv->fence_reg_start = 3;
4010
0f973f27 4011 if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
de151cf6
JB
4012 dev_priv->num_fence_regs = 16;
4013 else
4014 dev_priv->num_fence_regs = 8;
4015
673a394b
EA
4016 i915_gem_detect_bit_6_swizzle(dev);
4017}
71acb5eb
DA
4018
4019/*
4020 * Create a physically contiguous memory object for this object
4021 * e.g. for cursor + overlay regs
4022 */
4023int i915_gem_init_phys_object(struct drm_device *dev,
4024 int id, int size)
4025{
4026 drm_i915_private_t *dev_priv = dev->dev_private;
4027 struct drm_i915_gem_phys_object *phys_obj;
4028 int ret;
4029
4030 if (dev_priv->mm.phys_objs[id - 1] || !size)
4031 return 0;
4032
4033 phys_obj = drm_calloc(1, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER);
4034 if (!phys_obj)
4035 return -ENOMEM;
4036
4037 phys_obj->id = id;
4038
4039 phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff);
4040 if (!phys_obj->handle) {
4041 ret = -ENOMEM;
4042 goto kfree_obj;
4043 }
4044#ifdef CONFIG_X86
4045 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4046#endif
4047
4048 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4049
4050 return 0;
4051kfree_obj:
4052 drm_free(phys_obj, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER);
4053 return ret;
4054}
4055
4056void i915_gem_free_phys_object(struct drm_device *dev, int id)
4057{
4058 drm_i915_private_t *dev_priv = dev->dev_private;
4059 struct drm_i915_gem_phys_object *phys_obj;
4060
4061 if (!dev_priv->mm.phys_objs[id - 1])
4062 return;
4063
4064 phys_obj = dev_priv->mm.phys_objs[id - 1];
4065 if (phys_obj->cur_obj) {
4066 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4067 }
4068
4069#ifdef CONFIG_X86
4070 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4071#endif
4072 drm_pci_free(dev, phys_obj->handle);
4073 drm_free(phys_obj, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER);
4074 dev_priv->mm.phys_objs[id - 1] = NULL;
4075}
4076
4077void i915_gem_free_all_phys_object(struct drm_device *dev)
4078{
4079 int i;
4080
260883c8 4081 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
71acb5eb
DA
4082 i915_gem_free_phys_object(dev, i);
4083}
4084
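/* Detach an object from its physically contiguous backing store, copying
 * the contents back into the object's shmem pages and flushing the CPU
 * caches afterwards.
 */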
4085void i915_gem_detach_phys_object(struct drm_device *dev,
4086 struct drm_gem_object *obj)
4087{
4088 struct drm_i915_gem_object *obj_priv;
4089 int i;
4090 int ret;
4091 int page_count;
4092
4093 obj_priv = obj->driver_private;
4094 if (!obj_priv->phys_obj)
4095 return;
4096
856fa198 4097 ret = i915_gem_object_get_pages(obj);
71acb5eb
DA
4098 if (ret)
4099 goto out;
4100
4101 page_count = obj->size / PAGE_SIZE;
4102
4103 for (i = 0; i < page_count; i++) {
856fa198 4104 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
71acb5eb
DA
4105 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4106
4107 memcpy(dst, src, PAGE_SIZE);
4108 kunmap_atomic(dst, KM_USER0);
4109 }
856fa198 4110 drm_clflush_pages(obj_priv->pages, page_count);
71acb5eb
DA
4111 drm_agp_chipset_flush(dev);
4112out:
4113 obj_priv->phys_obj->cur_obj = NULL;
4114 obj_priv->phys_obj = NULL;
4115}
4116
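/* Attach an object to one of the physically contiguous memory slots
 * (e.g. for the cursor or overlay registers), creating the slot on first
 * use and copying the object's current contents into it.
 */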
4117int
4118i915_gem_attach_phys_object(struct drm_device *dev,
4119 struct drm_gem_object *obj, int id)
4120{
4121 drm_i915_private_t *dev_priv = dev->dev_private;
4122 struct drm_i915_gem_object *obj_priv;
4123 int ret = 0;
4124 int page_count;
4125 int i;
4126
4127 if (id > I915_MAX_PHYS_OBJECT)
4128 return -EINVAL;
4129
4130 obj_priv = obj->driver_private;
4131
4132 if (obj_priv->phys_obj) {
4133 if (obj_priv->phys_obj->id == id)
4134 return 0;
4135 i915_gem_detach_phys_object(dev, obj);
4136 }
4137
4138
4139 /* create a new object */
4140 if (!dev_priv->mm.phys_objs[id - 1]) {
4141 ret = i915_gem_init_phys_object(dev, id,
4142 obj->size);
4143 if (ret) {
aeb565df 4144 DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
71acb5eb
DA
4145 goto out;
4146 }
4147 }
4148
4149 /* bind to the object */
4150 obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4151 obj_priv->phys_obj->cur_obj = obj;
4152
856fa198 4153 ret = i915_gem_object_get_pages(obj);
71acb5eb
DA
4154 if (ret) {
4155 DRM_ERROR("failed to get page list\n");
4156 goto out;
4157 }
4158
4159 page_count = obj->size / PAGE_SIZE;
4160
4161 for (i = 0; i < page_count; i++) {
856fa198 4162 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
71acb5eb
DA
4163 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4164
4165 memcpy(dst, src, PAGE_SIZE);
4166 kunmap_atomic(src, KM_USER0);
4167 }
4168
4169 return 0;
4170out:
4171 return ret;
4172}
4173
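/* pwrite fast path for objects backed by a physically contiguous region:
 * copy the user data straight into the kernel mapping and flush the
 * chipset write buffers.
 */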
4174static int
4175i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4176 struct drm_i915_gem_pwrite *args,
4177 struct drm_file *file_priv)
4178{
4179 struct drm_i915_gem_object *obj_priv = obj->driver_private;
4180 void *obj_addr;
4181 int ret;
4182 char __user *user_data;
4183
4184 user_data = (char __user *) (uintptr_t) args->data_ptr;
4185 obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4186
e08fb4f6 4187 DRM_DEBUG("obj_addr %p, %lld\n", obj_addr, args->size);
71acb5eb
DA
4188 ret = copy_from_user(obj_addr, user_data, args->size);
4189 if (ret)
4190 return -EFAULT;
4191
4192 drm_agp_chipset_flush(dev);
4193 return 0;
4194}