drm/i915: Defer accounting until read from debugfs
deliverable/linux.git: drivers/gpu/drm/i915/i915_gem.c
1 /*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
37
38 struct change_domains {
39 uint32_t invalidate_domains;
40 uint32_t flush_domains;
41 uint32_t flush_rings;
42 };
43
44 static int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
45 struct intel_ring_buffer *pipelined);
46 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
47 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
48 static int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
49 bool write);
50 static int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
51 uint64_t offset,
52 uint64_t size);
53 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
54 static int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
55 bool interruptible);
56 static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
57 unsigned alignment,
58 bool map_and_fenceable);
59 static void i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj);
60 static int i915_gem_phys_pwrite(struct drm_device *dev,
61 struct drm_i915_gem_object *obj,
62 struct drm_i915_gem_pwrite *args,
63 struct drm_file *file);
64 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
65
66 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
67 int nr_to_scan,
68 gfp_t gfp_mask);
69
70
71 /* some bookkeeping */
72 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
73 size_t size)
74 {
75 dev_priv->mm.object_count++;
76 dev_priv->mm.object_memory += size;
77 }
78
79 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
80 size_t size)
81 {
82 dev_priv->mm.object_count--;
83 dev_priv->mm.object_memory -= size;
84 }
85
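/* Check whether the GPU has been declared wedged. If a reset is in
 * progress, wait for it to complete; return -EIO if the GPU is still
 * wedged afterwards, 0 otherwise.
 */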
86 int
87 i915_gem_check_is_wedged(struct drm_device *dev)
88 {
89 struct drm_i915_private *dev_priv = dev->dev_private;
90 struct completion *x = &dev_priv->error_completion;
91 unsigned long flags;
92 int ret;
93
94 if (!atomic_read(&dev_priv->mm.wedged))
95 return 0;
96
97 ret = wait_for_completion_interruptible(x);
98 if (ret)
99 return ret;
100
101 /* Success, we reset the GPU! */
102 if (!atomic_read(&dev_priv->mm.wedged))
103 return 0;
104
105 /* GPU is hung, bump the completion count to account for
106 * the token we just consumed so that we never hit zero and
107 * end up waiting upon a subsequent completion event that
108 * will never happen.
109 */
110 spin_lock_irqsave(&x->wait.lock, flags);
111 x->done++;
112 spin_unlock_irqrestore(&x->wait.lock, flags);
113 return -EIO;
114 }
115
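/* Take struct_mutex interruptibly, failing early with -EIO or -EAGAIN
 * if the GPU is wedged so that blocked ioctls can be aborted.
 */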
116 static int i915_mutex_lock_interruptible(struct drm_device *dev)
117 {
118 struct drm_i915_private *dev_priv = dev->dev_private;
119 int ret;
120
121 ret = i915_gem_check_is_wedged(dev);
122 if (ret)
123 return ret;
124
125 ret = mutex_lock_interruptible(&dev->struct_mutex);
126 if (ret)
127 return ret;
128
129 if (atomic_read(&dev_priv->mm.wedged)) {
130 mutex_unlock(&dev->struct_mutex);
131 return -EAGAIN;
132 }
133
134 WARN_ON(i915_verify_lists(dev));
135 return 0;
136 }
137
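/* An object is "inactive" when it is bound into the GTT but neither in
 * use by the GPU nor pinned.
 */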
138 static inline bool
139 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
140 {
141 return obj->gtt_space && !obj->active && obj->pin_count == 0;
142 }
143
144 void i915_gem_do_init(struct drm_device *dev,
145 unsigned long start,
146 unsigned long mappable_end,
147 unsigned long end)
148 {
149 drm_i915_private_t *dev_priv = dev->dev_private;
150
151 drm_mm_init(&dev_priv->mm.gtt_space, start,
152 end - start);
153
154 dev_priv->mm.gtt_total = end - start;
155 dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
156 dev_priv->mm.gtt_mappable_end = mappable_end;
157 }
158
159 int
160 i915_gem_init_ioctl(struct drm_device *dev, void *data,
161 struct drm_file *file)
162 {
163 struct drm_i915_gem_init *args = data;
164
165 if (args->gtt_start >= args->gtt_end ||
166 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
167 return -EINVAL;
168
169 mutex_lock(&dev->struct_mutex);
170 i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
171 mutex_unlock(&dev->struct_mutex);
172
173 return 0;
174 }
175
176 int
177 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
178 struct drm_file *file)
179 {
180 struct drm_i915_private *dev_priv = dev->dev_private;
181 struct drm_i915_gem_get_aperture *args = data;
182 struct drm_i915_gem_object *obj;
183 size_t pinned;
184
185 if (!(dev->driver->driver_features & DRIVER_GEM))
186 return -ENODEV;
187
188 pinned = 0;
189 mutex_lock(&dev->struct_mutex);
190 list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
191 pinned += obj->gtt_space->size;
192 mutex_unlock(&dev->struct_mutex);
193
194 args->aper_size = dev_priv->mm.gtt_total;
195 args->aper_available_size = args->aper_size - pinned;
196
197 return 0;
198 }
199
200 /**
201 * Creates a new mm object and returns a handle to it.
202 */
203 int
204 i915_gem_create_ioctl(struct drm_device *dev, void *data,
205 struct drm_file *file)
206 {
207 struct drm_i915_gem_create *args = data;
208 struct drm_i915_gem_object *obj;
209 int ret;
210 u32 handle;
211
212 args->size = roundup(args->size, PAGE_SIZE);
213
214 /* Allocate the new object */
215 obj = i915_gem_alloc_object(dev, args->size);
216 if (obj == NULL)
217 return -ENOMEM;
218
219 ret = drm_gem_handle_create(file, &obj->base, &handle);
220 if (ret) {
221 drm_gem_object_release(&obj->base);
222 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
223 kfree(obj);
224 return ret;
225 }
226
227 /* drop reference from allocate - handle holds it now */
228 drm_gem_object_unreference(&obj->base);
229 trace_i915_gem_object_create(obj);
230
231 args->handle = handle;
232 return 0;
233 }
234
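/* Objects using bit-17 swizzling need manual swizzle fixups for CPU
 * access, because bit 17 of the physical page address feeds the swizzle.
 */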
235 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
236 {
237 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
238
239 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
240 obj->tiling_mode != I915_TILING_NONE;
241 }
242
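/* Copy a span of bytes between two pages via kmap; used by the slow,
 * sleepable pread/pwrite paths.
 */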
243 static inline void
244 slow_shmem_copy(struct page *dst_page,
245 int dst_offset,
246 struct page *src_page,
247 int src_offset,
248 int length)
249 {
250 char *dst_vaddr, *src_vaddr;
251
252 dst_vaddr = kmap(dst_page);
253 src_vaddr = kmap(src_page);
254
255 memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
256
257 kunmap(src_page);
258 kunmap(dst_page);
259 }
260
261 static inline void
262 slow_shmem_bit17_copy(struct page *gpu_page,
263 int gpu_offset,
264 struct page *cpu_page,
265 int cpu_offset,
266 int length,
267 int is_read)
268 {
269 char *gpu_vaddr, *cpu_vaddr;
270
271 /* Use the unswizzled path if this page isn't affected. */
272 if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
273 if (is_read)
274 return slow_shmem_copy(cpu_page, cpu_offset,
275 gpu_page, gpu_offset, length);
276 else
277 return slow_shmem_copy(gpu_page, gpu_offset,
278 cpu_page, cpu_offset, length);
279 }
280
281 gpu_vaddr = kmap(gpu_page);
282 cpu_vaddr = kmap(cpu_page);
283
284 /* Copy the data, XORing A6 with A17 (1). The user already knows he's
285 * XORing with the other bits (A9 for Y, A9 and A10 for X)
286 */
287 while (length > 0) {
288 int cacheline_end = ALIGN(gpu_offset + 1, 64);
289 int this_length = min(cacheline_end - gpu_offset, length);
290 int swizzled_gpu_offset = gpu_offset ^ 64;
291
292 if (is_read) {
293 memcpy(cpu_vaddr + cpu_offset,
294 gpu_vaddr + swizzled_gpu_offset,
295 this_length);
296 } else {
297 memcpy(gpu_vaddr + swizzled_gpu_offset,
298 cpu_vaddr + cpu_offset,
299 this_length);
300 }
301 cpu_offset += this_length;
302 gpu_offset += this_length;
303 length -= this_length;
304 }
305
306 kunmap(cpu_page);
307 kunmap(gpu_page);
308 }
309
310 /**
311 * This is the fast shmem pread path, which attempts to copy_to_user directly
312 * from the backing pages of the object to the user's address space. On a
313 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
314 */
315 static int
316 i915_gem_shmem_pread_fast(struct drm_device *dev,
317 struct drm_i915_gem_object *obj,
318 struct drm_i915_gem_pread *args,
319 struct drm_file *file)
320 {
321 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
322 ssize_t remain;
323 loff_t offset;
324 char __user *user_data;
325 int page_offset, page_length;
326
327 user_data = (char __user *) (uintptr_t) args->data_ptr;
328 remain = args->size;
329
330 offset = args->offset;
331
332 while (remain > 0) {
333 struct page *page;
334 char *vaddr;
335 int ret;
336
337 /* Operation in this page
338 *
339 * page_offset = offset within page
340 * page_length = bytes to copy for this page
341 */
342 page_offset = offset & (PAGE_SIZE-1);
343 page_length = remain;
344 if ((page_offset + remain) > PAGE_SIZE)
345 page_length = PAGE_SIZE - page_offset;
346
347 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
348 GFP_HIGHUSER | __GFP_RECLAIMABLE);
349 if (IS_ERR(page))
350 return PTR_ERR(page);
351
352 vaddr = kmap_atomic(page);
353 ret = __copy_to_user_inatomic(user_data,
354 vaddr + page_offset,
355 page_length);
356 kunmap_atomic(vaddr);
357
358 mark_page_accessed(page);
359 page_cache_release(page);
360 if (ret)
361 return -EFAULT;
362
363 remain -= page_length;
364 user_data += page_length;
365 offset += page_length;
366 }
367
368 return 0;
369 }
370
371 /**
372 * This is the fallback shmem pread path, which uses get_user_pages to pin
373 * the user pages ahead of time so that we can copy out of the object's
374 * backing pages while holding the struct mutex and not take page faults
375 * on the user address.
376 */
377 static int
378 i915_gem_shmem_pread_slow(struct drm_device *dev,
379 struct drm_i915_gem_object *obj,
380 struct drm_i915_gem_pread *args,
381 struct drm_file *file)
382 {
383 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
384 struct mm_struct *mm = current->mm;
385 struct page **user_pages;
386 ssize_t remain;
387 loff_t offset, pinned_pages, i;
388 loff_t first_data_page, last_data_page, num_pages;
389 int shmem_page_offset;
390 int data_page_index, data_page_offset;
391 int page_length;
392 int ret;
393 uint64_t data_ptr = args->data_ptr;
394 int do_bit17_swizzling;
395
396 remain = args->size;
397
398 /* Pin the user pages containing the data. We can't fault while
399 * holding the struct mutex, yet we want to hold it while
400 * dereferencing the user data.
401 */
402 first_data_page = data_ptr / PAGE_SIZE;
403 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
404 num_pages = last_data_page - first_data_page + 1;
405
406 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
407 if (user_pages == NULL)
408 return -ENOMEM;
409
410 mutex_unlock(&dev->struct_mutex);
411 down_read(&mm->mmap_sem);
412 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
413 num_pages, 1, 0, user_pages, NULL);
414 up_read(&mm->mmap_sem);
415 mutex_lock(&dev->struct_mutex);
416 if (pinned_pages < num_pages) {
417 ret = -EFAULT;
418 goto out;
419 }
420
421 ret = i915_gem_object_set_cpu_read_domain_range(obj,
422 args->offset,
423 args->size);
424 if (ret)
425 goto out;
426
427 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
428
429 offset = args->offset;
430
431 while (remain > 0) {
432 struct page *page;
433
434 /* Operation in this page
435 *
436 * shmem_page_offset = offset within page in shmem file
437 * data_page_index = page number in get_user_pages return
438 * data_page_offset = offset within data_page_index page.
439 * page_length = bytes to copy for this page
440 */
441 shmem_page_offset = offset & ~PAGE_MASK;
442 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
443 data_page_offset = data_ptr & ~PAGE_MASK;
444
445 page_length = remain;
446 if ((shmem_page_offset + page_length) > PAGE_SIZE)
447 page_length = PAGE_SIZE - shmem_page_offset;
448 if ((data_page_offset + page_length) > PAGE_SIZE)
449 page_length = PAGE_SIZE - data_page_offset;
450
451 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
452 GFP_HIGHUSER | __GFP_RECLAIMABLE);
453 if (IS_ERR(page)) {
454 ret = PTR_ERR(page);
goto out;
}
455
456 if (do_bit17_swizzling) {
457 slow_shmem_bit17_copy(page,
458 shmem_page_offset,
459 user_pages[data_page_index],
460 data_page_offset,
461 page_length,
462 1);
463 } else {
464 slow_shmem_copy(user_pages[data_page_index],
465 data_page_offset,
466 page,
467 shmem_page_offset,
468 page_length);
469 }
470
471 mark_page_accessed(page);
472 page_cache_release(page);
473
474 remain -= page_length;
475 data_ptr += page_length;
476 offset += page_length;
477 }
478
479 out:
480 for (i = 0; i < pinned_pages; i++) {
481 SetPageDirty(user_pages[i]);
482 mark_page_accessed(user_pages[i]);
483 page_cache_release(user_pages[i]);
484 }
485 drm_free_large(user_pages);
486
487 return ret;
488 }
489
490 /**
491 * Reads data from the object referenced by handle.
492 *
493 * On error, the contents of *data are undefined.
494 */
495 int
496 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
497 struct drm_file *file)
498 {
499 struct drm_i915_gem_pread *args = data;
500 struct drm_i915_gem_object *obj;
501 int ret = 0;
502
503 if (args->size == 0)
504 return 0;
505
506 if (!access_ok(VERIFY_WRITE,
507 (char __user *)(uintptr_t)args->data_ptr,
508 args->size))
509 return -EFAULT;
510
511 ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
512 args->size);
513 if (ret)
514 return -EFAULT;
515
516 ret = i915_mutex_lock_interruptible(dev);
517 if (ret)
518 return ret;
519
520 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
521 if (obj == NULL) {
522 ret = -ENOENT;
523 goto unlock;
524 }
525
526 /* Bounds check source. */
527 if (args->offset > obj->base.size ||
528 args->size > obj->base.size - args->offset) {
529 ret = -EINVAL;
530 goto out;
531 }
532
533 ret = i915_gem_object_set_cpu_read_domain_range(obj,
534 args->offset,
535 args->size);
536 if (ret)
537 goto out;
538
539 ret = -EFAULT;
540 if (!i915_gem_object_needs_bit17_swizzle(obj))
541 ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
542 if (ret == -EFAULT)
543 ret = i915_gem_shmem_pread_slow(dev, obj, args, file);
544
545 out:
546 drm_gem_object_unreference(&obj->base);
547 unlock:
548 mutex_unlock(&dev->struct_mutex);
549 return ret;
550 }
551
552 /* This is the fast write path which cannot handle
553 * page faults in the source data
554 */
555
556 static inline int
557 fast_user_write(struct io_mapping *mapping,
558 loff_t page_base, int page_offset,
559 char __user *user_data,
560 int length)
561 {
562 char *vaddr_atomic;
563 unsigned long unwritten;
564
565 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
566 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
567 user_data, length);
568 io_mapping_unmap_atomic(vaddr_atomic);
569 return unwritten;
570 }
571
572 /* Here's the write path which can sleep for
573 * page faults
574 */
575
576 static inline void
577 slow_kernel_write(struct io_mapping *mapping,
578 loff_t gtt_base, int gtt_offset,
579 struct page *user_page, int user_offset,
580 int length)
581 {
582 char __iomem *dst_vaddr;
583 char *src_vaddr;
584
585 dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
586 src_vaddr = kmap(user_page);
587
588 memcpy_toio(dst_vaddr + gtt_offset,
589 src_vaddr + user_offset,
590 length);
591
592 kunmap(user_page);
593 io_mapping_unmap(dst_vaddr);
594 }
595
596 /**
597 * This is the fast pwrite path, where we copy the data directly from the
598 * user into the GTT, uncached.
599 */
600 static int
601 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
602 struct drm_i915_gem_object *obj,
603 struct drm_i915_gem_pwrite *args,
604 struct drm_file *file)
605 {
606 drm_i915_private_t *dev_priv = dev->dev_private;
607 ssize_t remain;
608 loff_t offset, page_base;
609 char __user *user_data;
610 int page_offset, page_length;
611
612 user_data = (char __user *) (uintptr_t) args->data_ptr;
613 remain = args->size;
614
615 offset = obj->gtt_offset + args->offset;
616
617 while (remain > 0) {
618 /* Operation in this page
619 *
620 * page_base = page offset within aperture
621 * page_offset = offset within page
622 * page_length = bytes to copy for this page
623 */
624 page_base = (offset & ~(PAGE_SIZE-1));
625 page_offset = offset & (PAGE_SIZE-1);
626 page_length = remain;
627 if ((page_offset + remain) > PAGE_SIZE)
628 page_length = PAGE_SIZE - page_offset;
629
630 /* If we get a fault while copying data, then (presumably) our
631 * source page isn't available. Return the error and we'll
632 * retry in the slow path.
633 */
634 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
635 page_offset, user_data, page_length))
637 return -EFAULT;
638
639 remain -= page_length;
640 user_data += page_length;
641 offset += page_length;
642 }
643
644 return 0;
645 }
646
647 /**
648 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
649 * the user pages and copies them into the GTT through io_mapping_map_wc.
650 *
651 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
652 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
653 */
654 static int
655 i915_gem_gtt_pwrite_slow(struct drm_device *dev,
656 struct drm_i915_gem_object *obj,
657 struct drm_i915_gem_pwrite *args,
658 struct drm_file *file)
659 {
660 drm_i915_private_t *dev_priv = dev->dev_private;
661 ssize_t remain;
662 loff_t gtt_page_base, offset;
663 loff_t first_data_page, last_data_page, num_pages;
664 loff_t pinned_pages, i;
665 struct page **user_pages;
666 struct mm_struct *mm = current->mm;
667 int gtt_page_offset, data_page_offset, data_page_index, page_length;
668 int ret;
669 uint64_t data_ptr = args->data_ptr;
670
671 remain = args->size;
672
673 /* Pin the user pages containing the data. We can't fault while
674 * holding the struct mutex, and all of the pwrite implementations
675 * want to hold it while dereferencing the user data.
676 */
677 first_data_page = data_ptr / PAGE_SIZE;
678 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
679 num_pages = last_data_page - first_data_page + 1;
680
681 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
682 if (user_pages == NULL)
683 return -ENOMEM;
684
685 mutex_unlock(&dev->struct_mutex);
686 down_read(&mm->mmap_sem);
687 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
688 num_pages, 0, 0, user_pages, NULL);
689 up_read(&mm->mmap_sem);
690 mutex_lock(&dev->struct_mutex);
691 if (pinned_pages < num_pages) {
692 ret = -EFAULT;
693 goto out_unpin_pages;
694 }
695
696 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
697 if (ret)
698 goto out_unpin_pages;
699
700 offset = obj->gtt_offset + args->offset;
701
702 while (remain > 0) {
703 /* Operation in this page
704 *
705 * gtt_page_base = page offset within aperture
706 * gtt_page_offset = offset within page in aperture
707 * data_page_index = page number in get_user_pages return
708 * data_page_offset = offset within data_page_index page.
709 * page_length = bytes to copy for this page
710 */
711 gtt_page_base = offset & PAGE_MASK;
712 gtt_page_offset = offset & ~PAGE_MASK;
713 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
714 data_page_offset = data_ptr & ~PAGE_MASK;
715
716 page_length = remain;
717 if ((gtt_page_offset + page_length) > PAGE_SIZE)
718 page_length = PAGE_SIZE - gtt_page_offset;
719 if ((data_page_offset + page_length) > PAGE_SIZE)
720 page_length = PAGE_SIZE - data_page_offset;
721
722 slow_kernel_write(dev_priv->mm.gtt_mapping,
723 gtt_page_base, gtt_page_offset,
724 user_pages[data_page_index],
725 data_page_offset,
726 page_length);
727
728 remain -= page_length;
729 offset += page_length;
730 data_ptr += page_length;
731 }
732
733 out_unpin_pages:
734 for (i = 0; i < pinned_pages; i++)
735 page_cache_release(user_pages[i]);
736 drm_free_large(user_pages);
737
738 return ret;
739 }
740
741 /**
742 * This is the fast shmem pwrite path, which attempts to directly
743 * copy_from_user into the kmapped pages backing the object.
744 */
745 static int
746 i915_gem_shmem_pwrite_fast(struct drm_device *dev,
747 struct drm_i915_gem_object *obj,
748 struct drm_i915_gem_pwrite *args,
749 struct drm_file *file)
750 {
751 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
752 ssize_t remain;
753 loff_t offset;
754 char __user *user_data;
755 int page_offset, page_length;
756
757 user_data = (char __user *) (uintptr_t) args->data_ptr;
758 remain = args->size;
759
760 offset = args->offset;
761 obj->dirty = 1;
762
763 while (remain > 0) {
764 struct page *page;
765 char *vaddr;
766 int ret;
767
768 /* Operation in this page
769 *
770 * page_offset = offset within page
771 * page_length = bytes to copy for this page
772 */
773 page_offset = offset & (PAGE_SIZE-1);
774 page_length = remain;
775 if ((page_offset + remain) > PAGE_SIZE)
776 page_length = PAGE_SIZE - page_offset;
777
778 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
779 GFP_HIGHUSER | __GFP_RECLAIMABLE);
780 if (IS_ERR(page))
781 return PTR_ERR(page);
782
783 vaddr = kmap_atomic(page, KM_USER0);
784 ret = __copy_from_user_inatomic(vaddr + page_offset,
785 user_data,
786 page_length);
787 kunmap_atomic(vaddr, KM_USER0);
788
789 set_page_dirty(page);
790 mark_page_accessed(page);
791 page_cache_release(page);
792
793 /* If we get a fault while copying data, then (presumably) our
794 * source page isn't available. Return the error and we'll
795 * retry in the slow path.
796 */
797 if (ret)
798 return -EFAULT;
799
800 remain -= page_length;
801 user_data += page_length;
802 offset += page_length;
803 }
804
805 return 0;
806 }
807
808 /**
809 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
810 * the memory and maps it with kmap for copying.
811 *
812 * This avoids taking mmap_sem for faulting on the user's address while the
813 * struct_mutex is held.
814 */
815 static int
816 i915_gem_shmem_pwrite_slow(struct drm_device *dev,
817 struct drm_i915_gem_object *obj,
818 struct drm_i915_gem_pwrite *args,
819 struct drm_file *file)
820 {
821 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
822 struct mm_struct *mm = current->mm;
823 struct page **user_pages;
824 ssize_t remain;
825 loff_t offset, pinned_pages, i;
826 loff_t first_data_page, last_data_page, num_pages;
827 int shmem_page_offset;
828 int data_page_index, data_page_offset;
829 int page_length;
830 int ret;
831 uint64_t data_ptr = args->data_ptr;
832 int do_bit17_swizzling;
833
834 remain = args->size;
835
836 /* Pin the user pages containing the data. We can't fault while
837 * holding the struct mutex, and all of the pwrite implementations
838 * want to hold it while dereferencing the user data.
839 */
840 first_data_page = data_ptr / PAGE_SIZE;
841 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
842 num_pages = last_data_page - first_data_page + 1;
843
844 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
845 if (user_pages == NULL)
846 return -ENOMEM;
847
848 mutex_unlock(&dev->struct_mutex);
849 down_read(&mm->mmap_sem);
850 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
851 num_pages, 0, 0, user_pages, NULL);
852 up_read(&mm->mmap_sem);
853 mutex_lock(&dev->struct_mutex);
854 if (pinned_pages < num_pages) {
855 ret = -EFAULT;
856 goto out;
857 }
858
859 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
860 if (ret)
861 goto out;
862
863 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
864
865 offset = args->offset;
866 obj->dirty = 1;
867
868 while (remain > 0) {
869 struct page *page;
870
871 /* Operation in this page
872 *
873 * shmem_page_offset = offset within page in shmem file
874 * data_page_index = page number in get_user_pages return
875 * data_page_offset = offset within data_page_index page.
876 * page_length = bytes to copy for this page
877 */
878 shmem_page_offset = offset & ~PAGE_MASK;
879 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
880 data_page_offset = data_ptr & ~PAGE_MASK;
881
882 page_length = remain;
883 if ((shmem_page_offset + page_length) > PAGE_SIZE)
884 page_length = PAGE_SIZE - shmem_page_offset;
885 if ((data_page_offset + page_length) > PAGE_SIZE)
886 page_length = PAGE_SIZE - data_page_offset;
887
888 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
889 GFP_HIGHUSER | __GFP_RECLAIMABLE);
890 if (IS_ERR(page)) {
891 ret = PTR_ERR(page);
892 goto out;
893 }
894
895 if (do_bit17_swizzling) {
896 slow_shmem_bit17_copy(page,
897 shmem_page_offset,
898 user_pages[data_page_index],
899 data_page_offset,
900 page_length,
901 0);
902 } else {
903 slow_shmem_copy(page,
904 shmem_page_offset,
905 user_pages[data_page_index],
906 data_page_offset,
907 page_length);
908 }
909
910 set_page_dirty(page);
911 mark_page_accessed(page);
912 page_cache_release(page);
913
914 remain -= page_length;
915 data_ptr += page_length;
916 offset += page_length;
917 }
918
919 out:
920 for (i = 0; i < pinned_pages; i++)
921 page_cache_release(user_pages[i]);
922 drm_free_large(user_pages);
923
924 return ret;
925 }
926
927 /**
928 * Writes data to the object referenced by handle.
929 *
930 * On error, the contents of the buffer that were to be modified are undefined.
931 */
932 int
933 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
934 struct drm_file *file)
935 {
936 struct drm_i915_gem_pwrite *args = data;
937 struct drm_i915_gem_object *obj;
938 int ret;
939
940 if (args->size == 0)
941 return 0;
942
943 if (!access_ok(VERIFY_READ,
944 (char __user *)(uintptr_t)args->data_ptr,
945 args->size))
946 return -EFAULT;
947
948 ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
949 args->size);
950 if (ret)
951 return -EFAULT;
952
953 ret = i915_mutex_lock_interruptible(dev);
954 if (ret)
955 return ret;
956
957 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
958 if (obj == NULL) {
959 ret = -ENOENT;
960 goto unlock;
961 }
962
963 /* Bounds check destination. */
964 if (args->offset > obj->base.size ||
965 args->size > obj->base.size - args->offset) {
966 ret = -EINVAL;
967 goto out;
968 }
969
970 /* We can only do the GTT pwrite on untiled buffers, as otherwise
971 * it would end up going through the fenced access, and we'll get
972 * different detiling behavior between reading and writing.
973 * pread/pwrite currently are reading and writing from the CPU
974 * perspective, requiring manual detiling by the client.
975 */
976 if (obj->phys_obj)
977 ret = i915_gem_phys_pwrite(dev, obj, args, file);
978 else if (obj->tiling_mode == I915_TILING_NONE &&
979 obj->gtt_space &&
980 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
981 ret = i915_gem_object_pin(obj, 0, true);
982 if (ret)
983 goto out;
984
985 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
986 if (ret)
987 goto out_unpin;
988
989 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
990 if (ret == -EFAULT)
991 ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
992
993 out_unpin:
994 i915_gem_object_unpin(obj);
995 } else {
996 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
997 if (ret)
998 goto out;
999
1000 ret = -EFAULT;
1001 if (!i915_gem_object_needs_bit17_swizzle(obj))
1002 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
1003 if (ret == -EFAULT)
1004 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
1005 }
1006
1007 out:
1008 drm_gem_object_unreference(&obj->base);
1009 unlock:
1010 mutex_unlock(&dev->struct_mutex);
1011 return ret;
1012 }
1013
1014 /**
1015 * Called when user space prepares to use an object with the CPU, either
1016 * through the mmap ioctl's mapping or a GTT mapping.
1017 */
1018 int
1019 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1020 struct drm_file *file)
1021 {
1022 struct drm_i915_private *dev_priv = dev->dev_private;
1023 struct drm_i915_gem_set_domain *args = data;
1024 struct drm_i915_gem_object *obj;
1025 uint32_t read_domains = args->read_domains;
1026 uint32_t write_domain = args->write_domain;
1027 int ret;
1028
1029 if (!(dev->driver->driver_features & DRIVER_GEM))
1030 return -ENODEV;
1031
1032 /* Only handle setting domains to types used by the CPU. */
1033 if (write_domain & I915_GEM_GPU_DOMAINS)
1034 return -EINVAL;
1035
1036 if (read_domains & I915_GEM_GPU_DOMAINS)
1037 return -EINVAL;
1038
1039 /* Having something in the write domain implies it's in the read
1040 * domain, and only that read domain. Enforce that in the request.
1041 */
1042 if (write_domain != 0 && read_domains != write_domain)
1043 return -EINVAL;
1044
1045 ret = i915_mutex_lock_interruptible(dev);
1046 if (ret)
1047 return ret;
1048
1049 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1050 if (obj == NULL) {
1051 ret = -ENOENT;
1052 goto unlock;
1053 }
1054
1055 intel_mark_busy(dev, obj);
1056
1057 if (read_domains & I915_GEM_DOMAIN_GTT) {
1058 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1059
1060 /* Update the LRU on the fence for the CPU access that's
1061 * about to occur.
1062 */
1063 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1064 struct drm_i915_fence_reg *reg =
1065 &dev_priv->fence_regs[obj->fence_reg];
1066 list_move_tail(&reg->lru_list,
1067 &dev_priv->mm.fence_list);
1068 }
1069
1070 /* Silently promote "you're not bound, there was nothing to do"
1071 * to success, since the client was just asking us to
1072 * make sure everything was done.
1073 */
1074 if (ret == -EINVAL)
1075 ret = 0;
1076 } else {
1077 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1078 }
1079
1080 /* Maintain LRU order of "inactive" objects */
1081 if (ret == 0 && i915_gem_object_is_inactive(obj))
1082 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1083
1084 drm_gem_object_unreference(&obj->base);
1085 unlock:
1086 mutex_unlock(&dev->struct_mutex);
1087 return ret;
1088 }
1089
1090 /**
1091 * Called when user space has done writes to this buffer
1092 */
1093 int
1094 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1095 struct drm_file *file)
1096 {
1097 struct drm_i915_gem_sw_finish *args = data;
1098 struct drm_i915_gem_object *obj;
1099 int ret = 0;
1100
1101 if (!(dev->driver->driver_features & DRIVER_GEM))
1102 return -ENODEV;
1103
1104 ret = i915_mutex_lock_interruptible(dev);
1105 if (ret)
1106 return ret;
1107
1108 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1109 if (obj == NULL) {
1110 ret = -ENOENT;
1111 goto unlock;
1112 }
1113
1114 /* Pinned buffers may be scanout, so flush the cache */
1115 if (obj->pin_count)
1116 i915_gem_object_flush_cpu_write_domain(obj);
1117
1118 drm_gem_object_unreference(&obj->base);
1119 unlock:
1120 mutex_unlock(&dev->struct_mutex);
1121 return ret;
1122 }
1123
1124 /**
1125 * Maps the contents of an object, returning the address it is mapped
1126 * into.
1127 *
1128 * While the mapping holds a reference on the contents of the object, it doesn't
1129 * imply a ref on the object itself.
1130 */
1131 int
1132 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1133 struct drm_file *file)
1134 {
1135 struct drm_i915_private *dev_priv = dev->dev_private;
1136 struct drm_i915_gem_mmap *args = data;
1137 struct drm_gem_object *obj;
1138 loff_t offset;
1139 unsigned long addr;
1140
1141 if (!(dev->driver->driver_features & DRIVER_GEM))
1142 return -ENODEV;
1143
1144 obj = drm_gem_object_lookup(dev, file, args->handle);
1145 if (obj == NULL)
1146 return -ENOENT;
1147
1148 if (obj->size > dev_priv->mm.gtt_mappable_end) {
1149 drm_gem_object_unreference_unlocked(obj);
1150 return -E2BIG;
1151 }
1152
1153 offset = args->offset;
1154
1155 down_write(&current->mm->mmap_sem);
1156 addr = do_mmap(obj->filp, 0, args->size,
1157 PROT_READ | PROT_WRITE, MAP_SHARED,
1158 args->offset);
1159 up_write(&current->mm->mmap_sem);
1160 drm_gem_object_unreference_unlocked(obj);
1161 if (IS_ERR((void *)addr))
1162 return addr;
1163
1164 args->addr_ptr = (uint64_t) addr;
1165
1166 return 0;
1167 }
1168
1169 /**
1170 * i915_gem_fault - fault a page into the GTT
1171 * vma: VMA in question
1172 * vmf: fault info
1173 *
1174 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1175 * from userspace. The fault handler takes care of binding the object to
1176 * the GTT (if needed), allocating and programming a fence register (again,
1177 * only if needed based on whether the old reg is still valid or the object
1178 * is tiled) and inserting a new PTE into the faulting process.
1179 *
1180 * Note that the faulting process may involve evicting existing objects
1181 * from the GTT and/or fence registers to make room. So performance may
1182 * suffer if the GTT working set is large or there are few fence registers
1183 * left.
1184 */
1185 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1186 {
1187 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1188 struct drm_device *dev = obj->base.dev;
1189 drm_i915_private_t *dev_priv = dev->dev_private;
1190 pgoff_t page_offset;
1191 unsigned long pfn;
1192 int ret = 0;
1193 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1194
1195 /* We don't use vmf->pgoff since that has the fake offset */
1196 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1197 PAGE_SHIFT;
1198
1199 /* Now bind it into the GTT if needed */
1200 mutex_lock(&dev->struct_mutex);
1201
1202 if (!obj->map_and_fenceable) {
1203 ret = i915_gem_object_unbind(obj);
1204 if (ret)
1205 goto unlock;
1206 }
1207 if (!obj->gtt_space) {
1208 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1209 if (ret)
1210 goto unlock;
1211 }
1212
1213 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1214 if (ret)
1215 goto unlock;
1216
1217 /* Need a new fence register? */
1218 if (obj->tiling_mode != I915_TILING_NONE) {
1219 ret = i915_gem_object_get_fence_reg(obj, true);
1220 if (ret)
1221 goto unlock;
1222 }
1223
1224 if (i915_gem_object_is_inactive(obj))
1225 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1226
1227 obj->fault_mappable = true;
1228
1229 pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
1230 page_offset;
1231
1232 /* Finally, remap it using the new GTT offset */
1233 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1234 unlock:
1235 mutex_unlock(&dev->struct_mutex);
1236
1237 switch (ret) {
1238 case -EAGAIN:
1239 set_need_resched();
1240 case 0:
1241 case -ERESTARTSYS:
1242 return VM_FAULT_NOPAGE;
1243 case -ENOMEM:
1244 return VM_FAULT_OOM;
1245 default:
1246 return VM_FAULT_SIGBUS;
1247 }
1248 }
1249
1250 /**
1251 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1252 * @obj: obj in question
1253 *
1254 * GEM memory mapping works by handing back to userspace a fake mmap offset
1255 * it can use in a subsequent mmap(2) call. The DRM core code then looks
1256 * up the object based on the offset and sets up the various memory mapping
1257 * structures.
1258 *
1259 * This routine allocates and attaches a fake offset for @obj.
1260 */
1261 static int
1262 i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
1263 {
1264 struct drm_device *dev = obj->base.dev;
1265 struct drm_gem_mm *mm = dev->mm_private;
1266 struct drm_map_list *list;
1267 struct drm_local_map *map;
1268 int ret = 0;
1269
1270 /* Set the object up for mmap'ing */
1271 list = &obj->base.map_list;
1272 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1273 if (!list->map)
1274 return -ENOMEM;
1275
1276 map = list->map;
1277 map->type = _DRM_GEM;
1278 map->size = obj->base.size;
1279 map->handle = obj;
1280
1281 /* Get a DRM GEM mmap offset allocated... */
1282 list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1283 obj->base.size / PAGE_SIZE,
1284 0, 0);
1285 if (!list->file_offset_node) {
1286 DRM_ERROR("failed to allocate offset for bo %d\n",
1287 obj->base.name);
1288 ret = -ENOSPC;
1289 goto out_free_list;
1290 }
1291
1292 list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1293 obj->base.size / PAGE_SIZE,
1294 0);
1295 if (!list->file_offset_node) {
1296 ret = -ENOMEM;
1297 goto out_free_list;
1298 }
1299
1300 list->hash.key = list->file_offset_node->start;
1301 ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
1302 if (ret) {
1303 DRM_ERROR("failed to add to map hash\n");
1304 goto out_free_mm;
1305 }
1306
1307 return 0;
1308
1309 out_free_mm:
1310 drm_mm_put_block(list->file_offset_node);
1311 out_free_list:
1312 kfree(list->map);
1313 list->map = NULL;
1314
1315 return ret;
1316 }
1317
1318 /**
1319 * i915_gem_release_mmap - remove physical page mappings
1320 * @obj: obj in question
1321 *
1322 * Preserve the reservation of the mmapping with the DRM core code, but
1323 * relinquish ownership of the pages back to the system.
1324 *
1325 * It is vital that we remove the page mapping if we have mapped a tiled
1326 * object through the GTT and then lose the fence register due to
1327 * resource pressure. Similarly if the object has been moved out of the
1328 * aperture, then pages mapped into userspace must be revoked. Removing the
1329 * mapping will then trigger a page fault on the next user access, allowing
1330 * fixup by i915_gem_fault().
1331 */
1332 void
1333 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1334 {
1335 if (!obj->fault_mappable)
1336 return;
1337
1338 unmap_mapping_range(obj->base.dev->dev_mapping,
1339 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1340 obj->base.size, 1);
1341
1342 obj->fault_mappable = false;
1343 }
1344
1345 static void
1346 i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
1347 {
1348 struct drm_device *dev = obj->base.dev;
1349 struct drm_gem_mm *mm = dev->mm_private;
1350 struct drm_map_list *list = &obj->base.map_list;
1351
1352 drm_ht_remove_item(&mm->offset_hash, &list->hash);
1353 drm_mm_put_block(list->file_offset_node);
1354 kfree(list->map);
1355 list->map = NULL;
1356 }
1357
1358 static uint32_t
1359 i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
1360 {
1361 struct drm_device *dev = obj->base.dev;
1362 uint32_t size;
1363
1364 if (INTEL_INFO(dev)->gen >= 4 ||
1365 obj->tiling_mode == I915_TILING_NONE)
1366 return obj->base.size;
1367
1368 /* Previous chips need a power-of-two fence region when tiling */
1369 if (INTEL_INFO(dev)->gen == 3)
1370 size = 1024*1024;
1371 else
1372 size = 512*1024;
1373
1374 while (size < obj->base.size)
1375 size <<= 1;
1376
1377 return size;
1378 }
1379
1380 /**
1381 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1382 * @obj: object to check
1383 *
1384 * Return the required GTT alignment for an object, taking into account
1385 * potential fence register mapping.
1386 */
1387 static uint32_t
1388 i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
1389 {
1390 struct drm_device *dev = obj->base.dev;
1391
1392 /*
1393 * Minimum alignment is 4k (GTT page size), but might be greater
1394 * if a fence register is needed for the object.
1395 */
1396 if (INTEL_INFO(dev)->gen >= 4 ||
1397 obj->tiling_mode == I915_TILING_NONE)
1398 return 4096;
1399
1400 /*
1401 * Previous chips need to be aligned to the size of the smallest
1402 * fence register that can contain the object.
1403 */
1404 return i915_gem_get_gtt_size(obj);
1405 }
1406
1407 /**
1408 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1409 * unfenced object
1410 * @obj: object to check
1411 *
1412 * Return the required GTT alignment for an object, only taking into account
1413 * unfenced tiled surface requirements.
1414 */
1415 static uint32_t
1416 i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
1417 {
1418 struct drm_device *dev = obj->base.dev;
1419 int tile_height;
1420
1421 /*
1422 * Minimum alignment is 4k (GTT page size) for sane hw.
1423 */
1424 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1425 obj->tiling_mode == I915_TILING_NONE)
1426 return 4096;
1427
1428 /*
1429 * Older chips need unfenced tiled buffers to be aligned to the left
1430 * edge of an even tile row (where tile rows are counted as if the bo is
1431 * placed in a fenced gtt region).
1432 */
1433 if (IS_GEN2(dev) ||
1434 (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
1435 tile_height = 32;
1436 else
1437 tile_height = 8;
1438
1439 return tile_height * obj->stride * 2;
1440 }
1441
1442 /**
1443 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1444 * @dev: DRM device
1445 * @data: GTT mapping ioctl data
1446 * @file: GEM object info
1447 *
1448 * Simply returns the fake offset to userspace so it can mmap it.
1449 * The mmap call will end up in drm_gem_mmap(), which will set things
1450 * up so we can get faults in the handler above.
1451 *
1452 * The fault handler will take care of binding the object into the GTT
1453 * (since it may have been evicted to make room for something), allocating
1454 * a fence register, and mapping the appropriate aperture address into
1455 * userspace.
1456 */
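/* Illustrative userspace flow (a hypothetical sketch, not part of this
 * file): pass a GEM handle to the ioctl, then mmap the returned fake
 * offset on the DRM fd.
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 */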
1457 int
1458 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1459 struct drm_file *file)
1460 {
1461 struct drm_i915_private *dev_priv = dev->dev_private;
1462 struct drm_i915_gem_mmap_gtt *args = data;
1463 struct drm_i915_gem_object *obj;
1464 int ret;
1465
1466 if (!(dev->driver->driver_features & DRIVER_GEM))
1467 return -ENODEV;
1468
1469 ret = i915_mutex_lock_interruptible(dev);
1470 if (ret)
1471 return ret;
1472
1473 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1474 if (obj == NULL) {
1475 ret = -ENOENT;
1476 goto unlock;
1477 }
1478
1479 if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1480 ret = -E2BIG;
1481 goto unlock;
1482 }
1483
1484 if (obj->madv != I915_MADV_WILLNEED) {
1485 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1486 ret = -EINVAL;
1487 goto out;
1488 }
1489
1490 if (!obj->base.map_list.map) {
1491 ret = i915_gem_create_mmap_offset(obj);
1492 if (ret)
1493 goto out;
1494 }
1495
1496 args->offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1497
1498 out:
1499 drm_gem_object_unreference(&obj->base);
1500 unlock:
1501 mutex_unlock(&dev->struct_mutex);
1502 return ret;
1503 }
1504
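/* Populate obj->pages with the object's backing pages from shmemfs; each
 * page stays pinned until i915_gem_object_put_pages_gtt() releases it.
 */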
1505 static int
1506 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1507 gfp_t gfpmask)
1508 {
1509 int page_count, i;
1510 struct address_space *mapping;
1511 struct inode *inode;
1512 struct page *page;
1513
1514 /* Get the list of pages out of our struct file. They'll be pinned
1515 * at this point until we release them.
1516 */
1517 page_count = obj->base.size / PAGE_SIZE;
1518 BUG_ON(obj->pages != NULL);
1519 obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
1520 if (obj->pages == NULL)
1521 return -ENOMEM;
1522
1523 inode = obj->base.filp->f_path.dentry->d_inode;
1524 mapping = inode->i_mapping;
1525 for (i = 0; i < page_count; i++) {
1526 page = read_cache_page_gfp(mapping, i,
1527 GFP_HIGHUSER |
1528 __GFP_COLD |
1529 __GFP_RECLAIMABLE |
1530 gfpmask);
1531 if (IS_ERR(page))
1532 goto err_pages;
1533
1534 obj->pages[i] = page;
1535 }
1536
1537 if (obj->tiling_mode != I915_TILING_NONE)
1538 i915_gem_object_do_bit_17_swizzle(obj);
1539
1540 return 0;
1541
1542 err_pages:
1543 while (i--)
1544 page_cache_release(obj->pages[i]);
1545
1546 drm_free_large(obj->pages);
1547 obj->pages = NULL;
1548 return PTR_ERR(page);
1549 }
1550
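/* Release the backing pages, writing back dirty data unless the object
 * has been marked DONTNEED by userspace.
 */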
1551 static void
1552 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1553 {
1554 int page_count = obj->base.size / PAGE_SIZE;
1555 int i;
1556
1557 BUG_ON(obj->madv == __I915_MADV_PURGED);
1558
1559 if (obj->tiling_mode != I915_TILING_NONE)
1560 i915_gem_object_save_bit_17_swizzle(obj);
1561
1562 if (obj->madv == I915_MADV_DONTNEED)
1563 obj->dirty = 0;
1564
1565 for (i = 0; i < page_count; i++) {
1566 if (obj->dirty)
1567 set_page_dirty(obj->pages[i]);
1568
1569 if (obj->madv == I915_MADV_WILLNEED)
1570 mark_page_accessed(obj->pages[i]);
1571
1572 page_cache_release(obj->pages[i]);
1573 }
1574 obj->dirty = 0;
1575
1576 drm_free_large(obj->pages);
1577 obj->pages = NULL;
1578 }
1579
1580 static uint32_t
1581 i915_gem_next_request_seqno(struct drm_device *dev,
1582 struct intel_ring_buffer *ring)
1583 {
1584 drm_i915_private_t *dev_priv = dev->dev_private;
1585 return ring->outstanding_lazy_request = dev_priv->next_seqno;
1586 }
1587
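/* Mark the object as busy on the given ring: move it to the active lists
 * and record the seqno that must pass before it becomes idle again.
 */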
1588 static void
1589 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1590 struct intel_ring_buffer *ring)
1591 {
1592 struct drm_device *dev = obj->base.dev;
1593 struct drm_i915_private *dev_priv = dev->dev_private;
1594 uint32_t seqno = i915_gem_next_request_seqno(dev, ring);
1595
1596 BUG_ON(ring == NULL);
1597 obj->ring = ring;
1598
1599 /* Add a reference if we're newly entering the active list. */
1600 if (!obj->active) {
1601 drm_gem_object_reference(&obj->base);
1602 obj->active = 1;
1603 }
1604
1605 /* Move from whatever list we were on to the tail of execution. */
1606 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1607 list_move_tail(&obj->ring_list, &ring->active_list);
1608
1609 obj->last_rendering_seqno = seqno;
1610 if (obj->fenced_gpu_access) {
1611 struct drm_i915_fence_reg *reg;
1612
1613 BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);
1614
1615 obj->last_fenced_seqno = seqno;
1616 obj->last_fenced_ring = ring;
1617
1618 reg = &dev_priv->fence_regs[obj->fence_reg];
1619 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
1620 }
1621 }
1622
1623 static void
1624 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
1625 {
1626 list_del_init(&obj->ring_list);
1627 obj->last_rendering_seqno = 0;
1628 obj->last_fenced_seqno = 0;
1629 }
1630
1631 static void
1632 i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
1633 {
1634 struct drm_device *dev = obj->base.dev;
1635 drm_i915_private_t *dev_priv = dev->dev_private;
1636
1637 BUG_ON(!obj->active);
1638 list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
1639
1640 i915_gem_object_move_off_active(obj);
1641 }
1642
1643 static void
1644 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1645 {
1646 struct drm_device *dev = obj->base.dev;
1647 struct drm_i915_private *dev_priv = dev->dev_private;
1648
1649 if (obj->pin_count != 0)
1650 list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
1651 else
1652 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1653
1654 BUG_ON(!list_empty(&obj->gpu_write_list));
1655 BUG_ON(!obj->active);
1656 obj->ring = NULL;
1657
1658 i915_gem_object_move_off_active(obj);
1659 obj->fenced_gpu_access = false;
1660 obj->last_fenced_ring = NULL;
1661
1662 obj->active = 0;
1663 drm_gem_object_unreference(&obj->base);
1664
1665 WARN_ON(i915_verify_lists(dev));
1666 }
1667
1668 /* Immediately discard the backing storage */
1669 static void
1670 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1671 {
1672 struct inode *inode;
1673
1674 /* Our goal here is to return as much of the memory as
1675 * possible back to the system, as we may be called from OOM.
1676 * To do this we must instruct the shmfs to drop all of its
1677 * backing pages, *now*. Here we mirror the actions taken
1678 * by shmem_delete_inode() to release the backing store.
1679 */
1680 inode = obj->base.filp->f_path.dentry->d_inode;
1681 truncate_inode_pages(inode->i_mapping, 0);
1682 if (inode->i_op->truncate_range)
1683 inode->i_op->truncate_range(inode, 0, (loff_t)-1);
1684
1685 obj->madv = __I915_MADV_PURGED;
1686 }
1687
1688 static inline int
1689 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1690 {
1691 return obj->madv == I915_MADV_DONTNEED;
1692 }
1693
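/* After emitting a flush, move every object whose pending GPU write was
 * covered by flush_domains onto the ring's active list so the write is
 * retired along with the request.
 */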
1694 static void
1695 i915_gem_process_flushing_list(struct drm_device *dev,
1696 uint32_t flush_domains,
1697 struct intel_ring_buffer *ring)
1698 {
1699 struct drm_i915_gem_object *obj, *next;
1700
1701 list_for_each_entry_safe(obj, next,
1702 &ring->gpu_write_list,
1703 gpu_write_list) {
1704 if (obj->base.write_domain & flush_domains) {
1705 uint32_t old_write_domain = obj->base.write_domain;
1706
1707 obj->base.write_domain = 0;
1708 list_del_init(&obj->gpu_write_list);
1709 i915_gem_object_move_to_active(obj, ring);
1710
1711 trace_i915_gem_object_change_domain(obj,
1712 obj->base.read_domains,
1713 old_write_domain);
1714 }
1715 }
1716 }
1717
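/* Emit a request-complete marker on the ring and queue the request for
 * retirement; also links the request to the submitting client and arms
 * the hangcheck timer and retire work.
 */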
1718 int
1719 i915_add_request(struct drm_device *dev,
1720 struct drm_file *file,
1721 struct drm_i915_gem_request *request,
1722 struct intel_ring_buffer *ring)
1723 {
1724 drm_i915_private_t *dev_priv = dev->dev_private;
1725 struct drm_i915_file_private *file_priv = NULL;
1726 uint32_t seqno;
1727 int was_empty;
1728 int ret;
1729
1730 BUG_ON(request == NULL);
1731
1732 if (file != NULL)
1733 file_priv = file->driver_priv;
1734
1735 ret = ring->add_request(ring, &seqno);
1736 if (ret)
1737 return ret;
1738
1739 ring->outstanding_lazy_request = false;
1740
1741 request->seqno = seqno;
1742 request->ring = ring;
1743 request->emitted_jiffies = jiffies;
1744 was_empty = list_empty(&ring->request_list);
1745 list_add_tail(&request->list, &ring->request_list);
1746
1747 if (file_priv) {
1748 spin_lock(&file_priv->mm.lock);
1749 request->file_priv = file_priv;
1750 list_add_tail(&request->client_list,
1751 &file_priv->mm.request_list);
1752 spin_unlock(&file_priv->mm.lock);
1753 }
1754
1755 if (!dev_priv->mm.suspended) {
1756 mod_timer(&dev_priv->hangcheck_timer,
1757 jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1758 if (was_empty)
1759 queue_delayed_work(dev_priv->wq,
1760 &dev_priv->mm.retire_work, HZ);
1761 }
1762 return 0;
1763 }
1764
1765 /**
1766 * Command execution barrier
1767 *
1768 * Ensures that all commands in the ring are finished
1769 * before signalling the CPU
1770 */
1771 static void
1772 i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1773 {
1774 uint32_t flush_domains = 0;
1775
1776 /* The sampler always gets flushed on i965 (sigh) */
1777 if (INTEL_INFO(dev)->gen >= 4)
1778 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1779
1780 ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
1781 }
1782
1783 static inline void
1784 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1785 {
1786 struct drm_i915_file_private *file_priv = request->file_priv;
1787
1788 if (!file_priv)
1789 return;
1790
1791 spin_lock(&file_priv->mm.lock);
1792 list_del(&request->client_list);
1793 request->file_priv = NULL;
1794 spin_unlock(&file_priv->mm.lock);
1795 }
1796
1797 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1798 struct intel_ring_buffer *ring)
1799 {
1800 while (!list_empty(&ring->request_list)) {
1801 struct drm_i915_gem_request *request;
1802
1803 request = list_first_entry(&ring->request_list,
1804 struct drm_i915_gem_request,
1805 list);
1806
1807 list_del(&request->list);
1808 i915_gem_request_remove_from_client(request);
1809 kfree(request);
1810 }
1811
1812 while (!list_empty(&ring->active_list)) {
1813 struct drm_i915_gem_object *obj;
1814
1815 obj = list_first_entry(&ring->active_list,
1816 struct drm_i915_gem_object,
1817 ring_list);
1818
1819 obj->base.write_domain = 0;
1820 list_del_init(&obj->gpu_write_list);
1821 i915_gem_object_move_to_inactive(obj);
1822 }
1823 }
1824
1825 static void i915_gem_reset_fences(struct drm_device *dev)
1826 {
1827 struct drm_i915_private *dev_priv = dev->dev_private;
1828 int i;
1829
1830 for (i = 0; i < 16; i++) {
1831 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1832 if (reg->obj)
1833 i915_gem_clear_fence_reg(reg->obj);
1834 }
1835 }
1836
1837 void i915_gem_reset(struct drm_device *dev)
1838 {
1839 struct drm_i915_private *dev_priv = dev->dev_private;
1840 struct drm_i915_gem_object *obj;
1841
1842 i915_gem_reset_ring_lists(dev_priv, &dev_priv->render_ring);
1843 i915_gem_reset_ring_lists(dev_priv, &dev_priv->bsd_ring);
1844 i915_gem_reset_ring_lists(dev_priv, &dev_priv->blt_ring);
1845
1846 /* Remove anything from the flushing lists. The GPU cache is likely
1847 * to be lost on reset along with the data, so simply move the
1848 * lost bo to the inactive list.
1849 */
1850 while (!list_empty(&dev_priv->mm.flushing_list)) {
1851 obj = list_first_entry(&dev_priv->mm.flushing_list,
1852 struct drm_i915_gem_object,
1853 mm_list);
1854
1855 obj->base.write_domain = 0;
1856 list_del_init(&obj->gpu_write_list);
1857 i915_gem_object_move_to_inactive(obj);
1858 }
1859
1860 /* Move everything out of the GPU domains to ensure we do any
1861 * necessary invalidation upon reuse.
1862 */
1863 list_for_each_entry(obj,
1864 &dev_priv->mm.inactive_list,
1865 mm_list) {
1867 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1868 }
1869
1870 /* The fence registers are invalidated so clear them out */
1871 i915_gem_reset_fences(dev);
1872 }
1873
1874 /**
1875 * This function clears the request list as sequence numbers are passed.
1876 */
1877 static void
1878 i915_gem_retire_requests_ring(struct drm_device *dev,
1879 struct intel_ring_buffer *ring)
1880 {
1881 drm_i915_private_t *dev_priv = dev->dev_private;
1882 uint32_t seqno;
1883
1884 if (!ring->status_page.page_addr ||
1885 list_empty(&ring->request_list))
1886 return;
1887
1888 WARN_ON(i915_verify_lists(dev));
1889
1890 seqno = ring->get_seqno(ring);
1891 while (!list_empty(&ring->request_list)) {
1892 struct drm_i915_gem_request *request;
1893
1894 request = list_first_entry(&ring->request_list,
1895 struct drm_i915_gem_request,
1896 list);
1897
1898 if (!i915_seqno_passed(seqno, request->seqno))
1899 break;
1900
1901 trace_i915_gem_request_retire(dev, request->seqno);
1902
1903 list_del(&request->list);
1904 i915_gem_request_remove_from_client(request);
1905 kfree(request);
1906 }
1907
1908 /* Move any buffers on the active list that are no longer referenced
1909 * by the ringbuffer to the flushing/inactive lists as appropriate.
1910 */
1911 while (!list_empty(&ring->active_list)) {
1912 struct drm_i915_gem_object *obj;
1913
1914 obj = list_first_entry(&ring->active_list,
1915 struct drm_i915_gem_object,
1916 ring_list);
1917
1918 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
1919 break;
1920
1921 if (obj->base.write_domain != 0)
1922 i915_gem_object_move_to_flushing(obj);
1923 else
1924 i915_gem_object_move_to_inactive(obj);
1925 }
1926
1927 if (unlikely(dev_priv->trace_irq_seqno &&
1928 i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1929 ring->user_irq_put(ring);
1930 dev_priv->trace_irq_seqno = 0;
1931 }
1932
1933 WARN_ON(i915_verify_lists(dev));
1934 }
1935
1936 void
1937 i915_gem_retire_requests(struct drm_device *dev)
1938 {
1939 drm_i915_private_t *dev_priv = dev->dev_private;
1940
1941 if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1942 struct drm_i915_gem_object *obj, *next;
1943
1944 /* We must be careful that during unbind() we do not
1945 * accidentally infinitely recurse into retire requests.
1946 * Currently:
1947 * retire -> free -> unbind -> wait -> retire_ring
1948 */
1949 list_for_each_entry_safe(obj, next,
1950 &dev_priv->mm.deferred_free_list,
1951 mm_list)
1952 i915_gem_free_object_tail(obj);
1953 }
1954
1955 i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1956 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1957 i915_gem_retire_requests_ring(dev, &dev_priv->blt_ring);
1958 }
1959
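/* Periodic retire handler: reap completed requests and re-arm itself
 * while any ring still has outstanding work.
 */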
1960 static void
1961 i915_gem_retire_work_handler(struct work_struct *work)
1962 {
1963 drm_i915_private_t *dev_priv;
1964 struct drm_device *dev;
1965
1966 dev_priv = container_of(work, drm_i915_private_t,
1967 mm.retire_work.work);
1968 dev = dev_priv->dev;
1969
1970 /* Come back later if the device is busy... */
1971 if (!mutex_trylock(&dev->struct_mutex)) {
1972 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1973 return;
1974 }
1975
1976 i915_gem_retire_requests(dev);
1977
1978 if (!dev_priv->mm.suspended &&
1979 (!list_empty(&dev_priv->render_ring.request_list) ||
1980 !list_empty(&dev_priv->bsd_ring.request_list) ||
1981 !list_empty(&dev_priv->blt_ring.request_list)))
1982 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1983 mutex_unlock(&dev->struct_mutex);
1984 }
1985
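/* Wait for the given seqno to pass on @ring. If the seqno refers to the
 * ring's outstanding lazy request, a real request is emitted first so there
 * is something to wait upon. Returns -EAGAIN if the GPU is wedged, and on
 * success retires any requests that completed while we waited.
 */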
1986 int
1987 i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1988 bool interruptible, struct intel_ring_buffer *ring)
1989 {
1990 drm_i915_private_t *dev_priv = dev->dev_private;
1991 u32 ier;
1992 int ret = 0;
1993
1994 BUG_ON(seqno == 0);
1995
1996 if (atomic_read(&dev_priv->mm.wedged))
1997 return -EAGAIN;
1998
1999 if (seqno == ring->outstanding_lazy_request) {
2000 struct drm_i915_gem_request *request;
2001
2002 request = kzalloc(sizeof(*request), GFP_KERNEL);
2003 if (request == NULL)
2004 return -ENOMEM;
2005
2006 ret = i915_add_request(dev, NULL, request, ring);
2007 if (ret) {
2008 kfree(request);
2009 return ret;
2010 }
2011
2012 seqno = request->seqno;
2013 }
2014
2015 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
2016 if (HAS_PCH_SPLIT(dev))
2017 ier = I915_READ(DEIER) | I915_READ(GTIER);
2018 else
2019 ier = I915_READ(IER);
2020 if (!ier) {
2021 DRM_ERROR("something (likely vbetool) disabled "
2022 "interrupts, re-enabling\n");
2023 i915_driver_irq_preinstall(dev);
2024 i915_driver_irq_postinstall(dev);
2025 }
2026
2027 trace_i915_gem_request_wait_begin(dev, seqno);
2028
2029 ring->waiting_seqno = seqno;
2030 ring->user_irq_get(ring);
2031 if (interruptible)
2032 ret = wait_event_interruptible(ring->irq_queue,
2033 i915_seqno_passed(ring->get_seqno(ring), seqno)
2034 || atomic_read(&dev_priv->mm.wedged));
2035 else
2036 wait_event(ring->irq_queue,
2037 i915_seqno_passed(ring->get_seqno(ring), seqno)
2038 || atomic_read(&dev_priv->mm.wedged));
2039
2040 ring->user_irq_put(ring);
2041 ring->waiting_seqno = 0;
2042
2043 trace_i915_gem_request_wait_end(dev, seqno);
2044 }
2045 if (atomic_read(&dev_priv->mm.wedged))
2046 ret = -EAGAIN;
2047
2048 if (ret && ret != -ERESTARTSYS)
2049 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
2050 __func__, ret, seqno, ring->get_seqno(ring),
2051 dev_priv->next_seqno);
2052
2053 /* Directly dispatch request retiring. While we have the work queue
2054 * to handle this, the waiter on a request often wants an associated
2055 * buffer to have made it to the inactive list, and we would need
2056 * a separate wait queue to handle that.
2057 */
2058 if (ret == 0)
2059 i915_gem_retire_requests_ring(dev, ring);
2060
2061 return ret;
2062 }
2063
2064 /**
2065 * Waits for a sequence number to be signaled, and cleans up the
2066 * request and object lists appropriately for that event.
2067 */
2068 static int
2069 i915_wait_request(struct drm_device *dev, uint32_t seqno,
2070 struct intel_ring_buffer *ring)
2071 {
2072 return i915_do_wait_request(dev, seqno, 1, ring);
2073 }
2074
2075 static void
2076 i915_gem_flush_ring(struct drm_device *dev,
2077 struct intel_ring_buffer *ring,
2078 uint32_t invalidate_domains,
2079 uint32_t flush_domains)
2080 {
2081 ring->flush(ring, invalidate_domains, flush_domains);
2082 i915_gem_process_flushing_list(dev, flush_domains, ring);
2083 }
2084
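/* Flush the chipset for any CPU write domain and emit a GPU flush on each
 * ring selected in @flush_rings.
 */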
2085 static void
2086 i915_gem_flush(struct drm_device *dev,
2087 uint32_t invalidate_domains,
2088 uint32_t flush_domains,
2089 uint32_t flush_rings)
2090 {
2091 drm_i915_private_t *dev_priv = dev->dev_private;
2092
2093 if (flush_domains & I915_GEM_DOMAIN_CPU)
2094 intel_gtt_chipset_flush();
2095
2096 if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
2097 if (flush_rings & RING_RENDER)
2098 i915_gem_flush_ring(dev, &dev_priv->render_ring,
2099 invalidate_domains, flush_domains);
2100 if (flush_rings & RING_BSD)
2101 i915_gem_flush_ring(dev, &dev_priv->bsd_ring,
2102 invalidate_domains, flush_domains);
2103 if (flush_rings & RING_BLT)
2104 i915_gem_flush_ring(dev, &dev_priv->blt_ring,
2105 invalidate_domains, flush_domains);
2106 }
2107 }
2108
2109 /**
2110 * Ensures that all rendering to the object has completed and the object is
2111 * safe to unbind from the GTT or access from the CPU.
2112 */
2113 static int
2114 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
2115 bool interruptible)
2116 {
2117 struct drm_device *dev = obj->base.dev;
2118 int ret;
2119
2120 /* This function only exists to support waiting for existing rendering,
2121 * not for emitting required flushes.
2122 */
2123 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
2124
2125 /* If there is rendering queued on the buffer being evicted, wait for
2126 * it.
2127 */
2128 if (obj->active) {
2129 ret = i915_do_wait_request(dev,
2130 obj->last_rendering_seqno,
2131 interruptible,
2132 obj->ring);
2133 if (ret)
2134 return ret;
2135 }
2136
2137 return 0;
2138 }
2139
2140 /**
2141 * Unbinds an object from the GTT aperture.
2142 */
2143 int
2144 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2145 {
2146 int ret = 0;
2147
2148 if (obj->gtt_space == NULL)
2149 return 0;
2150
2151 if (obj->pin_count != 0) {
2152 DRM_ERROR("Attempting to unbind pinned buffer\n");
2153 return -EINVAL;
2154 }
2155
2156 /* blow away mappings if mapped through GTT */
2157 i915_gem_release_mmap(obj);
2158
2159 /* Move the object to the CPU domain to ensure that
2160 * any possible CPU writes while it's not in the GTT
2161 * are flushed when we go to remap it. This will
2162 * also ensure that all pending GPU writes are finished
2163 * before we unbind.
2164 */
2165 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2166 if (ret == -ERESTARTSYS)
2167 return ret;
2168 /* Continue on if we fail due to EIO, the GPU is hung so we
2169 * should be safe and we need to cleanup or else we might
2170 * cause memory corruption through use-after-free.
2171 */
2172 if (ret) {
2173 i915_gem_clflush_object(obj);
2174 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2175 }
2176
2177 /* release the fence reg _after_ flushing */
2178 if (obj->fence_reg != I915_FENCE_REG_NONE)
2179 i915_gem_clear_fence_reg(obj);
2180
2181 i915_gem_gtt_unbind_object(obj);
2182 i915_gem_object_put_pages_gtt(obj);
2183
2184 list_del_init(&obj->gtt_list);
2185 list_del_init(&obj->mm_list);
2186 /* Avoid an unnecessary call to unbind on rebind. */
2187 obj->map_and_fenceable = true;
2188
2189 drm_mm_put_block(obj->gtt_space);
2190 obj->gtt_space = NULL;
2191 obj->gtt_offset = 0;
2192
2193 if (i915_gem_object_is_purgeable(obj))
2194 i915_gem_object_truncate(obj);
2195
2196 trace_i915_gem_object_unbind(obj);
2197
2198 return ret;
2199 }
2200
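/* Flush any outstanding GPU writes on @ring and wait for the resulting
 * request, leaving the ring's active list empty.
 */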
2201 static int i915_ring_idle(struct drm_device *dev,
2202 struct intel_ring_buffer *ring)
2203 {
2204 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
2205 return 0;
2206
2207 i915_gem_flush_ring(dev, ring,
2208 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2209 return i915_wait_request(dev,
2210 i915_gem_next_request_seqno(dev, ring),
2211 ring);
2212 }
2213
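/* Wait for all three rings to drain so that every bo ends up on the
 * inactive list.
 */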
2214 int
2215 i915_gpu_idle(struct drm_device *dev)
2216 {
2217 drm_i915_private_t *dev_priv = dev->dev_private;
2218 bool lists_empty;
2219 int ret;
2220
2221 lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2222 list_empty(&dev_priv->mm.active_list));
2223 if (lists_empty)
2224 return 0;
2225
2226 /* Flush everything onto the inactive list. */
2227 ret = i915_ring_idle(dev, &dev_priv->render_ring);
2228 if (ret)
2229 return ret;
2230
2231 ret = i915_ring_idle(dev, &dev_priv->bsd_ring);
2232 if (ret)
2233 return ret;
2234
2235 ret = i915_ring_idle(dev, &dev_priv->blt_ring);
2236 if (ret)
2237 return ret;
2238
2239 return 0;
2240 }
2241
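/* Program a Sandybridge fence register: pack the object's end address,
 * start address, pitch (in 128-byte units) and tiling mode into the 64-bit
 * fence value, then write it either with MI_LOAD_REGISTER_IMM on the
 * pipelined ring or directly via MMIO.
 */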
2242 static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
2243 struct intel_ring_buffer *pipelined)
2244 {
2245 struct drm_device *dev = obj->base.dev;
2246 drm_i915_private_t *dev_priv = dev->dev_private;
2247 u32 size = obj->gtt_space->size;
2248 int regnum = obj->fence_reg;
2249 uint64_t val;
2250
2251 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2252 0xfffff000) << 32;
2253 val |= obj->gtt_offset & 0xfffff000;
2254 val |= (uint64_t)((obj->stride / 128) - 1) <<
2255 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2256
2257 if (obj->tiling_mode == I915_TILING_Y)
2258 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2259 val |= I965_FENCE_REG_VALID;
2260
2261 if (pipelined) {
2262 int ret = intel_ring_begin(pipelined, 6);
2263 if (ret)
2264 return ret;
2265
2266 intel_ring_emit(pipelined, MI_NOOP);
2267 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2268 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
2269 intel_ring_emit(pipelined, (u32)val);
2270 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
2271 intel_ring_emit(pipelined, (u32)(val >> 32));
2272 intel_ring_advance(pipelined);
2273 } else
2274 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
2275
2276 return 0;
2277 }
2278
2279 static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
2280 struct intel_ring_buffer *pipelined)
2281 {
2282 struct drm_device *dev = obj->base.dev;
2283 drm_i915_private_t *dev_priv = dev->dev_private;
2284 u32 size = obj->gtt_space->size;
2285 int regnum = obj->fence_reg;
2286 uint64_t val;
2287
2288 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2289 0xfffff000) << 32;
2290 val |= obj->gtt_offset & 0xfffff000;
2291 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2292 if (obj->tiling_mode == I915_TILING_Y)
2293 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2294 val |= I965_FENCE_REG_VALID;
2295
2296 if (pipelined) {
2297 int ret = intel_ring_begin(pipelined, 6);
2298 if (ret)
2299 return ret;
2300
2301 intel_ring_emit(pipelined, MI_NOOP);
2302 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2303 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
2304 intel_ring_emit(pipelined, (u32)val);
2305 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
2306 intel_ring_emit(pipelined, (u32)(val >> 32));
2307 intel_ring_advance(pipelined);
2308 } else
2309 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
2310
2311 return 0;
2312 }
2313
2314 static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
2315 struct intel_ring_buffer *pipelined)
2316 {
2317 struct drm_device *dev = obj->base.dev;
2318 drm_i915_private_t *dev_priv = dev->dev_private;
2319 u32 size = obj->gtt_space->size;
2320 u32 fence_reg, val, pitch_val;
2321 int tile_width;
2322
2323 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2324 (size & -size) != size ||
2325 (obj->gtt_offset & (size - 1)),
2326 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2327 obj->gtt_offset, obj->map_and_fenceable, size))
2328 return -EINVAL;
2329
2330 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2331 tile_width = 128;
2332 else
2333 tile_width = 512;
2334
2335 /* Note: pitch better be a power of two tile widths */
2336 pitch_val = obj->stride / tile_width;
2337 pitch_val = ffs(pitch_val) - 1;
2338
2339 val = obj->gtt_offset;
2340 if (obj->tiling_mode == I915_TILING_Y)
2341 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2342 val |= I915_FENCE_SIZE_BITS(size);
2343 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2344 val |= I830_FENCE_REG_VALID;
2345
2346 fence_reg = obj->fence_reg;
2347 if (fence_reg < 8)
2348 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2349 else
2350 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2351
2352 if (pipelined) {
2353 int ret = intel_ring_begin(pipelined, 4);
2354 if (ret)
2355 return ret;
2356
2357 intel_ring_emit(pipelined, MI_NOOP);
2358 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2359 intel_ring_emit(pipelined, fence_reg);
2360 intel_ring_emit(pipelined, val);
2361 intel_ring_advance(pipelined);
2362 } else
2363 I915_WRITE(fence_reg, val);
2364
2365 return 0;
2366 }
2367
2368 static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
2369 struct intel_ring_buffer *pipelined)
2370 {
2371 struct drm_device *dev = obj->base.dev;
2372 drm_i915_private_t *dev_priv = dev->dev_private;
2373 u32 size = obj->gtt_space->size;
2374 int regnum = obj->fence_reg;
2375 uint32_t val;
2376 uint32_t pitch_val;
2377
2378 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2379 (size & -size) != size ||
2380 (obj->gtt_offset & (size - 1)),
2381 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2382 obj->gtt_offset, size))
2383 return -EINVAL;
2384
2385 pitch_val = obj->stride / 128;
2386 pitch_val = ffs(pitch_val) - 1;
2387
2388 val = obj->gtt_offset;
2389 if (obj->tiling_mode == I915_TILING_Y)
2390 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2391 val |= I830_FENCE_SIZE_BITS(size);
2392 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2393 val |= I830_FENCE_REG_VALID;
2394
2395 if (pipelined) {
2396 int ret = intel_ring_begin(pipelined, 4);
2397 if (ret)
2398 return ret;
2399
2400 intel_ring_emit(pipelined, MI_NOOP);
2401 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2402 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
2403 intel_ring_emit(pipelined, val);
2404 intel_ring_advance(pipelined);
2405 } else
2406 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
2407
2408 return 0;
2409 }
2410
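/* Find a fence register for a new user: return the index of a free
 * register if one exists, otherwise steal the least-recently-used
 * unpinned register, waiting for any outstanding fenced access to finish.
 * Returns -ENOSPC when every register is pinned.
 */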
2411 static int i915_find_fence_reg(struct drm_device *dev,
2412 bool interruptible)
2413 {
2414 struct drm_i915_private *dev_priv = dev->dev_private;
2415 struct drm_i915_fence_reg *reg;
2416 struct drm_i915_gem_object *obj = NULL;
2417 int i, avail, ret;
2418
2419 /* First try to find a free reg */
2420 avail = 0;
2421 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2422 reg = &dev_priv->fence_regs[i];
2423 if (!reg->obj)
2424 return i;
2425
2426 if (!reg->obj->pin_count)
2427 avail++;
2428 }
2429
2430 if (avail == 0)
2431 return -ENOSPC;
2432
2433 /* None available, try to steal one or wait for a user to finish */
2434 avail = I915_FENCE_REG_NONE;
2435 list_for_each_entry(reg, &dev_priv->mm.fence_list,
2436 lru_list) {
2437 obj = reg->obj;
2438 if (obj->pin_count)
2439 continue;
2440
2441 /* found one! */
2442 avail = obj->fence_reg;
2443 break;
2444 }
2445
2446 BUG_ON(avail == I915_FENCE_REG_NONE);
2447
2448 /* We only have a reference on obj from the active list. put_fence_reg
2449 * might drop that one, causing a use-after-free in it. So hold a
2450 * private reference to obj like the other callers of put_fence_reg
2451 * (set_tiling ioctl) do. */
2452 drm_gem_object_reference(&obj->base);
2453 ret = i915_gem_object_put_fence_reg(obj, interruptible);
2454 drm_gem_object_unreference(&obj->base);
2455 if (ret != 0)
2456 return ret;
2457
2458 return avail;
2459 }
2460
2461 /**
2462 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2463 * @obj: object to map through a fence reg
2464 *
2465 * When mapping objects through the GTT, userspace wants to be able to write
2466 * to them without having to worry about swizzling if the object is tiled.
2467 *
2468 * This function walks the fence regs looking for a free one for @obj,
2469 * stealing one if it can't find any.
2470 *
2471 * It then sets up the reg based on the object's properties: address, pitch
2472 * and tiling format.
2473 */
2474 int
2475 i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj,
2476 bool interruptible)
2477 {
2478 struct drm_device *dev = obj->base.dev;
2479 struct drm_i915_private *dev_priv = dev->dev_private;
2480 struct drm_i915_fence_reg *reg = NULL;
2481 struct intel_ring_buffer *pipelined = NULL;
2482 int ret;
2483
2484 /* Just update our place in the LRU if our fence is getting used. */
2485 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2486 reg = &dev_priv->fence_regs[obj->fence_reg];
2487 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2488 return 0;
2489 }
2490
2491 switch (obj->tiling_mode) {
2492 case I915_TILING_NONE:
2493 WARN(1, "allocating a fence for non-tiled object?\n");
2494 break;
2495 case I915_TILING_X:
2496 if (!obj->stride)
2497 return -EINVAL;
2498 WARN((obj->stride & (512 - 1)),
2499 "object 0x%08x is X tiled but has non-512B pitch\n",
2500 obj->gtt_offset);
2501 break;
2502 case I915_TILING_Y:
2503 if (!obj->stride)
2504 return -EINVAL;
2505 WARN((obj->stride & (128 - 1)),
2506 "object 0x%08x is Y tiled but has non-128B pitch\n",
2507 obj->gtt_offset);
2508 break;
2509 }
2510
2511 ret = i915_find_fence_reg(dev, interruptible);
2512 if (ret < 0)
2513 return ret;
2514
2515 obj->fence_reg = ret;
2516 reg = &dev_priv->fence_regs[obj->fence_reg];
2517 list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2518
2519 reg->obj = obj;
2520
2521 switch (INTEL_INFO(dev)->gen) {
2522 case 6:
2523 ret = sandybridge_write_fence_reg(obj, pipelined);
2524 break;
2525 case 5:
2526 case 4:
2527 ret = i965_write_fence_reg(obj, pipelined);
2528 break;
2529 case 3:
2530 ret = i915_write_fence_reg(obj, pipelined);
2531 break;
2532 case 2:
2533 ret = i830_write_fence_reg(obj, pipelined);
2534 break;
2535 }
2536
2537 trace_i915_gem_object_get_fence(obj,
2538 obj->fence_reg,
2539 obj->tiling_mode);
2540 return ret;
2541 }
2542
2543 /**
2544 * i915_gem_clear_fence_reg - clear out fence register info
2545 * @obj: object to clear
2546 *
2547 * Zeroes out the fence register itself and clears out the associated
2548 * data structures in dev_priv and obj.
2549 */
2550 static void
2551 i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj)
2552 {
2553 struct drm_device *dev = obj->base.dev;
2554 drm_i915_private_t *dev_priv = dev->dev_private;
2555 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[obj->fence_reg];
2556 uint32_t fence_reg;
2557
2558 switch (INTEL_INFO(dev)->gen) {
2559 case 6:
2560 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2561 (obj->fence_reg * 8), 0);
2562 break;
2563 case 5:
2564 case 4:
2565 I915_WRITE64(FENCE_REG_965_0 + (obj->fence_reg * 8), 0);
2566 break;
2567 case 3:
2568 if (obj->fence_reg >= 8)
2569 fence_reg = FENCE_REG_945_8 + (obj->fence_reg - 8) * 4;
2570 else
2571 case 2:
2572 fence_reg = FENCE_REG_830_0 + obj->fence_reg * 4;
2573
2574 I915_WRITE(fence_reg, 0);
2575 break;
2576 }
2577
2578 reg->obj = NULL;
2579 obj->fence_reg = I915_FENCE_REG_NONE;
2580 list_del_init(&reg->lru_list);
2581 }
2582
2583 /**
2584 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2585 * to the buffer to finish, and then resets the fence register.
2586 * @obj: tiled object holding a fence register.
2587 * @interruptible: whether the wait upon the fence is interruptible
2588 *
2589 * Zeroes out the fence register itself and clears out the associated
2590 * data structures in dev_priv and obj.
2591 */
2592 int
2593 i915_gem_object_put_fence_reg(struct drm_i915_gem_object *obj,
2594 bool interruptible)
2595 {
2596 struct drm_device *dev = obj->base.dev;
2597 int ret;
2598
2599 if (obj->fence_reg == I915_FENCE_REG_NONE)
2600 return 0;
2601
2602 /* If we've changed tiling, GTT-mappings of the object
2603 * need to re-fault to ensure that the correct fence register
2604 * setup is in place.
2605 */
2606 i915_gem_release_mmap(obj);
2607
2608 /* On the i915, GPU access to tiled buffers is via a fence,
2609 * therefore we must wait for any outstanding access to complete
2610 * before clearing the fence.
2611 */
2612 if (obj->fenced_gpu_access) {
2613 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
2614 if (ret)
2615 return ret;
2616
2617 obj->fenced_gpu_access = false;
2618 }
2619
2620 if (obj->last_fenced_seqno) {
2621 ret = i915_do_wait_request(dev,
2622 obj->last_fenced_seqno,
2623 interruptible,
2624 obj->last_fenced_ring);
2625 if (ret)
2626 return ret;
2627
2628 obj->last_fenced_seqno = 0;
2629 }
2630
2631 i915_gem_object_flush_gtt_write_domain(obj);
2632 i915_gem_clear_fence_reg(obj);
2633
2634 return 0;
2635 }
2636
2637 /**
2638 * Finds free space in the GTT aperture and binds the object there.
2639 */
2640 static int
2641 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2642 unsigned alignment,
2643 bool map_and_fenceable)
2644 {
2645 struct drm_device *dev = obj->base.dev;
2646 drm_i915_private_t *dev_priv = dev->dev_private;
2647 struct drm_mm_node *free_space;
2648 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2649 u32 size, fence_size, fence_alignment, unfenced_alignment;
2650 bool mappable, fenceable;
2651 int ret;
2652
2653 if (obj->madv != I915_MADV_WILLNEED) {
2654 DRM_ERROR("Attempting to bind a purgeable object\n");
2655 return -EINVAL;
2656 }
2657
2658 fence_size = i915_gem_get_gtt_size(obj);
2659 fence_alignment = i915_gem_get_gtt_alignment(obj);
2660 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);
2661
2662 if (alignment == 0)
2663 alignment = map_and_fenceable ? fence_alignment :
2664 unfenced_alignment;
2665 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2666 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2667 return -EINVAL;
2668 }
2669
2670 size = map_and_fenceable ? fence_size : obj->base.size;
2671
2672 /* If the object is bigger than the entire aperture, reject it early
2673 * before evicting everything in a vain attempt to find space.
2674 */
2675 if (obj->base.size >
2676 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2677 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2678 return -E2BIG;
2679 }
2680
2681 search_free:
2682 if (map_and_fenceable)
2683 free_space =
2684 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
2685 size, alignment, 0,
2686 dev_priv->mm.gtt_mappable_end,
2687 0);
2688 else
2689 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2690 size, alignment, 0);
2691
2692 if (free_space != NULL) {
2693 if (map_and_fenceable)
2694 obj->gtt_space =
2695 drm_mm_get_block_range_generic(free_space,
2696 size, alignment, 0,
2697 dev_priv->mm.gtt_mappable_end,
2698 0);
2699 else
2700 obj->gtt_space =
2701 drm_mm_get_block(free_space, size, alignment);
2702 }
2703 if (obj->gtt_space == NULL) {
2704 /* If the gtt is empty and we're still having trouble
2705 * fitting our object in, we're out of memory.
2706 */
2707 ret = i915_gem_evict_something(dev, size, alignment,
2708 map_and_fenceable);
2709 if (ret)
2710 return ret;
2711
2712 goto search_free;
2713 }
2714
2715 ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
2716 if (ret) {
2717 drm_mm_put_block(obj->gtt_space);
2718 obj->gtt_space = NULL;
2719
2720 if (ret == -ENOMEM) {
2721 /* first try to clear up some space from the GTT */
2722 ret = i915_gem_evict_something(dev, size,
2723 alignment,
2724 map_and_fenceable);
2725 if (ret) {
2726 /* now try to shrink everyone else */
2727 if (gfpmask) {
2728 gfpmask = 0;
2729 goto search_free;
2730 }
2731
2732 return ret;
2733 }
2734
2735 goto search_free;
2736 }
2737
2738 return ret;
2739 }
2740
2741 ret = i915_gem_gtt_bind_object(obj);
2742 if (ret) {
2743 i915_gem_object_put_pages_gtt(obj);
2744 drm_mm_put_block(obj->gtt_space);
2745 obj->gtt_space = NULL;
2746
2747 ret = i915_gem_evict_something(dev, size,
2748 alignment, map_and_fenceable);
2749 if (ret)
2750 return ret;
2751
2752 goto search_free;
2753 }
2754
2755 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
2756 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2757
2758 /* Assert that the object is not currently in any GPU domain. As it
2759 * wasn't in the GTT, there shouldn't be any way it could have been in
2760 * a GPU cache.
2761 */
2762 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2763 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2764
2765 obj->gtt_offset = obj->gtt_space->start;
2766
2767 fenceable =
2768 obj->gtt_space->size == fence_size &&
2769 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
2770
2771 mappable =
2772 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
2773
2774 obj->map_and_fenceable = mappable && fenceable;
2775
2776 trace_i915_gem_object_bind(obj, obj->gtt_offset, map_and_fenceable);
2777 return 0;
2778 }
2779
2780 void
2781 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2782 {
2783 /* If we don't have a page list set up, then we're not pinned
2784 * to GPU, and we can ignore the cache flush because it'll happen
2785 * again at bind time.
2786 */
2787 if (obj->pages == NULL)
2788 return;
2789
2790 trace_i915_gem_object_clflush(obj);
2791
2792 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2793 }
2794
2795 /** Flushes any GPU write domain for the object if it's dirty. */
2796 static int
2797 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
2798 struct intel_ring_buffer *pipelined)
2799 {
2800 struct drm_device *dev = obj->base.dev;
2801
2802 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
2803 return 0;
2804
2805 /* Queue the GPU write cache flushing we need. */
2806 i915_gem_flush_ring(dev, obj->ring, 0, obj->base.write_domain);
2807 BUG_ON(obj->base.write_domain);
2808
2809 if (pipelined && pipelined == obj->ring)
2810 return 0;
2811
2812 return i915_gem_object_wait_rendering(obj, true);
2813 }
2814
2815 /** Flushes the GTT write domain for the object if it's dirty. */
2816 static void
2817 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2818 {
2819 uint32_t old_write_domain;
2820
2821 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2822 return;
2823
2824 /* No actual flushing is required for the GTT write domain. Writes
2825 * to it immediately go to main memory as far as we know, so there's
2826 * no chipset flush. It also doesn't land in render cache.
2827 */
2828 i915_gem_release_mmap(obj);
2829
2830 old_write_domain = obj->base.write_domain;
2831 obj->base.write_domain = 0;
2832
2833 trace_i915_gem_object_change_domain(obj,
2834 obj->base.read_domains,
2835 old_write_domain);
2836 }
2837
2838 /** Flushes the CPU write domain for the object if it's dirty. */
2839 static void
2840 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2841 {
2842 uint32_t old_write_domain;
2843
2844 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2845 return;
2846
2847 i915_gem_clflush_object(obj);
2848 intel_gtt_chipset_flush();
2849 old_write_domain = obj->base.write_domain;
2850 obj->base.write_domain = 0;
2851
2852 trace_i915_gem_object_change_domain(obj,
2853 obj->base.read_domains,
2854 old_write_domain);
2855 }
2856
2857 /**
2858 * Moves a single object to the GTT read, and possibly write domain.
2859 *
2860 * This function returns when the move is complete, including waiting on
2861 * flushes to occur.
2862 */
2863 int
2864 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2865 {
2866 uint32_t old_write_domain, old_read_domains;
2867 int ret;
2868
2869 /* Not valid to be called on unbound objects. */
2870 if (obj->gtt_space == NULL)
2871 return -EINVAL;
2872
2873 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
2874 if (ret != 0)
2875 return ret;
2876
2877 i915_gem_object_flush_cpu_write_domain(obj);
2878
2879 if (write) {
2880 ret = i915_gem_object_wait_rendering(obj, true);
2881 if (ret)
2882 return ret;
2883 }
2884
2885 old_write_domain = obj->base.write_domain;
2886 old_read_domains = obj->base.read_domains;
2887
2888 /* It should now be out of any other write domains, and we can update
2889 * the domain values for our changes.
2890 */
2891 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2892 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2893 if (write) {
2894 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2895 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2896 obj->dirty = 1;
2897 }
2898
2899 trace_i915_gem_object_change_domain(obj,
2900 old_read_domains,
2901 old_write_domain);
2902
2903 return 0;
2904 }
2905
2906 /*
2907 * Prepare buffer for display plane. Use uninterruptible for possible flush
2908 * wait, as during the modesetting process we're not supposed to be interrupted.
2909 */
2910 int
2911 i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
2912 struct intel_ring_buffer *pipelined)
2913 {
2914 uint32_t old_read_domains;
2915 int ret;
2916
2917 /* Not valid to be called on unbound objects. */
2918 if (obj->gtt_space == NULL)
2919 return -EINVAL;
2920
2921 ret = i915_gem_object_flush_gpu_write_domain(obj, pipelined);
2922 if (ret)
2923 return ret;
2924
2925 /* Currently, we are always called from a non-interruptible context. */
2926 if (!pipelined) {
2927 ret = i915_gem_object_wait_rendering(obj, false);
2928 if (ret)
2929 return ret;
2930 }
2931
2932 i915_gem_object_flush_cpu_write_domain(obj);
2933
2934 old_read_domains = obj->base.read_domains;
2935 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2936
2937 trace_i915_gem_object_change_domain(obj,
2938 old_read_domains,
2939 obj->base.write_domain);
2940
2941 return 0;
2942 }
2943
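/* Flush any pending GPU writes for @obj and wait for rendering to
 * complete. A no-op for objects that are not currently active.
 */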
2944 int
2945 i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj,
2946 bool interruptible)
2947 {
2948 if (!obj->active)
2949 return 0;
2950
2951 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
2952 i915_gem_flush_ring(obj->base.dev, obj->ring,
2953 0, obj->base.write_domain);
2954
2955 return i915_gem_object_wait_rendering(obj, interruptible);
2956 }
2957
2958 /**
2959 * Moves a single object to the CPU read, and possibly write domain.
2960 *
2961 * This function returns when the move is complete, including waiting on
2962 * flushes to occur.
2963 */
2964 static int
2965 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
2966 {
2967 uint32_t old_write_domain, old_read_domains;
2968 int ret;
2969
2970 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
2971 if (ret != 0)
2972 return ret;
2973
2974 i915_gem_object_flush_gtt_write_domain(obj);
2975
2976 /* If we have a partially-valid cache of the object in the CPU,
2977 * finish invalidating it and free the per-page flags.
2978 */
2979 i915_gem_object_set_to_full_cpu_read_domain(obj);
2980
2981 if (write) {
2982 ret = i915_gem_object_wait_rendering(obj, true);
2983 if (ret)
2984 return ret;
2985 }
2986
2987 old_write_domain = obj->base.write_domain;
2988 old_read_domains = obj->base.read_domains;
2989
2990 /* Flush the CPU cache if it's still invalid. */
2991 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2992 i915_gem_clflush_object(obj);
2993
2994 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2995 }
2996
2997 /* It should now be out of any other write domains, and we can update
2998 * the domain values for our changes.
2999 */
3000 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3001
3002 /* If we're writing through the CPU, then the GPU read domains will
3003 * need to be invalidated at next use.
3004 */
3005 if (write) {
3006 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3007 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3008 }
3009
3010 trace_i915_gem_object_change_domain(obj,
3011 old_read_domains,
3012 old_write_domain);
3013
3014 return 0;
3015 }
3016
3017 /*
3018 * Set the next domain for the specified object. This
3019 * may not actually perform the necessary flushing/invalidating though,
3020 * as that may want to be batched with other set_domain operations
3021 *
3022 * This is (we hope) the only really tricky part of gem. The goal
3023 * is fairly simple -- track which caches hold bits of the object
3024 * and make sure they remain coherent. A few concrete examples may
3025 * help to explain how it works. For shorthand, we use the notation
3026 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
3027 * a pair of read and write domain masks.
3028 *
3029 * Case 1: the batch buffer
3030 *
3031 * 1. Allocated
3032 * 2. Written by CPU
3033 * 3. Mapped to GTT
3034 * 4. Read by GPU
3035 * 5. Unmapped from GTT
3036 * 6. Freed
3037 *
3038 * Let's take these a step at a time
3039 *
3040 * 1. Allocated
3041 * Pages allocated from the kernel may still have
3042 * cache contents, so we set them to (CPU, CPU) always.
3043 * 2. Written by CPU (using pwrite)
3044 * The pwrite function calls set_domain (CPU, CPU) and
3045 * this function does nothing (as nothing changes)
3046 * 3. Mapped by GTT
3047 * This function asserts that the object is not
3048 * currently in any GPU-based read or write domains
3049 * 4. Read by GPU
3050 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
3051 * As write_domain is zero, this function adds in the
3052 * current read domains (CPU+COMMAND, 0).
3053 * flush_domains is set to CPU.
3054 * invalidate_domains is set to COMMAND
3055 * clflush is run to get data out of the CPU caches
3056 * then i915_dev_set_domain calls i915_gem_flush to
3057 * emit an MI_FLUSH and drm_agp_chipset_flush
3058 * 5. Unmapped from GTT
3059 * i915_gem_object_unbind calls set_domain (CPU, CPU)
3060 * flush_domains and invalidate_domains end up both zero
3061 * so no flushing/invalidating happens
3062 * 6. Freed
3063 * yay, done
3064 *
3065 * Case 2: The shared render buffer
3066 *
3067 * 1. Allocated
3068 * 2. Mapped to GTT
3069 * 3. Read/written by GPU
3070 * 4. set_domain to (CPU,CPU)
3071 * 5. Read/written by CPU
3072 * 6. Read/written by GPU
3073 *
3074 * 1. Allocated
3075 * Same as last example, (CPU, CPU)
3076 * 2. Mapped to GTT
3077 * Nothing changes (assertions find that it is not in the GPU)
3078 * 3. Read/written by GPU
3079 * execbuffer calls set_domain (RENDER, RENDER)
3080 * flush_domains gets CPU
3081 * invalidate_domains gets GPU
3082 * clflush (obj)
3083 * MI_FLUSH and drm_agp_chipset_flush
3084 * 4. set_domain (CPU, CPU)
3085 * flush_domains gets GPU
3086 * invalidate_domains gets CPU
3087 * wait_rendering (obj) to make sure all drawing is complete.
3088 * This will include an MI_FLUSH to get the data from GPU
3089 * to memory
3090 * clflush (obj) to invalidate the CPU cache
3091 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
3092 * 5. Read/written by CPU
3093 * cache lines are loaded and dirtied
3094 * 6. Read/written by GPU
3095 * Same as last GPU access
3096 *
3097 * Case 3: The constant buffer
3098 *
3099 * 1. Allocated
3100 * 2. Written by CPU
3101 * 3. Read by GPU
3102 * 4. Updated (written) by CPU again
3103 * 5. Read by GPU
3104 *
3105 * 1. Allocated
3106 * (CPU, CPU)
3107 * 2. Written by CPU
3108 * (CPU, CPU)
3109 * 3. Read by GPU
3110 * (CPU+RENDER, 0)
3111 * flush_domains = CPU
3112 * invalidate_domains = RENDER
3113 * clflush (obj)
3114 * MI_FLUSH
3115 * drm_agp_chipset_flush
3116 * 4. Updated (written) by CPU again
3117 * (CPU, CPU)
3118 * flush_domains = 0 (no previous write domain)
3119 * invalidate_domains = 0 (no new read domains)
3120 * 5. Read by GPU
3121 * (CPU+RENDER, 0)
3122 * flush_domains = CPU
3123 * invalidate_domains = RENDER
3124 * clflush (obj)
3125 * MI_FLUSH
3126 * drm_agp_chipset_flush
3127 */
3128 static void
3129 i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
3130 struct intel_ring_buffer *ring,
3131 struct change_domains *cd)
3132 {
3133 uint32_t invalidate_domains = 0, flush_domains = 0;
3134
3135 /*
3136 * If the object isn't moving to a new write domain,
3137 * let the object stay in multiple read domains
3138 */
3139 if (obj->base.pending_write_domain == 0)
3140 obj->base.pending_read_domains |= obj->base.read_domains;
3141
3142 /*
3143 * Flush the current write domain if
3144 * the new read domains don't match. Invalidate
3145 * any read domains which differ from the old
3146 * write domain
3147 */
3148 if (obj->base.write_domain &&
3149 (((obj->base.write_domain != obj->base.pending_read_domains ||
3150 obj->ring != ring)) ||
3151 (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
3152 flush_domains |= obj->base.write_domain;
3153 invalidate_domains |=
3154 obj->base.pending_read_domains & ~obj->base.write_domain;
3155 }
3156 /*
3157 * Invalidate any read caches which may have
3158 * stale data. That is, any new read domains.
3159 */
3160 invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
3161 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
3162 i915_gem_clflush_object(obj);
3163
3164 /* blow away mappings if mapped through GTT */
3165 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
3166 i915_gem_release_mmap(obj);
3167
3168 /* The actual obj->write_domain will be updated with
3169 * pending_write_domain after we emit the accumulated flush for all
3170 * of our domain changes in execbuffers (which clears objects'
3171 * write_domains). So if we have a current write domain that we
3172 * aren't changing, set pending_write_domain to that.
3173 */
3174 if (flush_domains == 0 && obj->base.pending_write_domain == 0)
3175 obj->base.pending_write_domain = obj->base.write_domain;
3176
3177 cd->invalidate_domains |= invalidate_domains;
3178 cd->flush_domains |= flush_domains;
3179 if (flush_domains & I915_GEM_GPU_DOMAINS)
3180 cd->flush_rings |= obj->ring->id;
3181 if (invalidate_domains & I915_GEM_GPU_DOMAINS)
3182 cd->flush_rings |= ring->id;
3183 }
3184
3185 /**
3186 * Moves the object from a partially CPU read to a full one.
3187 *
3188 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3189 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3190 */
3191 static void
3192 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
3193 {
3194 if (!obj->page_cpu_valid)
3195 return;
3196
3197 /* If we're partially in the CPU read domain, finish moving it in.
3198 */
3199 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
3200 int i;
3201
3202 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
3203 if (obj->page_cpu_valid[i])
3204 continue;
3205 drm_clflush_pages(obj->pages + i, 1);
3206 }
3207 }
3208
3209 /* Free the page_cpu_valid mappings which are now stale, whether
3210 * or not we've got I915_GEM_DOMAIN_CPU.
3211 */
3212 kfree(obj->page_cpu_valid);
3213 obj->page_cpu_valid = NULL;
3214 }
3215
3216 /**
3217 * Set the CPU read domain on a range of the object.
3218 *
3219 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3220 * not entirely valid. The page_cpu_valid member of the object flags which
3221 * pages have been flushed, and will be respected by
3222 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3223 * of the whole object.
3224 *
3225 * This function returns when the move is complete, including waiting on
3226 * flushes to occur.
3227 */
3228 static int
3229 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
3230 uint64_t offset, uint64_t size)
3231 {
3232 uint32_t old_read_domains;
3233 int i, ret;
3234
3235 if (offset == 0 && size == obj->base.size)
3236 return i915_gem_object_set_to_cpu_domain(obj, 0);
3237
3238 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
3239 if (ret != 0)
3240 return ret;
3241 i915_gem_object_flush_gtt_write_domain(obj);
3242
3243 /* If we're already fully in the CPU read domain, we're done. */
3244 if (obj->page_cpu_valid == NULL &&
3245 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
3246 return 0;
3247
3248 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3249 * newly adding I915_GEM_DOMAIN_CPU
3250 */
3251 if (obj->page_cpu_valid == NULL) {
3252 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
3253 GFP_KERNEL);
3254 if (obj->page_cpu_valid == NULL)
3255 return -ENOMEM;
3256 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
3257 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);
3258
3259 /* Flush the cache on any pages that are still invalid from the CPU's
3260 * perspective.
3261 */
3262 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3263 i++) {
3264 if (obj->page_cpu_valid[i])
3265 continue;
3266
3267 drm_clflush_pages(obj->pages + i, 1);
3268
3269 obj->page_cpu_valid[i] = 1;
3270 }
3271
3272 /* It should now be out of any other write domains, and we can update
3273 * the domain values for our changes.
3274 */
3275 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3276
3277 old_read_domains = obj->base.read_domains;
3278 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3279
3280 trace_i915_gem_object_change_domain(obj,
3281 old_read_domains,
3282 obj->base.write_domain);
3283
3284 return 0;
3285 }
3286
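/* Apply a single relocation: validate the requested read/write domains and
 * the offsets within both objects, then write the target's GTT offset into
 * the batch, either through the CPU mapping or through an atomic GTT
 * mapping, and report the new presumed offset back to the caller.
 */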
3287 static int
3288 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
3289 struct drm_file *file_priv,
3290 struct drm_i915_gem_exec_object2 *entry,
3291 struct drm_i915_gem_relocation_entry *reloc)
3292 {
3293 struct drm_device *dev = obj->base.dev;
3294 struct drm_gem_object *target_obj;
3295 uint32_t target_offset;
3296 int ret = -EINVAL;
3297
3298 target_obj = drm_gem_object_lookup(dev, file_priv,
3299 reloc->target_handle);
3300 if (target_obj == NULL)
3301 return -ENOENT;
3302
3303 target_offset = to_intel_bo(target_obj)->gtt_offset;
3304
3305 #if WATCH_RELOC
3306 DRM_INFO("%s: obj %p offset %08x target %d "
3307 "read %08x write %08x gtt %08x "
3308 "presumed %08x delta %08x\n",
3309 __func__,
3310 obj,
3311 (int) reloc->offset,
3312 (int) reloc->target_handle,
3313 (int) reloc->read_domains,
3314 (int) reloc->write_domain,
3315 (int) target_offset,
3316 (int) reloc->presumed_offset,
3317 reloc->delta);
3318 #endif
3319
3320 /* The target buffer should have appeared before us in the
3321 * exec_object list, so it should have a GTT space bound by now.
3322 */
3323 if (target_offset == 0) {
3324 DRM_ERROR("No GTT space found for object %d\n",
3325 reloc->target_handle);
3326 goto err;
3327 }
3328
3329 /* Validate that the target is in a valid r/w GPU domain */
3330 if (reloc->write_domain & (reloc->write_domain - 1)) {
3331 DRM_ERROR("reloc with multiple write domains: "
3332 "obj %p target %d offset %d "
3333 "read %08x write %08x",
3334 obj, reloc->target_handle,
3335 (int) reloc->offset,
3336 reloc->read_domains,
3337 reloc->write_domain);
3338 goto err;
3339 }
3340 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3341 reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3342 DRM_ERROR("reloc with read/write CPU domains: "
3343 "obj %p target %d offset %d "
3344 "read %08x write %08x",
3345 obj, reloc->target_handle,
3346 (int) reloc->offset,
3347 reloc->read_domains,
3348 reloc->write_domain);
3349 goto err;
3350 }
3351 if (reloc->write_domain && target_obj->pending_write_domain &&
3352 reloc->write_domain != target_obj->pending_write_domain) {
3353 DRM_ERROR("Write domain conflict: "
3354 "obj %p target %d offset %d "
3355 "new %08x old %08x\n",
3356 obj, reloc->target_handle,
3357 (int) reloc->offset,
3358 reloc->write_domain,
3359 target_obj->pending_write_domain);
3360 goto err;
3361 }
3362
3363 target_obj->pending_read_domains |= reloc->read_domains;
3364 target_obj->pending_write_domain |= reloc->write_domain;
3365
3366 /* If the relocation already has the right value in it, no
3367 * more work needs to be done.
3368 */
3369 if (target_offset == reloc->presumed_offset)
3370 goto out;
3371
3372 /* Check that the relocation address is valid... */
3373 if (reloc->offset > obj->base.size - 4) {
3374 DRM_ERROR("Relocation beyond object bounds: "
3375 "obj %p target %d offset %d size %d.\n",
3376 obj, reloc->target_handle,
3377 (int) reloc->offset,
3378 (int) obj->base.size);
3379 goto err;
3380 }
3381 if (reloc->offset & 3) {
3382 DRM_ERROR("Relocation not 4-byte aligned: "
3383 "obj %p target %d offset %d.\n",
3384 obj, reloc->target_handle,
3385 (int) reloc->offset);
3386 goto err;
3387 }
3388
3389 /* and points to somewhere within the target object. */
3390 if (reloc->delta >= target_obj->size) {
3391 DRM_ERROR("Relocation beyond target object bounds: "
3392 "obj %p target %d delta %d size %d.\n",
3393 obj, reloc->target_handle,
3394 (int) reloc->delta,
3395 (int) target_obj->size);
3396 goto err;
3397 }
3398
3399 reloc->delta += target_offset;
3400 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
3401 uint32_t page_offset = reloc->offset & ~PAGE_MASK;
3402 char *vaddr;
3403
3404 vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
3405 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
3406 kunmap_atomic(vaddr);
3407 } else {
3408 struct drm_i915_private *dev_priv = dev->dev_private;
3409 uint32_t __iomem *reloc_entry;
3410 void __iomem *reloc_page;
3411
3412 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3413 if (ret)
3414 goto err;
3415
3416 /* Map the page containing the relocation we're going to perform. */
3417 reloc->offset += obj->gtt_offset;
3418 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3419 reloc->offset & PAGE_MASK);
3420 reloc_entry = (uint32_t __iomem *)
3421 (reloc_page + (reloc->offset & ~PAGE_MASK));
3422 iowrite32(reloc->delta, reloc_entry);
3423 io_mapping_unmap_atomic(reloc_page);
3424 }
3425
3426 /* and update the user's relocation entry */
3427 reloc->presumed_offset = target_offset;
3428
3429 out:
3430 ret = 0;
3431 err:
3432 drm_gem_object_unreference(target_obj);
3433 return ret;
3434 }
3435
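/* Apply all relocations for one exec object, copying each entry to and
 * from userspace with the non-faulting (inatomic) helpers so that an
 * -EFAULT can be handled by the slow path.
 */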
3436 static int
3437 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
3438 struct drm_file *file_priv,
3439 struct drm_i915_gem_exec_object2 *entry)
3440 {
3441 struct drm_i915_gem_relocation_entry __user *user_relocs;
3442 int i, ret;
3443
3444 user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
3445 for (i = 0; i < entry->relocation_count; i++) {
3446 struct drm_i915_gem_relocation_entry reloc;
3447
3448 if (__copy_from_user_inatomic(&reloc,
3449 user_relocs+i,
3450 sizeof(reloc)))
3451 return -EFAULT;
3452
3453 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &reloc);
3454 if (ret)
3455 return ret;
3456
3457 if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
3458 &reloc.presumed_offset,
3459 sizeof(reloc.presumed_offset)))
3460 return -EFAULT;
3461 }
3462
3463 return 0;
3464 }
3465
3466 static int
3467 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
3468 struct drm_file *file_priv,
3469 struct drm_i915_gem_exec_object2 *entry,
3470 struct drm_i915_gem_relocation_entry *relocs)
3471 {
3472 int i, ret;
3473
3474 for (i = 0; i < entry->relocation_count; i++) {
3475 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &relocs[i]);
3476 if (ret)
3477 return ret;
3478 }
3479
3480 return 0;
3481 }
3482
3483 static int
3484 i915_gem_execbuffer_relocate(struct drm_device *dev,
3485 struct drm_file *file,
3486 struct drm_i915_gem_object **object_list,
3487 struct drm_i915_gem_exec_object2 *exec_list,
3488 int count)
3489 {
3490 int i, ret;
3491
3492 for (i = 0; i < count; i++) {
3493 struct drm_i915_gem_object *obj = object_list[i];
3494 obj->base.pending_read_domains = 0;
3495 obj->base.pending_write_domain = 0;
3496 ret = i915_gem_execbuffer_relocate_object(obj, file,
3497 &exec_list[i]);
3498 if (ret)
3499 return ret;
3500 }
3501
3502 return 0;
3503 }
3504
3505 static int
3506 i915_gem_execbuffer_reserve(struct drm_device *dev,
3507 struct drm_file *file,
3508 struct drm_i915_gem_object **object_list,
3509 struct drm_i915_gem_exec_object2 *exec_list,
3510 int count)
3511 {
3512 int ret, i, retry;
3513
3514 /* Attempt to pin all of the buffers into the GTT.
3515 * This is done in 3 phases:
3516 *
3517 * 1a. Unbind all objects that do not match the GTT constraints for
3518 * the execbuffer (fenceable, mappable, alignment etc).
3519 * 1b. Increment pin count for already bound objects.
3520 * 2. Bind new objects.
3521 * 3. Decrement pin count.
3522 *
3523 * This avoids unnecessary unbinding of later objects in order to make
3524 * room for the earlier objects *unless* we need to defragment.
3525 */
3526 retry = 0;
3527 do {
3528 ret = 0;
3529
3530 /* Unbind any ill-fitting objects or pin. */
3531 for (i = 0; i < count; i++) {
3532 struct drm_i915_gem_object *obj = object_list[i];
3533 struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
3534 bool need_fence, need_mappable;
3535
3536 if (!obj->gtt_space)
3537 continue;
3538
3539 need_fence =
3540 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3541 obj->tiling_mode != I915_TILING_NONE;
3542 need_mappable =
3543 entry->relocation_count ? true : need_fence;
3544
3545 if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
3546 (need_mappable && !obj->map_and_fenceable))
3547 ret = i915_gem_object_unbind(obj);
3548 else
3549 ret = i915_gem_object_pin(obj,
3550 entry->alignment,
3551 need_mappable);
3552 if (ret) {
3553 count = i;
3554 goto err;
3555 }
3556 }
3557
3558 /* Bind fresh objects */
3559 for (i = 0; i < count; i++) {
3560 struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
3561 struct drm_i915_gem_object *obj = object_list[i];
3562 bool need_fence;
3563
3564 need_fence =
3565 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3566 obj->tiling_mode != I915_TILING_NONE;
3567
3568 if (!obj->gtt_space) {
3569 bool need_mappable =
3570 entry->relocation_count ? true : need_fence;
3571
3572 ret = i915_gem_object_pin(obj,
3573 entry->alignment,
3574 need_mappable);
3575 if (ret)
3576 break;
3577 }
3578
3579 if (need_fence) {
3580 ret = i915_gem_object_get_fence_reg(obj, true);
3581 if (ret)
3582 break;
3583
3584 obj->pending_fenced_gpu_access = true;
3585 }
3586
3587 entry->offset = obj->gtt_offset;
3588 }
3589
3590 err: /* Decrement pin count for bound objects */
3591 for (i = 0; i < count; i++) {
3592 struct drm_i915_gem_object *obj = object_list[i];
3593 if (obj->gtt_space)
3594 i915_gem_object_unpin(obj);
3595 }
3596
3597 if (ret != -ENOSPC || retry > 1)
3598 return ret;
3599
3600 /* First attempt, just clear anything that is purgeable.
3601 * Second attempt, clear the entire GTT.
3602 */
3603 ret = i915_gem_evict_everything(dev, retry == 0);
3604 if (ret)
3605 return ret;
3606
3607 retry++;
3608 } while (1);
3609 }
3610
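/* Slow relocation path, used when the atomic copies above fault: drop
 * struct_mutex, copy every relocation entry with the faulting
 * copy_from_user, then retake the lock, re-reserve the objects and apply
 * the relocations from the kernel-side copy.
 */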
3611 static int
3612 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
3613 struct drm_file *file,
3614 struct drm_i915_gem_object **object_list,
3615 struct drm_i915_gem_exec_object2 *exec_list,
3616 int count)
3617 {
3618 struct drm_i915_gem_relocation_entry *reloc;
3619 int i, total, ret;
3620
3621 for (i = 0; i < count; i++)
3622 object_list[i]->in_execbuffer = false;
3623
3624 mutex_unlock(&dev->struct_mutex);
3625
3626 total = 0;
3627 for (i = 0; i < count; i++)
3628 total += exec_list[i].relocation_count;
3629
3630 reloc = drm_malloc_ab(total, sizeof(*reloc));
3631 if (reloc == NULL) {
3632 mutex_lock(&dev->struct_mutex);
3633 return -ENOMEM;
3634 }
3635
3636 total = 0;
3637 for (i = 0; i < count; i++) {
3638 struct drm_i915_gem_relocation_entry __user *user_relocs;
3639
3640 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3641
3642 if (copy_from_user(reloc+total, user_relocs,
3643 exec_list[i].relocation_count *
3644 sizeof(*reloc))) {
3645 ret = -EFAULT;
3646 mutex_lock(&dev->struct_mutex);
3647 goto err;
3648 }
3649
3650 total += exec_list[i].relocation_count;
3651 }
3652
3653 ret = i915_mutex_lock_interruptible(dev);
3654 if (ret) {
3655 mutex_lock(&dev->struct_mutex);
3656 goto err;
3657 }
3658
3659 ret = i915_gem_execbuffer_reserve(dev, file,
3660 object_list, exec_list,
3661 count);
3662 if (ret)
3663 goto err;
3664
3665 total = 0;
3666 for (i = 0; i < count; i++) {
3667 struct drm_i915_gem_object *obj = object_list[i];
3668 obj->base.pending_read_domains = 0;
3669 obj->base.pending_write_domain = 0;
3670 ret = i915_gem_execbuffer_relocate_object_slow(obj, file,
3671 &exec_list[i],
3672 reloc + total);
3673 if (ret)
3674 goto err;
3675
3676 total += exec_list[i].relocation_count;
3677 }
3678
3679 /* Leave the user relocations as are, this is the painfully slow path,
3680 * and we want to avoid the complication of dropping the lock whilst
3681 * having buffers reserved in the aperture and so causing spurious
3682 * ENOSPC for random operations.
3683 */
3684
3685 err:
3686 drm_free_large(reloc);
3687 return ret;
3688 }
3689
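/* Accumulate the domain transitions required by every object in the
 * execbuffer into a single change_domains summary, emit one combined
 * flush/invalidate, and then serialise against rendering still queued on
 * other rings.
 */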
3690 static int
3691 i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
3692 struct drm_file *file,
3693 struct intel_ring_buffer *ring,
3694 struct drm_i915_gem_object **objects,
3695 int count)
3696 {
3697 struct change_domains cd;
3698 int ret, i;
3699
3700 cd.invalidate_domains = 0;
3701 cd.flush_domains = 0;
3702 cd.flush_rings = 0;
3703 for (i = 0; i < count; i++)
3704 i915_gem_object_set_to_gpu_domain(objects[i], ring, &cd);
3705
3706 if (cd.invalidate_domains | cd.flush_domains) {
3707 #if WATCH_EXEC
3708 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3709 __func__,
3710 cd.invalidate_domains,
3711 cd.flush_domains);
3712 #endif
3713 i915_gem_flush(dev,
3714 cd.invalidate_domains,
3715 cd.flush_domains,
3716 cd.flush_rings);
3717 }
3718
3719 for (i = 0; i < count; i++) {
3720 struct drm_i915_gem_object *obj = objects[i];
3721 /* XXX replace with semaphores */
3722 if (obj->ring && ring != obj->ring) {
3723 ret = i915_gem_object_wait_rendering(obj, true);
3724 if (ret)
3725 return ret;
3726 }
3727 }
3728
3729 return 0;
3730 }
3731
3732 /* Throttle our rendering by waiting until the ring has completed our requests
3733 * emitted over 20 msec ago.
3734 *
3735 * Note that if we were to use the current jiffies each time around the loop,
3736 * we wouldn't escape the function with any frames outstanding if the time to
3737 * render a frame was over 20ms.
3738 *
3739 * This should get us reasonable parallelism between CPU and GPU but also
3740 * relatively low latency when blocking on a particular request to finish.
3741 */
3742 static int
3743 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3744 {
3745 struct drm_i915_private *dev_priv = dev->dev_private;
3746 struct drm_i915_file_private *file_priv = file->driver_priv;
3747 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3748 struct drm_i915_gem_request *request;
3749 struct intel_ring_buffer *ring = NULL;
3750 u32 seqno = 0;
3751 int ret;
3752
3753 spin_lock(&file_priv->mm.lock);
3754 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3755 if (time_after_eq(request->emitted_jiffies, recent_enough))
3756 break;
3757
3758 ring = request->ring;
3759 seqno = request->seqno;
3760 }
3761 spin_unlock(&file_priv->mm.lock);
3762
3763 if (seqno == 0)
3764 return 0;
3765
3766 ret = 0;
3767 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3768 /* And wait for the seqno passing without holding any locks and
3769 * causing extra latency for others. This is safe as the irq
3770 * generation is designed to be run atomically and so is
3771 * lockless.
3772 */
3773 ring->user_irq_get(ring);
3774 ret = wait_event_interruptible(ring->irq_queue,
3775 i915_seqno_passed(ring->get_seqno(ring), seqno)
3776 || atomic_read(&dev_priv->mm.wedged));
3777 ring->user_irq_put(ring);
3778
3779 if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3780 ret = -EIO;
3781 }
3782
3783 if (ret == 0)
3784 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3785
3786 return ret;
3787 }
3788
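/* Basic sanity checks on the batch: both the start offset and length must
 * be 8-byte aligned, and the batch must not start at GTT offset zero.
 */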
3789 static int
3790 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3791 uint64_t exec_offset)
3792 {
3793 uint32_t exec_start, exec_len;
3794
3795 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3796 exec_len = (uint32_t) exec->batch_len;
3797
3798 if ((exec_start | exec_len) & 0x7)
3799 return -EINVAL;
3800
3801 if (!exec_start)
3802 return -EINVAL;
3803
3804 return 0;
3805 }
3806
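/* Before taking struct_mutex, check each exec object's relocation list for
 * integer overflow and make sure the userspace buffer is both readable and
 * writable (we write back presumed offsets), prefaulting it in.
 */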
3807 static int
3808 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
3809 int count)
3810 {
3811 int i;
3812
3813 for (i = 0; i < count; i++) {
3814 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
3815 int length; /* limited by fault_in_pages_readable() */
3816
3817 /* First check for malicious input causing overflow */
3818 if (exec[i].relocation_count >
3819 INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
3820 return -EINVAL;
3821
3822 length = exec[i].relocation_count *
3823 sizeof(struct drm_i915_gem_relocation_entry);
3824 if (!access_ok(VERIFY_READ, ptr, length))
3825 return -EFAULT;
3826
3827 /* we may also need to update the presumed offsets */
3828 if (!access_ok(VERIFY_WRITE, ptr, length))
3829 return -EFAULT;
3830
3831 if (fault_in_pages_readable(ptr, length))
3832 return -EFAULT;
3833 }
3834
3835 return 0;
3836 }
3837
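/* Main execbuffer path: select the target ring, copy in any cliprects,
 * look up and reserve every object in the GTT, and apply the relocations,
 * falling back to the slow path if the fast copies fault.
 */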
3838 static int
3839 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3840 struct drm_file *file,
3841 struct drm_i915_gem_execbuffer2 *args,
3842 struct drm_i915_gem_exec_object2 *exec_list)
3843 {
3844 drm_i915_private_t *dev_priv = dev->dev_private;
3845 struct drm_i915_gem_object **object_list = NULL;
3846 struct drm_i915_gem_object *batch_obj;
3847 struct drm_clip_rect *cliprects = NULL;
3848 struct drm_i915_gem_request *request = NULL;
3849 int ret, i, flips;
3850 uint64_t exec_offset;
3851
3852 struct intel_ring_buffer *ring = NULL;
3853
3854 ret = i915_gem_check_is_wedged(dev);
3855 if (ret)
3856 return ret;
3857
3858 ret = validate_exec_list(exec_list, args->buffer_count);
3859 if (ret)
3860 return ret;
3861
3862 #if WATCH_EXEC
3863 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3864 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3865 #endif
3866 switch (args->flags & I915_EXEC_RING_MASK) {
3867 case I915_EXEC_DEFAULT:
3868 case I915_EXEC_RENDER:
3869 ring = &dev_priv->render_ring;
3870 break;
3871 case I915_EXEC_BSD:
3872 if (!HAS_BSD(dev)) {
3873 DRM_ERROR("execbuf with invalid ring (BSD)\n");
3874 return -EINVAL;
3875 }
3876 ring = &dev_priv->bsd_ring;
3877 break;
3878 case I915_EXEC_BLT:
3879 if (!HAS_BLT(dev)) {
3880 DRM_ERROR("execbuf with invalid ring (BLT)\n");
3881 return -EINVAL;
3882 }
3883 ring = &dev_priv->blt_ring;
3884 break;
3885 default:
3886 DRM_ERROR("execbuf with unknown ring: %d\n",
3887 (int)(args->flags & I915_EXEC_RING_MASK));
3888 return -EINVAL;
3889 }
3890
3891 if (args->buffer_count < 1) {
3892 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3893 return -EINVAL;
3894 }
3895 object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
3896 if (object_list == NULL) {
3897 DRM_ERROR("Failed to allocate object list for %d buffers\n",
3898 args->buffer_count);
3899 ret = -ENOMEM;
3900 goto pre_mutex_err;
3901 }
3902
3903 if (args->num_cliprects != 0) {
3904 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3905 GFP_KERNEL);
3906 if (cliprects == NULL) {
3907 ret = -ENOMEM;
3908 goto pre_mutex_err;
3909 }
3910
3911 ret = copy_from_user(cliprects,
3912 (struct drm_clip_rect __user *)
3913 (uintptr_t) args->cliprects_ptr,
3914 sizeof(*cliprects) * args->num_cliprects);
3915 if (ret != 0) {
3916 DRM_ERROR("copy %d cliprects failed: %d\n",
3917 args->num_cliprects, ret);
3918 ret = -EFAULT;
3919 goto pre_mutex_err;
3920 }
3921 }
3922
3923 request = kzalloc(sizeof(*request), GFP_KERNEL);
3924 if (request == NULL) {
3925 ret = -ENOMEM;
3926 goto pre_mutex_err;
3927 }
3928
3929 ret = i915_mutex_lock_interruptible(dev);
3930 if (ret)
3931 goto pre_mutex_err;
3932
3933 if (dev_priv->mm.suspended) {
3934 mutex_unlock(&dev->struct_mutex);
3935 ret = -EBUSY;
3936 goto pre_mutex_err;
3937 }
3938
3939 /* Look up object handles */
3940 for (i = 0; i < args->buffer_count; i++) {
3941 struct drm_i915_gem_object *obj;
3942
3943 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
3944 exec_list[i].handle));
3945 if (obj == NULL) {
3946 DRM_ERROR("Invalid object handle %d at index %d\n",
3947 exec_list[i].handle, i);
3948 /* prevent error path from reading uninitialized data */
3949 args->buffer_count = i;
3950 ret = -ENOENT;
3951 goto err;
3952 }
3953 object_list[i] = obj;
3954
3955 if (obj->in_execbuffer) {
3956 DRM_ERROR("Object %p appears more than once in object list\n",
3957 obj);
3958 /* prevent error path from reading uninitialized data */
3959 args->buffer_count = i + 1;
3960 ret = -EINVAL;
3961 goto err;
3962 }
3963 obj->in_execbuffer = true;
3964 obj->pending_fenced_gpu_access = false;
3965 }
3966
3967 /* Move the objects en-masse into the GTT, evicting if necessary. */
3968 ret = i915_gem_execbuffer_reserve(dev, file,
3969 object_list, exec_list,
3970 args->buffer_count);
3971 if (ret)
3972 goto err;
3973
3974 /* The objects are in their final locations, apply the relocations. */
3975 ret = i915_gem_execbuffer_relocate(dev, file,
3976 object_list, exec_list,
3977 args->buffer_count);
3978 if (ret) {
3979 if (ret == -EFAULT) {
3980 ret = i915_gem_execbuffer_relocate_slow(dev, file,
3981 object_list,
3982 exec_list,
3983 args->buffer_count);
3984 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
3985 }
3986 if (ret)
3987 goto err;
3988 }
3989
3990 /* Set the pending read domains for the batch buffer to COMMAND */
3991 batch_obj = object_list[args->buffer_count-1];
3992 if (batch_obj->base.pending_write_domain) {
3993 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3994 ret = -EINVAL;
3995 goto err;
3996 }
3997 batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3998
3999 /* Sanity check the batch buffer */
4000 exec_offset = batch_obj->gtt_offset;
4001 ret = i915_gem_check_execbuffer(args, exec_offset);
4002 if (ret != 0) {
4003 DRM_ERROR("execbuf with invalid offset/length\n");
4004 goto err;
4005 }
4006
4007 ret = i915_gem_execbuffer_move_to_gpu(dev, file, ring,
4008 object_list, args->buffer_count);
4009 if (ret)
4010 goto err;
4011
4012 #if WATCH_COHERENCY
4013 for (i = 0; i < args->buffer_count; i++) {
4014 i915_gem_object_check_coherency(object_list[i],
4015 exec_list[i].handle);
4016 }
4017 #endif
4018
4019 #if WATCH_EXEC
4020 i915_gem_dump_object(batch_obj,
4021 args->batch_len,
4022 __func__,
4023 ~0);
4024 #endif
4025
4026 /* Check for any pending flips. As we only maintain a flip queue depth
4027 * of 1, we can simply insert a WAIT for the next display flip prior
4028 * to executing the batch and avoid stalling the CPU.
4029 */
4030 flips = 0;
4031 for (i = 0; i < args->buffer_count; i++) {
4032 if (object_list[i]->base.write_domain)
4033 flips |= atomic_read(&object_list[i]->pending_flip);
4034 }
4035 if (flips) {
4036 int plane, flip_mask;
4037
4038 for (plane = 0; flips >> plane; plane++) {
4039 if (((flips >> plane) & 1) == 0)
4040 continue;
4041
4042 if (plane)
4043 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
4044 else
4045 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
4046
4047 ret = intel_ring_begin(ring, 2);
4048 if (ret)
4049 goto err;
4050
4051 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
4052 intel_ring_emit(ring, MI_NOOP);
4053 intel_ring_advance(ring);
4054 }
4055 }
4056
4057 /* Exec the batchbuffer */
4058 ret = ring->dispatch_execbuffer(ring, args, cliprects, exec_offset);
4059 if (ret) {
4060 DRM_ERROR("dispatch failed %d\n", ret);
4061 goto err;
4062 }
4063
4064 for (i = 0; i < args->buffer_count; i++) {
4065 struct drm_i915_gem_object *obj = object_list[i];
4066
4067 obj->base.read_domains = obj->base.pending_read_domains;
4068 obj->base.write_domain = obj->base.pending_write_domain;
4069 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
4070
4071 i915_gem_object_move_to_active(obj, ring);
4072 if (obj->base.write_domain) {
4073 obj->dirty = 1;
4074 list_move_tail(&obj->gpu_write_list,
4075 &ring->gpu_write_list);
4076 intel_mark_busy(dev, obj);
4077 }
4078
4079 trace_i915_gem_object_change_domain(obj,
4080 obj->base.read_domains,
4081 obj->base.write_domain);
4082 }
4083
4084 /*
4085 * Ensure that the commands in the batch buffer are
4086 * finished before the interrupt fires
4087 */
4088 i915_retire_commands(dev, ring);
4089
4090 if (i915_add_request(dev, file, request, ring))
4091 i915_gem_next_request_seqno(dev, ring);
4092 else
4093 request = NULL;
4094
4095 err:
4096 for (i = 0; i < args->buffer_count; i++) {
4097 object_list[i]->in_execbuffer = false;
4098 drm_gem_object_unreference(&object_list[i]->base);
4099 }
4100
4101 mutex_unlock(&dev->struct_mutex);
4102
4103 pre_mutex_err:
4104 drm_free_large(object_list);
4105 kfree(cliprects);
4106 kfree(request);
4107
4108 return ret;
4109 }
4110
4111 /*
4112 * Legacy execbuffer just creates an exec2 list from the original exec object
4113 * list array and passes it to the real function.
4114 */
4115 int
4116 i915_gem_execbuffer(struct drm_device *dev, void *data,
4117 struct drm_file *file)
4118 {
4119 struct drm_i915_gem_execbuffer *args = data;
4120 struct drm_i915_gem_execbuffer2 exec2;
4121 struct drm_i915_gem_exec_object *exec_list = NULL;
4122 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4123 int ret, i;
4124
4125 #if WATCH_EXEC
4126 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4127 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4128 #endif
4129
4130 if (args->buffer_count < 1) {
4131 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
4132 return -EINVAL;
4133 }
4134
4135 /* Copy in the exec list from userland */
4136 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
4137 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4138 if (exec_list == NULL || exec2_list == NULL) {
4139 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4140 args->buffer_count);
4141 drm_free_large(exec_list);
4142 drm_free_large(exec2_list);
4143 return -ENOMEM;
4144 }
4145 ret = copy_from_user(exec_list,
4146 (struct drm_i915_relocation_entry __user *)
4147 (uintptr_t) args->buffers_ptr,
4148 sizeof(*exec_list) * args->buffer_count);
4149 if (ret != 0) {
4150 DRM_ERROR("copy %d exec entries failed %d\n",
4151 args->buffer_count, ret);
4152 drm_free_large(exec_list);
4153 drm_free_large(exec2_list);
4154 return -EFAULT;
4155 }
4156
4157 for (i = 0; i < args->buffer_count; i++) {
4158 exec2_list[i].handle = exec_list[i].handle;
4159 exec2_list[i].relocation_count = exec_list[i].relocation_count;
4160 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
4161 exec2_list[i].alignment = exec_list[i].alignment;
4162 exec2_list[i].offset = exec_list[i].offset;
4163 if (INTEL_INFO(dev)->gen < 4)
4164 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
4165 else
4166 exec2_list[i].flags = 0;
4167 }
4168
4169 exec2.buffers_ptr = args->buffers_ptr;
4170 exec2.buffer_count = args->buffer_count;
4171 exec2.batch_start_offset = args->batch_start_offset;
4172 exec2.batch_len = args->batch_len;
4173 exec2.DR1 = args->DR1;
4174 exec2.DR4 = args->DR4;
4175 exec2.num_cliprects = args->num_cliprects;
4176 exec2.cliprects_ptr = args->cliprects_ptr;
4177 exec2.flags = I915_EXEC_RENDER;
4178
4179 ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
4180 if (!ret) {
4181 /* Copy the new buffer offsets back to the user's exec list. */
4182 for (i = 0; i < args->buffer_count; i++)
4183 exec_list[i].offset = exec2_list[i].offset;
4184 /* ... and back out to userspace */
4185 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4186 (uintptr_t) args->buffers_ptr,
4187 exec_list,
4188 sizeof(*exec_list) * args->buffer_count);
4189 if (ret) {
4190 ret = -EFAULT;
4191 DRM_ERROR("failed to copy %d exec entries "
4192 "back to user (%d)\n",
4193 args->buffer_count, ret);
4194 }
4195 }
4196
4197 drm_free_large(exec_list);
4198 drm_free_large(exec2_list);
4199 return ret;
4200 }
4201
4202 int
4203 i915_gem_execbuffer2(struct drm_device *dev, void *data,
4204 struct drm_file *file)
4205 {
4206 struct drm_i915_gem_execbuffer2 *args = data;
4207 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4208 int ret;
4209
4210 #if WATCH_EXEC
4211 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4212 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4213 #endif
4214
4215 if (args->buffer_count < 1) {
4216 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
4217 return -EINVAL;
4218 }
4219
4220 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4221 if (exec2_list == NULL) {
4222 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4223 args->buffer_count);
4224 return -ENOMEM;
4225 }
4226 ret = copy_from_user(exec2_list,
4227 (struct drm_i915_relocation_entry __user *)
4228 (uintptr_t) args->buffers_ptr,
4229 sizeof(*exec2_list) * args->buffer_count);
4230 if (ret != 0) {
4231 DRM_ERROR("copy %d exec entries failed %d\n",
4232 args->buffer_count, ret);
4233 drm_free_large(exec2_list);
4234 return -EFAULT;
4235 }
4236
4237 ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
4238 if (!ret) {
4239 /* Copy the new buffer offsets back to the user's exec list. */
4240 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4241 (uintptr_t) args->buffers_ptr,
4242 exec2_list,
4243 sizeof(*exec2_list) * args->buffer_count);
4244 if (ret) {
4245 ret = -EFAULT;
4246 DRM_ERROR("failed to copy %d exec entries "
4247 "back to user (%d)\n",
4248 args->buffer_count, ret);
4249 }
4250 }
4251
4252 drm_free_large(exec2_list);
4253 return ret;
4254 }
4255
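/* Pin an object into the GTT. If the object is already bound but at
 * an offset that violates the requested alignment, or is not mappable
 * and fenceable when the caller requires it to be, it is unbound and
 * rebound first. Pinned but inactive objects are kept on the pinned
 * list so that the eviction code leaves them alone.
 */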
4256 int
4257 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4258 uint32_t alignment,
4259 bool map_and_fenceable)
4260 {
4261 struct drm_device *dev = obj->base.dev;
4262 struct drm_i915_private *dev_priv = dev->dev_private;
4263 int ret;
4264
4265 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4266 WARN_ON(i915_verify_lists(dev));
4267
4268 if (obj->gtt_space != NULL) {
4269 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
4270 (map_and_fenceable && !obj->map_and_fenceable)) {
4271 WARN(obj->pin_count,
4272 "bo is already pinned with incorrect alignment:"
4273 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
4274 " obj->map_and_fenceable=%d\n",
4275 obj->gtt_offset, alignment,
4276 map_and_fenceable,
4277 obj->map_and_fenceable);
4278 ret = i915_gem_object_unbind(obj);
4279 if (ret)
4280 return ret;
4281 }
4282 }
4283
4284 if (obj->gtt_space == NULL) {
4285 ret = i915_gem_object_bind_to_gtt(obj, alignment,
4286 map_and_fenceable);
4287 if (ret)
4288 return ret;
4289 }
4290
4291 if (obj->pin_count++ == 0) {
4292 if (!obj->active)
4293 list_move_tail(&obj->mm_list,
4294 &dev_priv->mm.pinned_list);
4295 }
4296 obj->pin_mappable |= map_and_fenceable;
4297
4298 WARN_ON(i915_verify_lists(dev));
4299 return 0;
4300 }
4301
4302 void
4303 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
4304 {
4305 struct drm_device *dev = obj->base.dev;
4306 drm_i915_private_t *dev_priv = dev->dev_private;
4307
4308 WARN_ON(i915_verify_lists(dev));
4309 BUG_ON(obj->pin_count == 0);
4310 BUG_ON(obj->gtt_space == NULL);
4311
4312 if (--obj->pin_count == 0) {
4313 if (!obj->active)
4314 list_move_tail(&obj->mm_list,
4315 &dev_priv->mm.inactive_list);
4316 obj->pin_mappable = false;
4317 }
4318 WARN_ON(i915_verify_lists(dev));
4319 }
4320
4321 int
4322 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4323 struct drm_file *file)
4324 {
4325 struct drm_i915_gem_pin *args = data;
4326 struct drm_i915_gem_object *obj;
4327 int ret;
4328
4329 ret = i915_mutex_lock_interruptible(dev);
4330 if (ret)
4331 return ret;
4332
4333 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4334 if (obj == NULL) {
4335 ret = -ENOENT;
4336 goto unlock;
4337 }
4338
4339 if (obj->madv != I915_MADV_WILLNEED) {
4340 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4341 ret = -EINVAL;
4342 goto out;
4343 }
4344
4345 if (obj->pin_filp != NULL && obj->pin_filp != file) {
4346 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4347 args->handle);
4348 ret = -EINVAL;
4349 goto out;
4350 }
4351
4352 obj->user_pin_count++;
4353 obj->pin_filp = file;
4354 if (obj->user_pin_count == 1) {
4355 ret = i915_gem_object_pin(obj, args->alignment, true);
4356 if (ret)
4357 goto out;
4358 }
4359
4360 /* XXX - flush the CPU caches for pinned objects
4361 * as the X server doesn't manage domains yet
4362 */
4363 i915_gem_object_flush_cpu_write_domain(obj);
4364 args->offset = obj->gtt_offset;
4365 out:
4366 drm_gem_object_unreference(&obj->base);
4367 unlock:
4368 mutex_unlock(&dev->struct_mutex);
4369 return ret;
4370 }
4371
4372 int
4373 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4374 struct drm_file *file)
4375 {
4376 struct drm_i915_gem_pin *args = data;
4377 struct drm_i915_gem_object *obj;
4378 int ret;
4379
4380 ret = i915_mutex_lock_interruptible(dev);
4381 if (ret)
4382 return ret;
4383
4384 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4385 if (obj == NULL) {
4386 ret = -ENOENT;
4387 goto unlock;
4388 }
4389
4390 if (obj->pin_filp != file) {
4391 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4392 args->handle);
4393 ret = -EINVAL;
4394 goto out;
4395 }
4396 obj->user_pin_count--;
4397 if (obj->user_pin_count == 0) {
4398 obj->pin_filp = NULL;
4399 i915_gem_object_unpin(obj);
4400 }
4401
4402 out:
4403 drm_gem_object_unreference(&obj->base);
4404 unlock:
4405 mutex_unlock(&dev->struct_mutex);
4406 return ret;
4407 }
4408
4409 int
4410 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4411 struct drm_file *file)
4412 {
4413 struct drm_i915_gem_busy *args = data;
4414 struct drm_i915_gem_object *obj;
4415 int ret;
4416
4417 ret = i915_mutex_lock_interruptible(dev);
4418 if (ret)
4419 return ret;
4420
4421 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4422 if (obj == NULL) {
4423 ret = -ENOENT;
4424 goto unlock;
4425 }
4426
4427 /* Count all active objects as busy, even if they are not currently
4428 * in use by the gpu. Users of this interface expect objects to
4429 * eventually become non-busy without any further action, therefore
4430 * emit any necessary flushes here.
4431 */
4432 args->busy = obj->active;
4433 if (args->busy) {
4434 /* Unconditionally flush the object, even when the gpu is still
4435 * using it. Userspace calling this function indicates that it wants
4436 * to use this buffer sooner rather than later, so issuing the
4437 * required flush earlier is beneficial.
4438 */
4439 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
4440 i915_gem_flush_ring(dev, obj->ring,
4441 0, obj->base.write_domain);
4442
4443 /* Update the active list for the hardware's current position.
4444 * Otherwise this only updates on a delayed timer or when irqs
4445 * are actually unmasked, and our working set ends up being
4446 * larger than required.
4447 */
4448 i915_gem_retire_requests_ring(dev, obj->ring);
4449
4450 args->busy = obj->active;
4451 }
4452
4453 drm_gem_object_unreference(&obj->base);
4454 unlock:
4455 mutex_unlock(&dev->struct_mutex);
4456 return ret;
4457 }
4458
4459 int
4460 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4461 struct drm_file *file_priv)
4462 {
4463 return i915_gem_ring_throttle(dev, file_priv);
4464 }
4465
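/* Mark an object's backing storage as needed (WILLNEED) or purgeable
 * (DONTNEED). A purgeable object that is no longer bound into the GTT
 * has its backing storage truncated immediately; args->retained tells
 * userspace whether the pages survived. Pinned objects are rejected.
 */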
4466 int
4467 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4468 struct drm_file *file_priv)
4469 {
4470 struct drm_i915_gem_madvise *args = data;
4471 struct drm_i915_gem_object *obj;
4472 int ret;
4473
4474 switch (args->madv) {
4475 case I915_MADV_DONTNEED:
4476 case I915_MADV_WILLNEED:
4477 break;
4478 default:
4479 return -EINVAL;
4480 }
4481
4482 ret = i915_mutex_lock_interruptible(dev);
4483 if (ret)
4484 return ret;
4485
4486 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4487 if (obj == NULL) {
4488 ret = -ENOENT;
4489 goto unlock;
4490 }
4491
4492 if (obj->pin_count) {
4493 ret = -EINVAL;
4494 goto out;
4495 }
4496
4497 if (obj->madv != __I915_MADV_PURGED)
4498 obj->madv = args->madv;
4499
4500 /* if the object is no longer bound, discard its backing storage */
4501 if (i915_gem_object_is_purgeable(obj) &&
4502 obj->gtt_space == NULL)
4503 i915_gem_object_truncate(obj);
4504
4505 args->retained = obj->madv != __I915_MADV_PURGED;
4506
4507 out:
4508 drm_gem_object_unreference(&obj->base);
4509 unlock:
4510 mutex_unlock(&dev->struct_mutex);
4511 return ret;
4512 }
4513
4514 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4515 size_t size)
4516 {
4517 struct drm_i915_private *dev_priv = dev->dev_private;
4518 struct drm_i915_gem_object *obj;
4519
4520 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4521 if (obj == NULL)
4522 return NULL;
4523
4524 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4525 kfree(obj);
4526 return NULL;
4527 }
4528
4529 i915_gem_info_add_obj(dev_priv, size);
4530
4531 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4532 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4533
4534 obj->agp_type = AGP_USER_MEMORY;
4535 obj->base.driver_private = NULL;
4536 obj->fence_reg = I915_FENCE_REG_NONE;
4537 INIT_LIST_HEAD(&obj->mm_list);
4538 INIT_LIST_HEAD(&obj->gtt_list);
4539 INIT_LIST_HEAD(&obj->ring_list);
4540 INIT_LIST_HEAD(&obj->gpu_write_list);
4541 obj->madv = I915_MADV_WILLNEED;
4542 /* Avoid an unnecessary call to unbind on the first bind. */
4543 obj->map_and_fenceable = true;
4544
4545 return obj;
4546 }
4547
4548 int i915_gem_init_object(struct drm_gem_object *obj)
4549 {
4550 BUG();
4551
4552 return 0;
4553 }
4554
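/* Final stage of object freeing. If the unbind is interrupted by a
 * signal, the object is parked on the deferred_free_list to be freed
 * later instead.
 */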
4555 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
4556 {
4557 struct drm_device *dev = obj->base.dev;
4558 drm_i915_private_t *dev_priv = dev->dev_private;
4559 int ret;
4560
4561 ret = i915_gem_object_unbind(obj);
4562 if (ret == -ERESTARTSYS) {
4563 list_move(&obj->mm_list,
4564 &dev_priv->mm.deferred_free_list);
4565 return;
4566 }
4567
4568 if (obj->base.map_list.map)
4569 i915_gem_free_mmap_offset(obj);
4570
4571 drm_gem_object_release(&obj->base);
4572 i915_gem_info_remove_obj(dev_priv, obj->base.size);
4573
4574 kfree(obj->page_cpu_valid);
4575 kfree(obj->bit_17);
4576 kfree(obj);
4577 }
4578
4579 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4580 {
4581 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4582 struct drm_device *dev = obj->base.dev;
4583
4584 trace_i915_gem_object_destroy(obj);
4585
4586 while (obj->pin_count > 0)
4587 i915_gem_object_unpin(obj);
4588
4589 if (obj->phys_obj)
4590 i915_gem_detach_phys_object(dev, obj);
4591
4592 i915_gem_free_object_tail(obj);
4593 }
4594
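/* Quiesce the GPU for suspend or VT switch: wait for outstanding
 * rendering, evict everything under UMS, drop the fence registers and
 * tear down the rings. mm.suspended is set so execbuf is refused
 * until GEM is re-enabled (e.g. by i915_gem_entervt_ioctl()).
 */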
4595 int
4596 i915_gem_idle(struct drm_device *dev)
4597 {
4598 drm_i915_private_t *dev_priv = dev->dev_private;
4599 int ret;
4600
4601 mutex_lock(&dev->struct_mutex);
4602
4603 if (dev_priv->mm.suspended) {
4604 mutex_unlock(&dev->struct_mutex);
4605 return 0;
4606 }
4607
4608 ret = i915_gpu_idle(dev);
4609 if (ret) {
4610 mutex_unlock(&dev->struct_mutex);
4611 return ret;
4612 }
4613
4614 /* Under UMS, be paranoid and evict. */
4615 if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4616 ret = i915_gem_evict_inactive(dev, false);
4617 if (ret) {
4618 mutex_unlock(&dev->struct_mutex);
4619 return ret;
4620 }
4621 }
4622
4623 i915_gem_reset_fences(dev);
4624
4625 /* Hack! Don't let anybody do execbuf while we don't control the chip.
4626 * We need to replace this with a semaphore, or something.
4627 * And not confound mm.suspended!
4628 */
4629 dev_priv->mm.suspended = 1;
4630 del_timer_sync(&dev_priv->hangcheck_timer);
4631
4632 i915_kernel_lost_context(dev);
4633 i915_gem_cleanup_ringbuffer(dev);
4634
4635 mutex_unlock(&dev->struct_mutex);
4636
4637 /* Cancel the retire work handler, which should be idle now. */
4638 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4639
4640 return 0;
4641 }
4642
4643 int
4644 i915_gem_init_ringbuffer(struct drm_device *dev)
4645 {
4646 drm_i915_private_t *dev_priv = dev->dev_private;
4647 int ret;
4648
4649 ret = intel_init_render_ring_buffer(dev);
4650 if (ret)
4651 return ret;
4652
4653 if (HAS_BSD(dev)) {
4654 ret = intel_init_bsd_ring_buffer(dev);
4655 if (ret)
4656 goto cleanup_render_ring;
4657 }
4658
4659 if (HAS_BLT(dev)) {
4660 ret = intel_init_blt_ring_buffer(dev);
4661 if (ret)
4662 goto cleanup_bsd_ring;
4663 }
4664
4665 dev_priv->next_seqno = 1;
4666
4667 return 0;
4668
4669 cleanup_bsd_ring:
4670 intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4671 cleanup_render_ring:
4672 intel_cleanup_ring_buffer(&dev_priv->render_ring);
4673 return ret;
4674 }
4675
4676 void
4677 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4678 {
4679 drm_i915_private_t *dev_priv = dev->dev_private;
4680
4681 intel_cleanup_ring_buffer(&dev_priv->render_ring);
4682 intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4683 intel_cleanup_ring_buffer(&dev_priv->blt_ring);
4684 }
4685
4686 int
4687 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4688 struct drm_file *file_priv)
4689 {
4690 drm_i915_private_t *dev_priv = dev->dev_private;
4691 int ret;
4692
4693 if (drm_core_check_feature(dev, DRIVER_MODESET))
4694 return 0;
4695
4696 if (atomic_read(&dev_priv->mm.wedged)) {
4697 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4698 atomic_set(&dev_priv->mm.wedged, 0);
4699 }
4700
4701 mutex_lock(&dev->struct_mutex);
4702 dev_priv->mm.suspended = 0;
4703
4704 ret = i915_gem_init_ringbuffer(dev);
4705 if (ret != 0) {
4706 mutex_unlock(&dev->struct_mutex);
4707 return ret;
4708 }
4709
4710 BUG_ON(!list_empty(&dev_priv->mm.active_list));
4711 BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4712 BUG_ON(!list_empty(&dev_priv->bsd_ring.active_list));
4713 BUG_ON(!list_empty(&dev_priv->blt_ring.active_list));
4714 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4715 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4716 BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4717 BUG_ON(!list_empty(&dev_priv->bsd_ring.request_list));
4718 BUG_ON(!list_empty(&dev_priv->blt_ring.request_list));
4719 mutex_unlock(&dev->struct_mutex);
4720
4721 ret = drm_irq_install(dev);
4722 if (ret)
4723 goto cleanup_ringbuffer;
4724
4725 return 0;
4726
4727 cleanup_ringbuffer:
4728 mutex_lock(&dev->struct_mutex);
4729 i915_gem_cleanup_ringbuffer(dev);
4730 dev_priv->mm.suspended = 1;
4731 mutex_unlock(&dev->struct_mutex);
4732
4733 return ret;
4734 }
4735
4736 int
4737 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4738 struct drm_file *file_priv)
4739 {
4740 if (drm_core_check_feature(dev, DRIVER_MODESET))
4741 return 0;
4742
4743 drm_irq_uninstall(dev);
4744 return i915_gem_idle(dev);
4745 }
4746
4747 void
4748 i915_gem_lastclose(struct drm_device *dev)
4749 {
4750 int ret;
4751
4752 if (drm_core_check_feature(dev, DRIVER_MODESET))
4753 return;
4754
4755 ret = i915_gem_idle(dev);
4756 if (ret)
4757 DRM_ERROR("failed to idle hardware: %d\n", ret);
4758 }
4759
4760 static void
4761 init_ring_lists(struct intel_ring_buffer *ring)
4762 {
4763 INIT_LIST_HEAD(&ring->active_list);
4764 INIT_LIST_HEAD(&ring->request_list);
4765 INIT_LIST_HEAD(&ring->gpu_write_list);
4766 }
4767
4768 void
4769 i915_gem_load(struct drm_device *dev)
4770 {
4771 int i;
4772 drm_i915_private_t *dev_priv = dev->dev_private;
4773
4774 INIT_LIST_HEAD(&dev_priv->mm.active_list);
4775 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4776 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4777 INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
4778 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4779 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4780 INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
4781 init_ring_lists(&dev_priv->render_ring);
4782 init_ring_lists(&dev_priv->bsd_ring);
4783 init_ring_lists(&dev_priv->blt_ring);
4784 for (i = 0; i < 16; i++)
4785 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4786 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4787 i915_gem_retire_work_handler);
4788 init_completion(&dev_priv->error_completion);
4789
4790 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4791 if (IS_GEN3(dev)) {
4792 u32 tmp = I915_READ(MI_ARB_STATE);
4793 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4794 /* arb state is a masked write, so set bit + bit in mask */
4795 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4796 I915_WRITE(MI_ARB_STATE, tmp);
4797 }
4798 }
4799
4800 /* Old X drivers will take 0-2 for front, back, depth buffers */
4801 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4802 dev_priv->fence_reg_start = 3;
4803
4804 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4805 dev_priv->num_fence_regs = 16;
4806 else
4807 dev_priv->num_fence_regs = 8;
4808
4809 /* Initialize fence registers to zero */
4810 switch (INTEL_INFO(dev)->gen) {
4811 case 6:
4812 for (i = 0; i < 16; i++)
4813 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
4814 break;
4815 case 5:
4816 case 4:
4817 for (i = 0; i < 16; i++)
4818 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4819 break;
4820 case 3:
4821 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4822 for (i = 0; i < 8; i++)
4823 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
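/* Fall through: gen3 also clears the first 8 fence registers below. */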
4824 case 2:
4825 for (i = 0; i < 8; i++)
4826 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4827 break;
4828 }
4829 i915_gem_detect_bit_6_swizzle(dev);
4830 init_waitqueue_head(&dev_priv->pending_flip_queue);
4831
4832 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
4833 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
4834 register_shrinker(&dev_priv->mm.inactive_shrinker);
4835 }
4836
4837 /*
4838 * Create a physically contiguous memory object for this object
4839 * e.g. for cursor + overlay regs
4840 */
4841 static int i915_gem_init_phys_object(struct drm_device *dev,
4842 int id, int size, int align)
4843 {
4844 drm_i915_private_t *dev_priv = dev->dev_private;
4845 struct drm_i915_gem_phys_object *phys_obj;
4846 int ret;
4847
4848 if (dev_priv->mm.phys_objs[id - 1] || !size)
4849 return 0;
4850
4851 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4852 if (!phys_obj)
4853 return -ENOMEM;
4854
4855 phys_obj->id = id;
4856
4857 phys_obj->handle = drm_pci_alloc(dev, size, align);
4858 if (!phys_obj->handle) {
4859 ret = -ENOMEM;
4860 goto kfree_obj;
4861 }
4862 #ifdef CONFIG_X86
4863 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4864 #endif
4865
4866 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4867
4868 return 0;
4869 kfree_obj:
4870 kfree(phys_obj);
4871 return ret;
4872 }
4873
4874 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
4875 {
4876 drm_i915_private_t *dev_priv = dev->dev_private;
4877 struct drm_i915_gem_phys_object *phys_obj;
4878
4879 if (!dev_priv->mm.phys_objs[id - 1])
4880 return;
4881
4882 phys_obj = dev_priv->mm.phys_objs[id - 1];
4883 if (phys_obj->cur_obj) {
4884 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4885 }
4886
4887 #ifdef CONFIG_X86
4888 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4889 #endif
4890 drm_pci_free(dev, phys_obj->handle);
4891 kfree(phys_obj);
4892 dev_priv->mm.phys_objs[id - 1] = NULL;
4893 }
4894
4895 void i915_gem_free_all_phys_object(struct drm_device *dev)
4896 {
4897 int i;
4898
4899 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4900 i915_gem_free_phys_object(dev, i);
4901 }
4902
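/* Detach an object from its physically contiguous backing store:
 * copy the contents back into the shmem pages, clflush them and mark
 * them dirty so nothing is lost.
 */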
4903 void i915_gem_detach_phys_object(struct drm_device *dev,
4904 struct drm_i915_gem_object *obj)
4905 {
4906 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4907 char *vaddr;
4908 int i;
4909 int page_count;
4910
4911 if (!obj->phys_obj)
4912 return;
4913 vaddr = obj->phys_obj->handle->vaddr;
4914
4915 page_count = obj->base.size / PAGE_SIZE;
4916 for (i = 0; i < page_count; i++) {
4917 struct page *page = read_cache_page_gfp(mapping, i,
4918 GFP_HIGHUSER | __GFP_RECLAIMABLE);
4919 if (!IS_ERR(page)) {
4920 char *dst = kmap_atomic(page);
4921 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
4922 kunmap_atomic(dst);
4923
4924 drm_clflush_pages(&page, 1);
4925
4926 set_page_dirty(page);
4927 mark_page_accessed(page);
4928 page_cache_release(page);
4929 }
4930 }
4931 intel_gtt_chipset_flush();
4932
4933 obj->phys_obj->cur_obj = NULL;
4934 obj->phys_obj = NULL;
4935 }
4936
4937 int
4938 i915_gem_attach_phys_object(struct drm_device *dev,
4939 struct drm_i915_gem_object *obj,
4940 int id,
4941 int align)
4942 {
4943 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4944 drm_i915_private_t *dev_priv = dev->dev_private;
4945 int ret = 0;
4946 int page_count;
4947 int i;
4948
4949 if (id > I915_MAX_PHYS_OBJECT)
4950 return -EINVAL;
4951
4952 if (obj->phys_obj) {
4953 if (obj->phys_obj->id == id)
4954 return 0;
4955 i915_gem_detach_phys_object(dev, obj);
4956 }
4957
4958 /* create a new object */
4959 if (!dev_priv->mm.phys_objs[id - 1]) {
4960 ret = i915_gem_init_phys_object(dev, id,
4961 obj->base.size, align);
4962 if (ret) {
4963 DRM_ERROR("failed to init phys object %d size: %zu\n",
4964 id, obj->base.size);
4965 return ret;
4966 }
4967 }
4968
4969 /* bind to the object */
4970 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
4971 obj->phys_obj->cur_obj = obj;
4972
4973 page_count = obj->base.size / PAGE_SIZE;
4974
4975 for (i = 0; i < page_count; i++) {
4976 struct page *page;
4977 char *dst, *src;
4978
4979 page = read_cache_page_gfp(mapping, i,
4980 GFP_HIGHUSER | __GFP_RECLAIMABLE);
4981 if (IS_ERR(page))
4982 return PTR_ERR(page);
4983
4984 src = kmap_atomic(page);
4985 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4986 memcpy(dst, src, PAGE_SIZE);
4987 kunmap_atomic(src);
4988
4989 mark_page_accessed(page);
4990 page_cache_release(page);
4991 }
4992
4993 return 0;
4994 }
4995
4996 static int
4997 i915_gem_phys_pwrite(struct drm_device *dev,
4998 struct drm_i915_gem_object *obj,
4999 struct drm_i915_gem_pwrite *args,
5000 struct drm_file *file_priv)
5001 {
5002 void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
5003 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
5004
5005 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
5006 unsigned long unwritten;
5007
5008 /* The physical object once assigned is fixed for the lifetime
5009 * of the obj, so we can safely drop the lock and continue
5010 * to access vaddr.
5011 */
5012 mutex_unlock(&dev->struct_mutex);
5013 unwritten = copy_from_user(vaddr, user_data, args->size);
5014 mutex_lock(&dev->struct_mutex);
5015 if (unwritten)
5016 return -EFAULT;
5017 }
5018
5019 intel_gtt_chipset_flush();
5020 return 0;
5021 }
5022
5023 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5024 {
5025 struct drm_i915_file_private *file_priv = file->driver_priv;
5026
5027 /* Clean up our request list when the client is going away, so that
5028 * later retire_requests won't dereference our soon-to-be-gone
5029 * file_priv.
5030 */
5031 spin_lock(&file_priv->mm.lock);
5032 while (!list_empty(&file_priv->mm.request_list)) {
5033 struct drm_i915_gem_request *request;
5034
5035 request = list_first_entry(&file_priv->mm.request_list,
5036 struct drm_i915_gem_request,
5037 client_list);
5038 list_del(&request->client_list);
5039 request->file_priv = NULL;
5040 }
5041 spin_unlock(&file_priv->mm.lock);
5042 }
5043
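/* The GPU is considered active while any objects remain on the
 * active or flushing lists.
 */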
5044 static int
5045 i915_gpu_is_active(struct drm_device *dev)
5046 {
5047 drm_i915_private_t *dev_priv = dev->dev_private;
5048 int lists_empty;
5049
5050 lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
5051 list_empty(&dev_priv->mm.active_list);
5052
5053 return !lists_empty;
5054 }
5055
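/* Memory shrinker callback. If struct_mutex is contended it bails out
 * and reports nothing. With nr_to_scan == 0 it merely reports the size
 * of the inactive list. Otherwise it retires completed requests,
 * unbinds purgeable buffers first, then any other inactive buffers,
 * and as a last resort idles the GPU and rescans, returning a scaled
 * count of the objects that remain.
 */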
5056 static int
5057 i915_gem_inactive_shrink(struct shrinker *shrinker,
5058 int nr_to_scan,
5059 gfp_t gfp_mask)
5060 {
5061 struct drm_i915_private *dev_priv =
5062 container_of(shrinker,
5063 struct drm_i915_private,
5064 mm.inactive_shrinker);
5065 struct drm_device *dev = dev_priv->dev;
5066 struct drm_i915_gem_object *obj, *next;
5067 int cnt;
5068
5069 if (!mutex_trylock(&dev->struct_mutex))
5070 return 0;
5071
5072 /* "fast-path" to count number of available objects */
5073 if (nr_to_scan == 0) {
5074 cnt = 0;
5075 list_for_each_entry(obj,
5076 &dev_priv->mm.inactive_list,
5077 mm_list)
5078 cnt++;
5079 mutex_unlock(&dev->struct_mutex);
5080 return cnt / 100 * sysctl_vfs_cache_pressure;
5081 }
5082
5083 rescan:
5084 /* first scan for clean buffers */
5085 i915_gem_retire_requests(dev);
5086
5087 list_for_each_entry_safe(obj, next,
5088 &dev_priv->mm.inactive_list,
5089 mm_list) {
5090 if (i915_gem_object_is_purgeable(obj)) {
5091 if (i915_gem_object_unbind(obj) == 0 &&
5092 --nr_to_scan == 0)
5093 break;
5094 }
5095 }
5096
5097 /* second pass, evict/count anything still on the inactive list */
5098 cnt = 0;
5099 list_for_each_entry_safe(obj, next,
5100 &dev_priv->mm.inactive_list,
5101 mm_list) {
5102 if (nr_to_scan &&
5103 i915_gem_object_unbind(obj) == 0)
5104 nr_to_scan--;
5105 else
5106 cnt++;
5107 }
5108
5109 if (nr_to_scan && i915_gpu_is_active(dev)) {
5110 /*
5111 * We are desperate for pages, so as a last resort, wait
5112 * for the GPU to finish and discard whatever we can.
5113 * This dramatically reduces the number of OOM-killer
5114 * events whilst running the GPU aggressively.
5115 */
5116 if (i915_gpu_idle(dev) == 0)
5117 goto rescan;
5118 }
5119 mutex_unlock(&dev->struct_mutex);
5120 return cnt / 100 * sysctl_vfs_cache_pressure;
5121 }