drivers/gpu/drm/i915/i915_gem_execbuffer.c

   1 /*
   2  * Copyright © 2008,2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *    Chris Wilson <chris@chris-wilson.co.uk>
  26  *
  27  */
  28
  29 #include "drmP.h"
  30 #include "drm.h"
  31 #include "i915_drm.h"
  32 #include "i915_drv.h"
  33 #include "i915_trace.h"
  34 #include "intel_drv.h"
  35
  36 struct change_domains {
  37         uint32_t invalidate_domains;
  38         uint32_t flush_domains;
  39         uint32_t flush_rings;
  40 };
  41
  42 /*
  43  * Set the next domain for the specified object. This
   44  * may not actually perform the necessary flushing/invalidating though,
  45  * as that may want to be batched with other set_domain operations
  46  *
  47  * This is (we hope) the only really tricky part of gem. The goal
  48  * is fairly simple -- track which caches hold bits of the object
  49  * and make sure they remain coherent. A few concrete examples may
  50  * help to explain how it works. For shorthand, we use the notation
   51  * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
   52  * a pair of read and write domain masks.
  53  *
  54  * Case 1: the batch buffer
  55  *
  56  *      1. Allocated
  57  *      2. Written by CPU
  58  *      3. Mapped to GTT
  59  *      4. Read by GPU
  60  *      5. Unmapped from GTT
  61  *      6. Freed
  62  *
  63  *      Let's take these a step at a time
  64  *
  65  *      1. Allocated
  66  *              Pages allocated from the kernel may still have
  67  *              cache contents, so we set them to (CPU, CPU) always.
  68  *      2. Written by CPU (using pwrite)
  69  *              The pwrite function calls set_domain (CPU, CPU) and
  70  *              this function does nothing (as nothing changes)
  71  *      3. Mapped by GTT
  72  *              This function asserts that the object is not
  73  *              currently in any GPU-based read or write domains
  74  *      4. Read by GPU
  75  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
  76  *              As write_domain is zero, this function adds in the
  77  *              current read domains (CPU+COMMAND, 0).
  78  *              flush_domains is set to CPU.
  79  *              invalidate_domains is set to COMMAND
  80  *              clflush is run to get data out of the CPU caches
  81  *              then i915_dev_set_domain calls i915_gem_flush to
  82  *              emit an MI_FLUSH and drm_agp_chipset_flush
  83  *      5. Unmapped from GTT
  84  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
  85  *              flush_domains and invalidate_domains end up both zero
  86  *              so no flushing/invalidating happens
  87  *      6. Freed
  88  *              yay, done
  89  *
  90  * Case 2: The shared render buffer
  91  *
  92  *      1. Allocated
  93  *      2. Mapped to GTT
  94  *      3. Read/written by GPU
  95  *      4. set_domain to (CPU,CPU)
  96  *      5. Read/written by CPU
  97  *      6. Read/written by GPU
  98  *
  99  *      1. Allocated
 100  *              Same as last example, (CPU, CPU)
 101  *      2. Mapped to GTT
 102  *              Nothing changes (assertions find that it is not in the GPU)
 103  *      3. Read/written by GPU
 104  *              execbuffer calls set_domain (RENDER, RENDER)
 105  *              flush_domains gets CPU
 106  *              invalidate_domains gets GPU
 107  *              clflush (obj)
 108  *              MI_FLUSH and drm_agp_chipset_flush
 109  *      4. set_domain (CPU, CPU)
 110  *              flush_domains gets GPU
 111  *              invalidate_domains gets CPU
 112  *              wait_rendering (obj) to make sure all drawing is complete.
 113  *              This will include an MI_FLUSH to get the data from GPU
 114  *              to memory
 115  *              clflush (obj) to invalidate the CPU cache
 116  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 117  *      5. Read/written by CPU
 118  *              cache lines are loaded and dirtied
  119  *      6. Read/written by GPU
 120  *              Same as last GPU access
 121  *
 122  * Case 3: The constant buffer
 123  *
 124  *      1. Allocated
 125  *      2. Written by CPU
 126  *      3. Read by GPU
 127  *      4. Updated (written) by CPU again
 128  *      5. Read by GPU
 129  *
 130  *      1. Allocated
 131  *              (CPU, CPU)
 132  *      2. Written by CPU
 133  *              (CPU, CPU)
 134  *      3. Read by GPU
 135  *              (CPU+RENDER, 0)
 136  *              flush_domains = CPU
 137  *              invalidate_domains = RENDER
 138  *              clflush (obj)
 139  *              MI_FLUSH
 140  *              drm_agp_chipset_flush
 141  *      4. Updated (written) by CPU again
 142  *              (CPU, CPU)
 143  *              flush_domains = 0 (no previous write domain)
 144  *              invalidate_domains = 0 (no new read domains)
 145  *      5. Read by GPU
 146  *              (CPU+RENDER, 0)
 147  *              flush_domains = CPU
 148  *              invalidate_domains = RENDER
 149  *              clflush (obj)
 150  *              MI_FLUSH
 151  *              drm_agp_chipset_flush
 152  */
 153 static void
 154 i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
 155                                   struct intel_ring_buffer *ring,
 156                                   struct change_domains *cd)
 157 {
 158         uint32_t invalidate_domains = 0, flush_domains = 0;
 159
 160         /*
 161          * If the object isn't moving to a new write domain,
 162          * let the object stay in multiple read domains
 163          */
 164         if (obj->base.pending_write_domain == 0)
 165                 obj->base.pending_read_domains |= obj->base.read_domains;
 166
 167         /*
 168          * Flush the current write domain if
 169          * the new read domains don't match. Invalidate
 170          * any read domains which differ from the old
 171          * write domain
 172          */
 173         if (obj->base.write_domain &&
 174             (((obj->base.write_domain != obj->base.pending_read_domains ||
 175                obj->ring != ring)) ||
 176              (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
 177                 flush_domains |= obj->base.write_domain;
 178                 invalidate_domains |=
 179                         obj->base.pending_read_domains & ~obj->base.write_domain;
 180         }
 181         /*
 182          * Invalidate any read caches which may have
 183          * stale data. That is, any new read domains.
 184          */
 185         invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
 186         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
 187                 i915_gem_clflush_object(obj);
 188
 189         /* blow away mappings if mapped through GTT */
 190         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
 191                 i915_gem_release_mmap(obj);
 192
 193         /* The actual obj->write_domain will be updated with
 194          * pending_write_domain after we emit the accumulated flush for all
 195          * of our domain changes in execbuffers (which clears objects'
 196          * write_domains).  So if we have a current write domain that we
 197          * aren't changing, set pending_write_domain to that.
 198          */
 199         if (flush_domains == 0 && obj->base.pending_write_domain == 0)
 200                 obj->base.pending_write_domain = obj->base.write_domain;
 201
 202         cd->invalidate_domains |= invalidate_domains;
 203         cd->flush_domains |= flush_domains;
 204         if (flush_domains & I915_GEM_GPU_DOMAINS)
 205                 cd->flush_rings |= obj->ring->id;
 206         if (invalidate_domains & I915_GEM_GPU_DOMAINS)
 207                 cd->flush_rings |= ring->id;
 208 }
 209
 210 static int
 211 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 212                                    struct drm_file *file_priv,
 213                                    struct drm_i915_gem_exec_object2 *entry,
 214                                    struct drm_i915_gem_relocation_entry *reloc)
 215 {
 216         struct drm_device *dev = obj->base.dev;
 217         struct drm_gem_object *target_obj;
 218         uint32_t target_offset;
 219         int ret = -EINVAL;
 220
 221         target_obj = drm_gem_object_lookup(dev, file_priv,
 222                                            reloc->target_handle);
 223         if (target_obj == NULL)
 224                 return -ENOENT;
 225
 226         target_offset = to_intel_bo(target_obj)->gtt_offset;
 227
 228 #if WATCH_RELOC
 229         DRM_INFO("%s: obj %p offset %08x target %d "
 230                  "read %08x write %08x gtt %08x "
 231                  "presumed %08x delta %08x\n",
 232                  __func__,
 233                  obj,
 234                  (int) reloc->offset,
 235                  (int) reloc->target_handle,
 236                  (int) reloc->read_domains,
 237                  (int) reloc->write_domain,
 238                  (int) target_offset,
 239                  (int) reloc->presumed_offset,
 240                  reloc->delta);
 241 #endif
 242
 243         /* The target buffer should have appeared before us in the
 244          * exec_object list, so it should have a GTT space bound by now.
 245          */
 246         if (target_offset == 0) {
 247                 DRM_ERROR("No GTT space found for object %d\n",
 248                           reloc->target_handle);
 249                 goto err;
 250         }
 251
 252         /* Validate that the target is in a valid r/w GPU domain */
 253         if (reloc->write_domain & (reloc->write_domain - 1)) {
 254                 DRM_ERROR("reloc with multiple write domains: "
 255                           "obj %p target %d offset %d "
 256                           "read %08x write %08x",
 257                           obj, reloc->target_handle,
 258                           (int) reloc->offset,
 259                           reloc->read_domains,
 260                           reloc->write_domain);
 261                 goto err;
 262         }
 263         if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
 264             reloc->read_domains & I915_GEM_DOMAIN_CPU) {
 265                 DRM_ERROR("reloc with read/write CPU domains: "
 266                           "obj %p target %d offset %d "
 267                           "read %08x write %08x",
 268                           obj, reloc->target_handle,
 269                           (int) reloc->offset,
 270                           reloc->read_domains,
 271                           reloc->write_domain);
 272                 goto err;
 273         }
 274         if (reloc->write_domain && target_obj->pending_write_domain &&
 275             reloc->write_domain != target_obj->pending_write_domain) {
 276                 DRM_ERROR("Write domain conflict: "
 277                           "obj %p target %d offset %d "
 278                           "new %08x old %08x\n",
 279                           obj, reloc->target_handle,
 280                           (int) reloc->offset,
 281                           reloc->write_domain,
 282                           target_obj->pending_write_domain);
 283                 goto err;
 284         }
 285
 286         target_obj->pending_read_domains |= reloc->read_domains;
 287         target_obj->pending_write_domain |= reloc->write_domain;
 288
 289         /* If the relocation already has the right value in it, no
 290          * more work needs to be done.
 291          */
 292         if (target_offset == reloc->presumed_offset)
 293                 goto out;
 294
 295         /* Check that the relocation address is valid... */
 296         if (reloc->offset > obj->base.size - 4) {
 297                 DRM_ERROR("Relocation beyond object bounds: "
 298                           "obj %p target %d offset %d size %d.\n",
 299                           obj, reloc->target_handle,
 300                           (int) reloc->offset,
 301                           (int) obj->base.size);
 302                 goto err;
 303         }
 304         if (reloc->offset & 3) {
 305                 DRM_ERROR("Relocation not 4-byte aligned: "
 306                           "obj %p target %d offset %d.\n",
 307                           obj, reloc->target_handle,
 308                           (int) reloc->offset);
 309                 goto err;
 310         }
 311
 312         /* and points to somewhere within the target object. */
 313         if (reloc->delta >= target_obj->size) {
 314                 DRM_ERROR("Relocation beyond target object bounds: "
 315                           "obj %p target %d delta %d size %d.\n",
 316                           obj, reloc->target_handle,
 317                           (int) reloc->delta,
 318                           (int) target_obj->size);
 319                 goto err;
 320         }
 321
 322         reloc->delta += target_offset;
 323         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
 324                 uint32_t page_offset = reloc->offset & ~PAGE_MASK;
 325                 char *vaddr;
 326
 327                 vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
 328                 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
 329                 kunmap_atomic(vaddr);
 330         } else {
 331                 struct drm_i915_private *dev_priv = dev->dev_private;
 332                 uint32_t __iomem *reloc_entry;
 333                 void __iomem *reloc_page;
 334
 335                 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
 336                 if (ret)
 337                         goto err;
 338
 339                 /* Map the page containing the relocation we're going to perform.  */
 340                 reloc->offset += obj->gtt_offset;
 341                 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
 342                                                       reloc->offset & PAGE_MASK);
 343                 reloc_entry = (uint32_t __iomem *)
 344                         (reloc_page + (reloc->offset & ~PAGE_MASK));
 345                 iowrite32(reloc->delta, reloc_entry);
 346                 io_mapping_unmap_atomic(reloc_page);
 347         }
 348
 349         /* and update the user's relocation entry */
 350         reloc->presumed_offset = target_offset;
 351
 352 out:
 353         ret = 0;
 354 err:
 355         drm_gem_object_unreference(target_obj);
 356         return ret;
 357 }
 358
 359 static int
 360 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
 361                                     struct drm_file *file_priv,
 362                                     struct drm_i915_gem_exec_object2 *entry)
 363 {
 364         struct drm_i915_gem_relocation_entry __user *user_relocs;
 365         int i, ret;
 366
 367         user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
 368         for (i = 0; i < entry->relocation_count; i++) {
 369                 struct drm_i915_gem_relocation_entry reloc;
 370
 371                 if (__copy_from_user_inatomic(&reloc,
 372                                               user_relocs+i,
 373                                               sizeof(reloc)))
 374                         return -EFAULT;
 375
 376                 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &reloc);
 377                 if (ret)
 378                         return ret;
 379
 380                 if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
 381                                             &reloc.presumed_offset,
 382                                             sizeof(reloc.presumed_offset)))
 383                         return -EFAULT;
 384         }
 385
 386         return 0;
 387 }
 388
 389 static int
 390 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
 391                                          struct drm_file *file_priv,
 392                                          struct drm_i915_gem_exec_object2 *entry,
 393                                          struct drm_i915_gem_relocation_entry *relocs)
 394 {
 395         int i, ret;
 396
 397         for (i = 0; i < entry->relocation_count; i++) {
 398                 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &relocs[i]);
 399                 if (ret)
 400                         return ret;
 401         }
 402
 403         return 0;
 404 }
 405
 406 static int
 407 i915_gem_execbuffer_relocate(struct drm_device *dev,
 408                              struct drm_file *file,
 409                              struct drm_i915_gem_object **object_list,
 410                              struct drm_i915_gem_exec_object2 *exec_list,
 411                              int count)
 412 {
 413         int i, ret;
 414
 415         for (i = 0; i < count; i++) {
 416                 struct drm_i915_gem_object *obj = object_list[i];
 417                 obj->base.pending_read_domains = 0;
 418                 obj->base.pending_write_domain = 0;
 419                 ret = i915_gem_execbuffer_relocate_object(obj, file,
 420                                                           &exec_list[i]);
 421                 if (ret)
 422                         return ret;
 423         }
 424
 425         return 0;
 426 }
 427
 428 static int
 429 i915_gem_execbuffer_reserve(struct drm_device *dev,
 430                             struct drm_file *file,
 431                             struct drm_i915_gem_object **object_list,
 432                             struct drm_i915_gem_exec_object2 *exec_list,
 433                             int count)
 434 {
 435         int ret, i, retry;
 436
 437         /* Attempt to pin all of the buffers into the GTT.
 438          * This is done in 3 phases:
 439          *
 440          * 1a. Unbind all objects that do not match the GTT constraints for
 441          *     the execbuffer (fenceable, mappable, alignment etc).
 442          * 1b. Increment pin count for already bound objects.
 443          * 2.  Bind new objects.
 444          * 3.  Decrement pin count.
 445          *
 446          * This avoid unnecessary unbinding of later objects in order to makr
 447          * room for the earlier objects *unless* we need to defragment.
 448          */
 449         retry = 0;
 450         do {
 451                 ret = 0;
 452
 453                 /* Unbind any ill-fitting objects or pin. */
 454                 for (i = 0; i < count; i++) {
 455                         struct drm_i915_gem_object *obj = object_list[i];
 456                         struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
 457                         bool need_fence, need_mappable;
 458
 459                         if (!obj->gtt_space)
 460                                 continue;
 461
 462                         need_fence =
 463                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 464                                 obj->tiling_mode != I915_TILING_NONE;
 465                         need_mappable =
 466                                 entry->relocation_count ? true : need_fence;
 467
 468                         if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
 469                             (need_mappable && !obj->map_and_fenceable))
 470                                 ret = i915_gem_object_unbind(obj);
 471                         else
 472                                 ret = i915_gem_object_pin(obj,
 473                                                           entry->alignment,
 474                                                           need_mappable);
 475                         if (ret) {
 476                                 count = i;
 477                                 goto err;
 478                         }
 479                 }
 480
 481                 /* Bind fresh objects */
 482                 for (i = 0; i < count; i++) {
 483                         struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
 484                         struct drm_i915_gem_object *obj = object_list[i];
 485                         bool need_fence;
 486
 487                         need_fence =
 488                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 489                                 obj->tiling_mode != I915_TILING_NONE;
 490
 491                         if (!obj->gtt_space) {
 492                                 bool need_mappable =
 493                                         entry->relocation_count ? true : need_fence;
 494
 495                                 ret = i915_gem_object_pin(obj,
 496                                                           entry->alignment,
 497                                                           need_mappable);
 498                                 if (ret)
 499                                         break;
 500                         }
 501
 502                         if (need_fence) {
 503                                 ret = i915_gem_object_get_fence_reg(obj, true);
 504                                 if (ret)
 505                                         break;
 506
 507                                 obj->pending_fenced_gpu_access = true;
 508                         }
 509
 510                         entry->offset = obj->gtt_offset;
 511                 }
 512
 513 err:            /* Decrement pin count for bound objects */
 514                 for (i = 0; i < count; i++) {
 515                         struct drm_i915_gem_object *obj = object_list[i];
 516                         if (obj->gtt_space)
 517                                 i915_gem_object_unpin(obj);
 518                 }
 519
 520                 if (ret != -ENOSPC || retry > 1)
 521                         return ret;
 522
 523                 /* First attempt, just clear anything that is purgeable.
 524                  * Second attempt, clear the entire GTT.
 525                  */
 526                 ret = i915_gem_evict_everything(dev, retry == 0);
 527                 if (ret)
 528                         return ret;
 529
 530                 retry++;
 531         } while (1);
 532 }
 533
 534 static int
 535 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 536                                   struct drm_file *file,
 537                                   struct drm_i915_gem_object **object_list,
 538                                   struct drm_i915_gem_exec_object2 *exec_list,
 539                                   int count)
 540 {
 541         struct drm_i915_gem_relocation_entry *reloc;
 542         int i, total, ret;
 543
 544         for (i = 0; i < count; i++)
 545                 object_list[i]->in_execbuffer = false;
 546
 547         mutex_unlock(&dev->struct_mutex);
 548
 549         total = 0;
 550         for (i = 0; i < count; i++)
 551                 total += exec_list[i].relocation_count;
 552
 553         reloc = drm_malloc_ab(total, sizeof(*reloc));
 554         if (reloc == NULL) {
 555                 mutex_lock(&dev->struct_mutex);
 556                 return -ENOMEM;
 557         }
 558
 559         total = 0;
 560         for (i = 0; i < count; i++) {
 561                 struct drm_i915_gem_relocation_entry __user *user_relocs;
 562
 563                 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
 564
 565                 if (copy_from_user(reloc+total, user_relocs,
 566                                    exec_list[i].relocation_count *
 567                                    sizeof(*reloc))) {
 568                         ret = -EFAULT;
 569                         mutex_lock(&dev->struct_mutex);
 570                         goto err;
 571                 }
 572
 573                 total += exec_list[i].relocation_count;
 574         }
 575
 576         ret = i915_mutex_lock_interruptible(dev);
 577         if (ret) {
 578                 mutex_lock(&dev->struct_mutex);
 579                 goto err;
 580         }
 581
 582         ret = i915_gem_execbuffer_reserve(dev, file,
 583                                           object_list, exec_list,
 584                                           count);
 585         if (ret)
 586                 goto err;
 587
 588         total = 0;
 589         for (i = 0; i < count; i++) {
 590                 struct drm_i915_gem_object *obj = object_list[i];
 591                 obj->base.pending_read_domains = 0;
 592                 obj->base.pending_write_domain = 0;
 593                 ret = i915_gem_execbuffer_relocate_object_slow(obj, file,
 594                                                                &exec_list[i],
 595                                                                reloc + total);
 596                 if (ret)
 597                         goto err;
 598
 599                 total += exec_list[i].relocation_count;
 600         }
 601
 602         /* Leave the user relocations as are, this is the painfully slow path,
 603          * and we want to avoid the complication of dropping the lock whilst
 604          * having buffers reserved in the aperture and so causing spurious
 605          * ENOSPC for random operations.
 606          */
 607
 608 err:
 609         drm_free_large(reloc);
 610         return ret;
 611 }
 612
 613 static void
 614 i915_gem_execbuffer_flush(struct drm_device *dev,
 615                           uint32_t invalidate_domains,
 616                           uint32_t flush_domains,
 617                           uint32_t flush_rings)
 618 {
 619         drm_i915_private_t *dev_priv = dev->dev_private;
 620
 621         if (flush_domains & I915_GEM_DOMAIN_CPU)
 622                 intel_gtt_chipset_flush();
 623
 624         if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
 625                 if (flush_rings & RING_RENDER)
 626                         i915_gem_flush_ring(dev, &dev_priv->render_ring,
 627                                             invalidate_domains, flush_domains);
 628                 if (flush_rings & RING_BSD)
 629                         i915_gem_flush_ring(dev, &dev_priv->bsd_ring,
 630                                             invalidate_domains, flush_domains);
 631                 if (flush_rings & RING_BLT)
 632                         i915_gem_flush_ring(dev, &dev_priv->blt_ring,
 633                                             invalidate_domains, flush_domains);
 634         }
 635 }
 636
 637
 638 static int
 639 i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
 640                                 struct drm_file *file,
 641                                 struct intel_ring_buffer *ring,
 642                                 struct drm_i915_gem_object **objects,
 643                                 int count)
 644 {
 645         struct change_domains cd;
 646         int ret, i;
 647
 648         cd.invalidate_domains = 0;
 649         cd.flush_domains = 0;
 650         cd.flush_rings = 0;
 651         for (i = 0; i < count; i++)
 652                 i915_gem_object_set_to_gpu_domain(objects[i], ring, &cd);
 653
 654         if (cd.invalidate_domains | cd.flush_domains) {
 655 #if WATCH_EXEC
 656                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
 657                           __func__,
 658                          cd.invalidate_domains,
 659                          cd.flush_domains);
 660 #endif
 661                 i915_gem_execbuffer_flush(dev,
 662                                           cd.invalidate_domains,
 663                                           cd.flush_domains,
 664                                           cd.flush_rings);
 665         }
 666
 667         for (i = 0; i < count; i++) {
 668                 struct drm_i915_gem_object *obj = objects[i];
 669                 /* XXX replace with semaphores */
 670                 if (obj->ring && ring != obj->ring) {
 671                         ret = i915_gem_object_wait_rendering(obj, true);
 672                         if (ret)
 673                                 return ret;
 674                 }
 675         }
 676
 677         return 0;
 678 }
 679
 680 static int
 681 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
 682                           uint64_t exec_offset)
 683 {
 684         uint32_t exec_start, exec_len;
 685
 686         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
 687         exec_len = (uint32_t) exec->batch_len;
 688
 689         if ((exec_start | exec_len) & 0x7)
 690                 return -EINVAL;
 691
 692         if (!exec_start)
 693                 return -EINVAL;
 694
 695         return 0;
 696 }
 697
 698 static int
 699 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 700                    int count)
 701 {
 702         int i;
 703
 704         for (i = 0; i < count; i++) {
 705                 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
 706                 int length; /* limited by fault_in_pages_readable() */
 707
 708                 /* First check for malicious input causing overflow */
 709                 if (exec[i].relocation_count >
 710                     INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
 711                         return -EINVAL;
 712
 713                 length = exec[i].relocation_count *
 714                         sizeof(struct drm_i915_gem_relocation_entry);
 715                 if (!access_ok(VERIFY_READ, ptr, length))
 716                         return -EFAULT;
 717
 718                 /* we may also need to update the presumed offsets */
 719                 if (!access_ok(VERIFY_WRITE, ptr, length))
 720                         return -EFAULT;
 721
 722                 if (fault_in_pages_readable(ptr, length))
 723                         return -EFAULT;
 724         }
 725
 726         return 0;
 727 }
 728
 729 static void
 730 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 731                                     struct intel_ring_buffer *ring)
 732 {
 733         uint32_t flush_domains = 0;
 734
 735         /* The sampler always gets flushed on i965 (sigh) */
 736         if (INTEL_INFO(dev)->gen >= 4)
 737                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
 738
 739         ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
 740 }
 741
 742
 743 static int
 744 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 745                        struct drm_file *file,
 746                        struct drm_i915_gem_execbuffer2 *args,
 747                        struct drm_i915_gem_exec_object2 *exec_list)
 748 {
 749         drm_i915_private_t *dev_priv = dev->dev_private;
 750         struct drm_i915_gem_object **object_list = NULL;
 751         struct drm_i915_gem_object *batch_obj;
 752         struct drm_clip_rect *cliprects = NULL;
 753         struct drm_i915_gem_request *request = NULL;
 754         struct intel_ring_buffer *ring;
 755         int ret, i, flips;
 756         uint64_t exec_offset;
 757
 758         ret = validate_exec_list(exec_list, args->buffer_count);
 759         if (ret)
 760                 return ret;
 761
 762 #if WATCH_EXEC
 763         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
 764                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
 765 #endif
 766         switch (args->flags & I915_EXEC_RING_MASK) {
 767         case I915_EXEC_DEFAULT:
 768         case I915_EXEC_RENDER:
 769                 ring = &dev_priv->render_ring;
 770                 break;
 771         case I915_EXEC_BSD:
 772                 if (!HAS_BSD(dev)) {
 773                         DRM_ERROR("execbuf with invalid ring (BSD)\n");
 774                         return -EINVAL;
 775                 }
 776                 ring = &dev_priv->bsd_ring;
 777                 break;
 778         case I915_EXEC_BLT:
 779                 if (!HAS_BLT(dev)) {
 780                         DRM_ERROR("execbuf with invalid ring (BLT)\n");
 781                         return -EINVAL;
 782                 }
 783                 ring = &dev_priv->blt_ring;
 784                 break;
 785         default:
 786                 DRM_ERROR("execbuf with unknown ring: %d\n",
 787                           (int)(args->flags & I915_EXEC_RING_MASK));
 788                 return -EINVAL;
 789         }
 790
 791         if (args->buffer_count < 1) {
 792                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
 793                 return -EINVAL;
 794         }
 795         object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
 796         if (object_list == NULL) {
 797                 DRM_ERROR("Failed to allocate object list for %d buffers\n",
 798                           args->buffer_count);
 799                 ret = -ENOMEM;
 800                 goto pre_mutex_err;
 801         }
 802
 803         if (args->num_cliprects != 0) {
 804                 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
 805                                     GFP_KERNEL);
 806                 if (cliprects == NULL) {
 807                         ret = -ENOMEM;
 808                         goto pre_mutex_err;
 809                 }
 810
 811                 ret = copy_from_user(cliprects,
 812                                      (struct drm_clip_rect __user *)
 813                                      (uintptr_t) args->cliprects_ptr,
 814                                      sizeof(*cliprects) * args->num_cliprects);
 815                 if (ret != 0) {
 816                         DRM_ERROR("copy %d cliprects failed: %d\n",
 817                                   args->num_cliprects, ret);
 818                         ret = -EFAULT;
 819                         goto pre_mutex_err;
 820                 }
 821         }
 822
 823         request = kzalloc(sizeof(*request), GFP_KERNEL);
 824         if (request == NULL) {
 825                 ret = -ENOMEM;
 826                 goto pre_mutex_err;
 827         }
 828
 829         ret = i915_mutex_lock_interruptible(dev);
 830         if (ret)
 831                 goto pre_mutex_err;
 832
 833         if (dev_priv->mm.suspended) {
 834                 mutex_unlock(&dev->struct_mutex);
 835                 ret = -EBUSY;
 836                 goto pre_mutex_err;
 837         }
 838
 839         /* Look up object handles */
 840         for (i = 0; i < args->buffer_count; i++) {
 841                 struct drm_i915_gem_object *obj;
 842
 843                 obj = to_intel_bo (drm_gem_object_lookup(dev, file,
 844                                                          exec_list[i].handle));
 845                 if (obj == NULL) {
 846                         DRM_ERROR("Invalid object handle %d at index %d\n",
 847                                    exec_list[i].handle, i);
 848                         /* prevent error path from reading uninitialized data */
 849                         args->buffer_count = i;
 850                         ret = -ENOENT;
 851                         goto err;
 852                 }
 853                 object_list[i] = obj;
 854
 855                 if (obj->in_execbuffer) {
 856                         DRM_ERROR("Object %p appears more than once in object list\n",
 857                                    obj);
 858                         /* prevent error path from reading uninitialized data */
 859                         args->buffer_count = i + 1;
 860                         ret = -EINVAL;
 861                         goto err;
 862                 }
 863                 obj->in_execbuffer = true;
 864                 obj->pending_fenced_gpu_access = false;
 865         }
 866
 867         /* Move the objects en-masse into the GTT, evicting if necessary. */
 868         ret = i915_gem_execbuffer_reserve(dev, file,
 869                                           object_list, exec_list,
 870                                           args->buffer_count);
 871         if (ret)
 872                 goto err;
 873
 874         /* The objects are in their final locations, apply the relocations. */
 875         ret = i915_gem_execbuffer_relocate(dev, file,
 876                                            object_list, exec_list,
 877                                            args->buffer_count);
 878         if (ret) {
 879                 if (ret == -EFAULT) {
 880                         ret = i915_gem_execbuffer_relocate_slow(dev, file,
 881                                                                 object_list,
 882                                                                 exec_list,
 883                                                                 args->buffer_count);
 884                         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 885                 }
 886                 if (ret)
 887                         goto err;
 888         }
 889
 890         /* Set the pending read domains for the batch buffer to COMMAND */
 891         batch_obj = object_list[args->buffer_count-1];
 892         if (batch_obj->base.pending_write_domain) {
 893                 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
 894                 ret = -EINVAL;
 895                 goto err;
 896         }
 897         batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 898
 899         /* Sanity check the batch buffer */
 900         exec_offset = batch_obj->gtt_offset;
 901         ret = i915_gem_check_execbuffer(args, exec_offset);
 902         if (ret != 0) {
 903                 DRM_ERROR("execbuf with invalid offset/length\n");
 904                 goto err;
 905         }
 906
 907         ret = i915_gem_execbuffer_move_to_gpu(dev, file, ring,
 908                                               object_list, args->buffer_count);
 909         if (ret)
 910                 goto err;
 911
 912 #if WATCH_COHERENCY
 913         for (i = 0; i < args->buffer_count; i++) {
 914                 i915_gem_object_check_coherency(object_list[i],
 915                                                 exec_list[i].handle);
 916         }
 917 #endif
 918
 919 #if WATCH_EXEC
 920         i915_gem_dump_object(batch_obj,
 921                               args->batch_len,
 922                               __func__,
 923                               ~0);
 924 #endif
 925
 926         /* Check for any pending flips. As we only maintain a flip queue depth
 927          * of 1, we can simply insert a WAIT for the next display flip prior
 928          * to executing the batch and avoid stalling the CPU.
 929          */
 930         flips = 0;
 931         for (i = 0; i < args->buffer_count; i++) {
 932                 if (object_list[i]->base.write_domain)
 933                         flips |= atomic_read(&object_list[i]->pending_flip);
 934         }
 935         if (flips) {
 936                 int plane, flip_mask;
 937
 938                 for (plane = 0; flips >> plane; plane++) {
 939                         if (((flips >> plane) & 1) == 0)
 940                                 continue;
 941
 942                         if (plane)
 943                                 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
 944                         else
 945                                 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
 946
 947                         ret = intel_ring_begin(ring, 2);
 948                         if (ret)
 949                                 goto err;
 950
 951                         intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
 952                         intel_ring_emit(ring, MI_NOOP);
 953                         intel_ring_advance(ring);
 954                 }
 955         }
 956
 957         /* Exec the batchbuffer */
 958         ret = ring->dispatch_execbuffer(ring, args, cliprects, exec_offset);
 959         if (ret) {
 960                 DRM_ERROR("dispatch failed %d\n", ret);
 961                 goto err;
 962         }
 963
 964         for (i = 0; i < args->buffer_count; i++) {
 965                 struct drm_i915_gem_object *obj = object_list[i];
 966
 967                 obj->base.read_domains = obj->base.pending_read_domains;
 968                 obj->base.write_domain = obj->base.pending_write_domain;
 969                 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 970
 971                 i915_gem_object_move_to_active(obj, ring);
 972                 if (obj->base.write_domain) {
 973                         obj->dirty = 1;
 974                         list_move_tail(&obj->gpu_write_list,
 975                                        &ring->gpu_write_list);
 976                         intel_mark_busy(dev, obj);
 977                 }
 978
 979                 trace_i915_gem_object_change_domain(obj,
 980                                                     obj->base.read_domains,
 981                                                     obj->base.write_domain);
 982         }
 983
 984         /*
 985          * Ensure that the commands in the batch buffer are
 986          * finished before the interrupt fires
 987          */
 988         i915_gem_execbuffer_retire_commands(dev, ring);
 989
 990         if (i915_add_request(dev, file, request, ring))
 991                 i915_gem_next_request_seqno(dev, ring);
 992         else
 993                 request = NULL;
 994
 995 err:
 996         for (i = 0; i < args->buffer_count; i++) {
 997                 object_list[i]->in_execbuffer = false;
 998                 drm_gem_object_unreference(&object_list[i]->base);
 999         }
1000
1001         mutex_unlock(&dev->struct_mutex);
1002
1003 pre_mutex_err:
1004         drm_free_large(object_list);
1005         kfree(cliprects);
1006         kfree(request);
1007
1008         return ret;
1009 }
1010
1011 /*
1012  * Legacy execbuffer just creates an exec2 list from the original exec object
1013  * list array and passes it to the real function.
1014  */
1015 int
1016 i915_gem_execbuffer(struct drm_device *dev, void *data,
1017                     struct drm_file *file)
1018 {
1019         struct drm_i915_gem_execbuffer *args = data;
1020         struct drm_i915_gem_execbuffer2 exec2;
1021         struct drm_i915_gem_exec_object *exec_list = NULL;
1022         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1023         int ret, i;
1024
1025 #if WATCH_EXEC
1026         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
1027                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1028 #endif
1029
1030         if (args->buffer_count < 1) {
1031                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
1032                 return -EINVAL;
1033         }
1034
1035         /* Copy in the exec list from userland */
1036         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1037         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1038         if (exec_list == NULL || exec2_list == NULL) {
1039                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
1040                           args->buffer_count);
1041                 drm_free_large(exec_list);
1042                 drm_free_large(exec2_list);
1043                 return -ENOMEM;
1044         }
1045         ret = copy_from_user(exec_list,
1046                              (struct drm_i915_relocation_entry __user *)
1047                              (uintptr_t) args->buffers_ptr,
1048                              sizeof(*exec_list) * args->buffer_count);
1049         if (ret != 0) {
1050                 DRM_ERROR("copy %d exec entries failed %d\n",
1051                           args->buffer_count, ret);
1052                 drm_free_large(exec_list);
1053                 drm_free_large(exec2_list);
1054                 return -EFAULT;
1055         }
1056
1057         for (i = 0; i < args->buffer_count; i++) {
1058                 exec2_list[i].handle = exec_list[i].handle;
1059                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1060                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1061                 exec2_list[i].alignment = exec_list[i].alignment;
1062                 exec2_list[i].offset = exec_list[i].offset;
1063                 if (INTEL_INFO(dev)->gen < 4)
1064                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1065                 else
1066                         exec2_list[i].flags = 0;
1067         }
1068
1069         exec2.buffers_ptr = args->buffers_ptr;
1070         exec2.buffer_count = args->buffer_count;
1071         exec2.batch_start_offset = args->batch_start_offset;
1072         exec2.batch_len = args->batch_len;
1073         exec2.DR1 = args->DR1;
1074         exec2.DR4 = args->DR4;
1075         exec2.num_cliprects = args->num_cliprects;
1076         exec2.cliprects_ptr = args->cliprects_ptr;
1077         exec2.flags = I915_EXEC_RENDER;
1078
1079         ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1080         if (!ret) {
1081                 /* Copy the new buffer offsets back to the user's exec list. */
1082                 for (i = 0; i < args->buffer_count; i++)
1083                         exec_list[i].offset = exec2_list[i].offset;
1084                 /* ... and back out to userspace */
1085                 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1086                                    (uintptr_t) args->buffers_ptr,
1087                                    exec_list,
1088                                    sizeof(*exec_list) * args->buffer_count);
1089                 if (ret) {
1090                         ret = -EFAULT;
1091                         DRM_ERROR("failed to copy %d exec entries "
1092                                   "back to user (%d)\n",
1093                                   args->buffer_count, ret);
1094                 }
1095         }
1096
1097         drm_free_large(exec_list);
1098         drm_free_large(exec2_list);
1099         return ret;
1100 }
1101
1102 int
1103 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1104                      struct drm_file *file)
1105 {
1106         struct drm_i915_gem_execbuffer2 *args = data;
1107         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1108         int ret;
1109
1110 #if WATCH_EXEC
1111         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
1112                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1113 #endif
1114
1115         if (args->buffer_count < 1) {
1116                 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
1117                 return -EINVAL;
1118         }
1119
1120         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1121         if (exec2_list == NULL) {
1122                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
1123                           args->buffer_count);
1124                 return -ENOMEM;
1125         }
1126         ret = copy_from_user(exec2_list,
1127                              (struct drm_i915_relocation_entry __user *)
1128                              (uintptr_t) args->buffers_ptr,
1129                              sizeof(*exec2_list) * args->buffer_count);
1130         if (ret != 0) {
1131                 DRM_ERROR("copy %d exec entries failed %d\n",
1132                           args->buffer_count, ret);
1133                 drm_free_large(exec2_list);
1134                 return -EFAULT;
1135         }
1136
1137         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1138         if (!ret) {
1139                 /* Copy the new buffer offsets back to the user's exec list. */
1140                 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1141                                    (uintptr_t) args->buffers_ptr,
1142                                    exec2_list,
1143                                    sizeof(*exec2_list) * args->buffer_count);
1144                 if (ret) {
1145                         ret = -EFAULT;
1146                         DRM_ERROR("failed to copy %d exec entries "
1147                                   "back to user (%d)\n",
1148                                   args->buffer_count, ret);
1149                 }
1150         }
1151
1152         drm_free_large(exec2_list);
1153         return ret;
1154 }
1155