}
static int
-i915_gem_wait_for_error(struct drm_device *dev)
+i915_gem_wait_for_error(struct i915_gpu_error *error)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct completion *x = &dev_priv->error_completion;
- unsigned long flags;
int ret;
- if (!atomic_read(&dev_priv->mm.wedged))
+#define EXIT_COND (!i915_reset_in_progress(error))
+ if (EXIT_COND)
return 0;
+ /* GPU is already declared terminally dead, give up. */
+ if (i915_terminally_wedged(error))
+ return -EIO;
+
/*
* Only wait 10 seconds for the gpu reset to complete to avoid hanging
* userspace. If it takes that long something really bad is going on and
* we should simply try to bail out and fail as gracefully as possible.
*/
- ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
+ ret = wait_event_interruptible_timeout(error->reset_queue,
+ EXIT_COND,
+ 10*HZ);
if (ret == 0) {
DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
return -EIO;
} else if (ret < 0) {
return ret;
}
+#undef EXIT_COND
- if (atomic_read(&dev_priv->mm.wedged)) {
- /* GPU is hung, bump the completion count to account for
- * the token we just consumed so that we never hit zero and
- * end up waiting upon a subsequent completion event that
- * will never happen.
- */
- spin_lock_irqsave(&x->wait.lock, flags);
- x->done++;
- spin_unlock_irqrestore(&x->wait.lock, flags);
- }
return 0;
}
int i915_mutex_lock_interruptible(struct drm_device *dev)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
- ret = i915_gem_wait_for_error(dev);
+ ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
if (ret)
return ret;
i915_gem_init_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_init *args = data;
if (drm_core_check_feature(dev, DRIVER_MODESET))
return -ENODEV;
mutex_lock(&dev->struct_mutex);
- i915_gem_init_global_gtt(dev, args->gtt_start,
- args->gtt_end, args->gtt_end);
+ i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
+ args->gtt_end);
+ dev_priv->gtt.mappable_end = args->gtt_end;
mutex_unlock(&dev->struct_mutex);
return 0;
pinned += obj->gtt_space->size;
mutex_unlock(&dev->struct_mutex);
- args->aper_size = dev_priv->mm.gtt_total;
+ args->aper_size = dev_priv->gtt.total;
args->aper_available_size = args->aper_size - pinned;
return 0;
}
+void *i915_gem_object_alloc(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+ kmem_cache_free(dev_priv->slab, obj);
+}
+
static int
i915_gem_create(struct drm_file *file,
struct drm_device *dev,
if (ret) {
drm_gem_object_release(&obj->base);
i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
- kfree(obj);
+ i915_gem_object_free(obj);
return ret;
}
args->size, &args->handle);
}
-static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
-{
- drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
-
- return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
- obj->tiling_mode != I915_TILING_NONE;
-}
-
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
const char *gpu_vaddr, int gpu_offset,
loff_t offset;
int shmem_page_offset, page_length, ret = 0;
int obj_do_bit17_swizzling, page_do_bit17_swizzling;
- int hit_slowpath = 0;
int prefaulted = 0;
int needs_clflush = 0;
- struct scatterlist *sg;
- int i;
+ struct sg_page_iter sg_iter;
- user_data = (char __user *) (uintptr_t) args->data_ptr;
+ user_data = to_user_ptr(args->data_ptr);
remain = args->size;
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
offset = args->offset;
- for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
- struct page *page;
-
- if (i < offset >> PAGE_SHIFT)
- continue;
+ for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
+ offset >> PAGE_SHIFT) {
+ struct page *page = sg_iter.page;
if (remain <= 0)
break;
if ((shmem_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - shmem_page_offset;
- page = sg_page(sg);
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
(page_to_phys(page) & (1 << 17)) != 0;
if (ret == 0)
goto next_page;
- hit_slowpath = 1;
mutex_unlock(&dev->struct_mutex);
if (!prefaulted) {
out:
i915_gem_object_unpin_pages(obj);
- if (hit_slowpath) {
- /* Fixup: Kill any reinstated backing storage pages */
- if (obj->madv == __I915_MADV_PURGED)
- i915_gem_object_truncate(obj);
- }
-
return ret;
}
return 0;
if (!access_ok(VERIFY_WRITE,
- (char __user *)(uintptr_t)args->data_ptr,
+ to_user_ptr(args->data_ptr),
args->size))
return -EFAULT;
if (ret)
goto out_unpin;
- user_data = (char __user *) (uintptr_t) args->data_ptr;
+ user_data = to_user_ptr(args->data_ptr);
remain = args->size;
offset = obj->gtt_offset + args->offset;
* source page isn't available. Return the error and we'll
* retry in the slow path.
*/
- if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
+ if (fast_user_write(dev_priv->gtt.mappable, page_base,
page_offset, user_data, page_length)) {
ret = -EFAULT;
goto out_unpin;
int hit_slowpath = 0;
int needs_clflush_after = 0;
int needs_clflush_before = 0;
- int i;
- struct scatterlist *sg;
+ struct sg_page_iter sg_iter;
- user_data = (char __user *) (uintptr_t) args->data_ptr;
+ user_data = to_user_ptr(args->data_ptr);
remain = args->size;
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
offset = args->offset;
obj->dirty = 1;
- for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
- struct page *page;
+ for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
+ offset >> PAGE_SHIFT) {
+ struct page *page = sg_iter.page;
int partial_cacheline_write;
- if (i < offset >> PAGE_SHIFT)
- continue;
-
if (remain <= 0)
break;
((shmem_page_offset | page_length)
& (boot_cpu_data.x86_clflush_size - 1));
- page = sg_page(sg);
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
(page_to_phys(page) & (1 << 17)) != 0;
i915_gem_object_unpin_pages(obj);
if (hit_slowpath) {
- /* Fixup: Kill any reinstated backing storage pages */
- if (obj->madv == __I915_MADV_PURGED)
- i915_gem_object_truncate(obj);
- /* and flush dirty cachelines in case the object isn't in the cpu write
- * domain anymore. */
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+ /*
+ * Fixup: Flush cpu caches in case we didn't flush the dirty
+ * cachelines in-line while writing and the object moved
+ * out of the cpu write domain while we've dropped the lock.
+ */
+ if (!needs_clflush_after &&
+ obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
i915_gem_clflush_object(obj);
i915_gem_chipset_flush(dev);
}
return 0;
if (!access_ok(VERIFY_READ,
- (char __user *)(uintptr_t)args->data_ptr,
+ to_user_ptr(args->data_ptr),
args->size))
return -EFAULT;
- ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
+ ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
args->size);
if (ret)
return -EFAULT;
}
int
-i915_gem_check_wedge(struct drm_i915_private *dev_priv,
+i915_gem_check_wedge(struct i915_gpu_error *error,
bool interruptible)
{
- if (atomic_read(&dev_priv->mm.wedged)) {
- struct completion *x = &dev_priv->error_completion;
- bool recovery_complete;
- unsigned long flags;
-
- /* Give the error handler a chance to run. */
- spin_lock_irqsave(&x->wait.lock, flags);
- recovery_complete = x->done > 0;
- spin_unlock_irqrestore(&x->wait.lock, flags);
-
+ if (i915_reset_in_progress(error)) {
/* Non-interruptible callers can't handle -EAGAIN, hence return
* -EIO unconditionally for these. */
if (!interruptible)
return -EIO;
- /* Recovery complete, but still wedged means reset failure. */
- if (recovery_complete)
+ /* Recovery complete, but the reset failed ... */
+ if (i915_terminally_wedged(error))
return -EIO;
return -EAGAIN;
* __wait_seqno - wait until execution of seqno has finished
* @ring: the ring expected to report seqno
* @seqno: duh!
+ * @reset_counter: reset sequence associated with the given seqno
* @interruptible: do an interruptible wait (normally yes)
* @timeout: in - how long to wait (NULL forever); out - how much time remaining
*
+ * Note: It is of utmost importance that the passed in seqno and reset_counter
+ * values have been read by the caller in an smp safe manner. Where read-side
+ * locks are involved, it is sufficient to read the reset_counter before
+ * unlocking the lock that protects the seqno. For lockless tricks, the
+ * reset_counter _must_ be read before, and an appropriate smp_rmb must be
+ * inserted.
+ *
* Returns 0 if the seqno was found within the alloted time. Else returns the
* errno with remaining time filled in timeout argument.
*/
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
+ unsigned reset_counter,
bool interruptible, struct timespec *timeout)
{
drm_i915_private_t *dev_priv = ring->dev->dev_private;
#define EXIT_COND \
(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
- atomic_read(&dev_priv->mm.wedged))
+ i915_reset_in_progress(&dev_priv->gpu_error) || \
+ reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
do {
if (interruptible)
end = wait_event_interruptible_timeout(ring->irq_queue,
end = wait_event_timeout(ring->irq_queue, EXIT_COND,
timeout_jiffies);
- ret = i915_gem_check_wedge(dev_priv, interruptible);
+ /* We need to check whether any gpu reset happened in between
+ * the caller grabbing the seqno and now ... */
+ if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
+ end = -EAGAIN;
+
+ /* ... but upgrade the -EGAIN to an -EIO if the gpu is truely
+ * gone. */
+ ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
if (ret)
end = ret;
} while (end == 0 && wait_forever);
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
BUG_ON(seqno == 0);
- ret = i915_gem_check_wedge(dev_priv, interruptible);
+ ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
if (ret)
return ret;
if (ret)
return ret;
- return __wait_seqno(ring, seqno, interruptible, NULL);
+ return __wait_seqno(ring, seqno,
+ atomic_read(&dev_priv->gpu_error.reset_counter),
+ interruptible, NULL);
}
/**
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring = obj->ring;
+ unsigned reset_counter;
u32 seqno;
int ret;
if (seqno == 0)
return 0;
- ret = i915_gem_check_wedge(dev_priv, true);
+ ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
if (ret)
return ret;
if (ret)
return ret;
+ reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
mutex_unlock(&dev->struct_mutex);
- ret = __wait_seqno(ring, seqno, true, NULL);
+ ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
mutex_lock(&dev->struct_mutex);
i915_gem_retire_requests_ring(ring);
trace_i915_gem_object_fault(obj, page_offset, true, write);
+ /* Access to snoopable pages through the GTT is incoherent. */
+ if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
/* Now bind it into the GTT if needed */
ret = i915_gem_object_pin(obj, 0, true, false);
if (ret)
obj->fault_mappable = true;
- pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) +
+ pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) +
page_offset;
/* Finally, remap it using the new GTT offset */
/* If this -EIO is due to a gpu hang, give the reset code a
* chance to clean up the mess. Otherwise return the proper
* SIGBUS. */
- if (!atomic_read(&dev_priv->mm.wedged))
+ if (i915_terminally_wedged(&dev_priv->gpu_error))
return VM_FAULT_SIGBUS;
case -EAGAIN:
/* Give the error handler a chance to run and move the
obj->fault_mappable = false;
}
-static uint32_t
+uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
uint32_t gtt_size;
* Return the required GTT alignment for an object, taking into account
* potential fence register mapping.
*/
-static uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev,
- uint32_t size,
- int tiling_mode)
+uint32_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
+ int tiling_mode, bool fenced)
{
/*
* Minimum alignment is 4k (GTT page size), but might be greater
* if a fence register is needed for the object.
*/
- if (INTEL_INFO(dev)->gen >= 4 ||
+ if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
tiling_mode == I915_TILING_NONE)
return 4096;
return i915_gem_get_gtt_size(dev, size, tiling_mode);
}
-/**
- * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
- * unfenced object
- * @dev: the device
- * @size: size of the object
- * @tiling_mode: tiling mode of the object
- *
- * Return the required GTT alignment for an object, only taking into account
- * unfenced tiled surface requirements.
- */
-uint32_t
-i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
- uint32_t size,
- int tiling_mode)
-{
- /*
- * Minimum alignment is 4k (GTT page size) for sane hw.
- */
- if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
- tiling_mode == I915_TILING_NONE)
- return 4096;
-
- /* Previous hardware however needs to be aligned to a power-of-two
- * tile height. The simplest method for determining this is to reuse
- * the power-of-tile object size.
- */
- return i915_gem_get_gtt_size(dev, size, tiling_mode);
-}
-
static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
goto unlock;
}
- if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
+ if (obj->base.size > dev_priv->gtt.mappable_end) {
ret = -E2BIG;
goto out;
}
* To do this we must instruct the shmfs to drop all of its
* backing pages, *now*.
*/
- inode = obj->base.filp->f_path.dentry->d_inode;
+ inode = file_inode(obj->base.filp);
shmem_truncate_range(inode, 0, (loff_t)-1);
obj->madv = __I915_MADV_PURGED;
kfree(obj->pages);
}
-static int
+int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
const struct drm_i915_gem_object_ops *ops = obj->ops;
*
* Fail silently without starting the shrinker
*/
- mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
+ mapping = file_inode(obj->base.filp)->i_mapping;
gfp = mapping_gfp_mask(mapping);
gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
gfp &= ~(__GFP_IO | __GFP_WAIT);
if (obj->pages)
return 0;
+ if (obj->madv != I915_MADV_WILLNEED) {
+ DRM_ERROR("Attempting to obtain a purgeable object\n");
+ return -EINVAL;
+ }
+
BUG_ON(obj->pages_pin_count);
ret = ops->get_pages(obj);
BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
BUG_ON(!obj->active);
- if (obj->pin_count) /* are we a framebuffer? */
- intel_mark_fb_idle(obj);
-
list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
list_del_init(&obj->ring_list);
}
static int
-i915_gem_handle_seqno_wrap(struct drm_device *dev)
+i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring;
int ret, i, j;
- /* The hardware uses various monotonic 32-bit counters, if we
- * detect that they will wraparound we need to idle the GPU
- * and reset those counters.
- */
- ret = 0;
+ /* Carefully retire all requests without writing to the rings */
for_each_ring(ring, dev_priv, i) {
- for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
- ret |= ring->sync_seqno[j] != 0;
+ ret = intel_ring_idle(ring);
+ if (ret)
+ return ret;
}
- if (ret == 0)
- return ret;
-
- ret = i915_gpu_idle(dev);
- if (ret)
- return ret;
-
i915_gem_retire_requests(dev);
+
+ /* Finally reset hw state */
for_each_ring(ring, dev_priv, i) {
+ intel_ring_init_seqno(ring, seqno);
+
for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
ring->sync_seqno[j] = 0;
}
return 0;
}
+int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ int ret;
+
+ if (seqno == 0)
+ return -EINVAL;
+
+ /* HWS page needs to be set less than what we
+ * will inject to ring
+ */
+ ret = i915_gem_init_seqno(dev, seqno - 1);
+ if (ret)
+ return ret;
+
+ /* Carefully set the last_seqno value so that wrap
+ * detection still works
+ */
+ dev_priv->next_seqno = seqno;
+ dev_priv->last_seqno = seqno - 1;
+ if (dev_priv->last_seqno == 0)
+ dev_priv->last_seqno--;
+
+ return 0;
+}
+
int
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
{
/* reserve 0 for non-seqno */
if (dev_priv->next_seqno == 0) {
- int ret = i915_gem_handle_seqno_wrap(dev);
+ int ret = i915_gem_init_seqno(dev, 0);
if (ret)
return ret;
dev_priv->next_seqno = 1;
}
- *seqno = dev_priv->next_seqno++;
+ *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
return 0;
}
if (!dev_priv->mm.suspended) {
if (i915_enable_hangcheck) {
- mod_timer(&dev_priv->hangcheck_timer,
+ mod_timer(&dev_priv->gpu_error.hangcheck_timer,
round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
}
if (was_empty) {
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
+ drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_wait *args = data;
struct drm_i915_gem_object *obj;
struct intel_ring_buffer *ring = NULL;
struct timespec timeout_stack, *timeout = NULL;
+ unsigned reset_counter;
u32 seqno = 0;
int ret = 0;
}
drm_gem_object_unreference(&obj->base);
+ reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
mutex_unlock(&dev->struct_mutex);
- ret = __wait_seqno(ring, seqno, true, timeout);
+ ret = __wait_seqno(ring, seqno, reset_counter, true, timeout);
if (timeout) {
WARN_ON(!timespec_valid(timeout));
args->timeout_ns = timespec_to_ns(timeout);
{
u32 old_write_domain, old_read_domains;
- /* Act a barrier for all accesses through the GTT */
- mb();
-
/* Force a pagefault for domain tracking on next user access */
i915_gem_release_mmap(obj);
if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
return;
+ /* Wait for any direct GTT access to complete */
+ mb();
+
old_read_domains = obj->base.read_domains;
old_write_domain = obj->base.write_domain;
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
- int ret = 0;
+ int ret;
if (obj->gtt_space == NULL)
return 0;
return 0;
}
-static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
- struct drm_i915_gem_object *obj)
-{
- drm_i915_private_t *dev_priv = dev->dev_private;
- uint64_t val;
-
- if (obj) {
- u32 size = obj->gtt_space->size;
-
- val = (uint64_t)((obj->gtt_offset + size - 4096) &
- 0xfffff000) << 32;
- val |= obj->gtt_offset & 0xfffff000;
- val |= (uint64_t)((obj->stride / 128) - 1) <<
- SANDYBRIDGE_FENCE_PITCH_SHIFT;
-
- if (obj->tiling_mode == I915_TILING_Y)
- val |= 1 << I965_FENCE_TILING_Y_SHIFT;
- val |= I965_FENCE_REG_VALID;
- } else
- val = 0;
-
- I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
- POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
-}
-
static void i965_write_fence_reg(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
{
drm_i915_private_t *dev_priv = dev->dev_private;
+ int fence_reg;
+ int fence_pitch_shift;
uint64_t val;
+ if (INTEL_INFO(dev)->gen >= 6) {
+ fence_reg = FENCE_REG_SANDYBRIDGE_0;
+ fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
+ } else {
+ fence_reg = FENCE_REG_965_0;
+ fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
+ }
+
if (obj) {
u32 size = obj->gtt_space->size;
val = (uint64_t)((obj->gtt_offset + size - 4096) &
0xfffff000) << 32;
val |= obj->gtt_offset & 0xfffff000;
- val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
+ val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
if (obj->tiling_mode == I915_TILING_Y)
val |= 1 << I965_FENCE_TILING_Y_SHIFT;
val |= I965_FENCE_REG_VALID;
} else
val = 0;
- I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
- POSTING_READ(FENCE_REG_965_0 + reg * 8);
+ fence_reg += reg * 8;
+ I915_WRITE64(fence_reg, val);
+ POSTING_READ(fence_reg);
}
static void i915_write_fence_reg(struct drm_device *dev, int reg,
POSTING_READ(FENCE_REG_830_0 + reg * 4);
}
+inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
+{
+ return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
+}
+
static void i915_gem_write_fence(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ /* Ensure that all CPU reads are completed before installing a fence
+ * and all writes before removing the fence.
+ */
+ if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
+ mb();
+
switch (INTEL_INFO(dev)->gen) {
case 7:
- case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
+ case 6:
case 5:
case 4: i965_write_fence_reg(dev, reg, obj); break;
case 3: i915_write_fence_reg(dev, reg, obj); break;
case 2: i830_write_fence_reg(dev, reg, obj); break;
- default: break;
+ default: BUG();
}
+
+ /* And similarly be paranoid that no direct access to this region
+ * is reordered to before the fence is installed.
+ */
+ if (i915_gem_object_needs_mb(obj))
+ mb();
}
static inline int fence_number(struct drm_i915_private *dev_priv,
}
static int
-i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
+i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
{
if (obj->last_fenced_seqno) {
int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
obj->last_fenced_seqno = 0;
}
- /* Ensure that all CPU reads are completed before installing a fence
- * and all writes before removing the fence.
- */
- if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
- mb();
-
obj->fenced_gpu_access = false;
return 0;
}
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
int ret;
- ret = i915_gem_object_flush_fence(obj);
+ ret = i915_gem_object_wait_fence(obj);
if (ret)
return ret;
* will need to serialise the write to the associated fence register?
*/
if (obj->fence_dirty) {
- ret = i915_gem_object_flush_fence(obj);
+ ret = i915_gem_object_wait_fence(obj);
if (ret)
return ret;
}
if (reg->obj) {
struct drm_i915_gem_object *old = reg->obj;
- ret = i915_gem_object_flush_fence(old);
+ ret = i915_gem_object_wait_fence(old);
if (ret)
return ret;
/* On non-LLC machines we have to be careful when putting differing
* types of snoopable memory together to avoid the prefetcher
- * crossing memory domains and dieing.
+ * crossing memory domains and dying.
*/
if (HAS_LLC(dev))
return true;
bool mappable, fenceable;
int ret;
- if (obj->madv != I915_MADV_WILLNEED) {
- DRM_ERROR("Attempting to bind a purgeable object\n");
- return -EINVAL;
- }
-
fence_size = i915_gem_get_gtt_size(dev,
obj->base.size,
obj->tiling_mode);
fence_alignment = i915_gem_get_gtt_alignment(dev,
obj->base.size,
- obj->tiling_mode);
+ obj->tiling_mode, true);
unfenced_alignment =
- i915_gem_get_unfenced_gtt_alignment(dev,
+ i915_gem_get_gtt_alignment(dev,
obj->base.size,
- obj->tiling_mode);
+ obj->tiling_mode, false);
if (alignment == 0)
alignment = map_and_fenceable ? fence_alignment :
* before evicting everything in a vain attempt to find space.
*/
if (obj->base.size >
- (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
+ (map_and_fenceable ? dev_priv->gtt.mappable_end : dev_priv->gtt.total)) {
DRM_ERROR("Attempting to bind an object larger than the aperture\n");
return -E2BIG;
}
if (map_and_fenceable)
ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node,
size, alignment, obj->cache_level,
- 0, dev_priv->mm.gtt_mappable_end);
+ 0, dev_priv->gtt.mappable_end);
else
ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node,
size, alignment, obj->cache_level);
(node->start & (fence_alignment - 1)) == 0;
mappable =
- obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
+ obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end;
obj->map_and_fenceable = mappable && fenceable;
if (obj->pages == NULL)
return;
+ /*
+ * Stolen memory is always coherent with the GPU as it is explicitly
+ * marked as wc by the system, or the system is cache-coherent.
+ */
+ if (obj->stolen)
+ return;
+
/* If the GPU is snooping the contents of the CPU cache,
* we do not need to manually clear the CPU cache lines. However,
* the caches are only snooped when the render cache is
i915_gem_object_flush_cpu_write_domain(obj);
+ /* Serialise direct access to this object with the barriers for
+ * coherent writes from the GPU, by effectively invalidating the
+ * GTT domain upon first access.
+ */
+ if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+ mb();
+
old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
struct drm_i915_gem_request *request;
struct intel_ring_buffer *ring = NULL;
+ unsigned reset_counter;
u32 seqno = 0;
int ret;
- if (atomic_read(&dev_priv->mm.wedged))
- return -EIO;
+ ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
+ if (ret)
+ return ret;
spin_lock(&file_priv->mm.lock);
list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
ring = request->ring;
seqno = request->seqno;
}
+ reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
spin_unlock(&file_priv->mm.lock);
if (seqno == 0)
return 0;
- ret = __wait_seqno(ring, seqno, true, NULL);
+ ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
if (ret == 0)
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
{
struct drm_i915_gem_object *obj;
struct address_space *mapping;
- u32 mask;
+ gfp_t mask;
- obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ obj = i915_gem_object_alloc(dev);
if (obj == NULL)
return NULL;
if (drm_gem_object_init(dev, &obj->base, size) != 0) {
- kfree(obj);
+ i915_gem_object_free(obj);
return NULL;
}
mask |= __GFP_DMA32;
}
- mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
+ mapping = file_inode(obj->base.filp)->i_mapping;
mapping_set_gfp_mask(mapping, mask);
i915_gem_object_init(obj, &i915_gem_object_ops);
obj->pages_pin_count = 0;
i915_gem_object_put_pages(obj);
i915_gem_object_free_mmap_offset(obj);
+ i915_gem_object_release_stolen(obj);
BUG_ON(obj->pages);
i915_gem_info_remove_obj(dev_priv, obj->base.size);
kfree(obj->bit_17);
- kfree(obj);
+ i915_gem_object_free(obj);
}
int
* And not confound mm.suspended!
*/
dev_priv->mm.suspended = 1;
- del_timer_sync(&dev_priv->hangcheck_timer);
+ del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
i915_kernel_lost_context(dev);
i915_gem_cleanup_ringbuffer(dev);
u32 misccpctl;
int i;
- if (!IS_IVYBRIDGE(dev))
+ if (!HAS_L3_GPU_CACHE(dev))
return;
if (!dev_priv->l3_parity.remap_info)
I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
if (IS_GEN6(dev))
I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
- else
+ else if (IS_GEN7(dev))
I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
+ else
+ BUG();
}
static bool
return true;
}
-int
-i915_gem_init_hw(struct drm_device *dev)
+static int i915_gem_init_rings(struct drm_device *dev)
{
- drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
- if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
- return -EIO;
-
- if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
- I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
-
- i915_gem_l3_remap(dev);
-
- i915_gem_init_swizzling(dev);
-
ret = intel_init_render_ring_buffer(dev);
if (ret)
return ret;
goto cleanup_bsd_ring;
}
- dev_priv->next_seqno = 1;
-
- /*
- * XXX: There was some w/a described somewhere suggesting loading
- * contexts before PPGTT.
- */
- i915_gem_context_init(dev);
- i915_gem_init_ppgtt(dev);
+ ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
+ if (ret)
+ goto cleanup_blt_ring;
return 0;
+cleanup_blt_ring:
+ intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
cleanup_bsd_ring:
intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
+
return ret;
}
-static bool
-intel_enable_ppgtt(struct drm_device *dev)
+int
+i915_gem_init_hw(struct drm_device *dev)
{
- if (i915_enable_ppgtt >= 0)
- return i915_enable_ppgtt;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ int ret;
-#ifdef CONFIG_INTEL_IOMMU
- /* Disable ppgtt on SNB if VT-d is on. */
- if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
- return false;
-#endif
+ if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
+ return -EIO;
- return true;
+ if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
+ I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
+
+ i915_gem_l3_remap(dev);
+
+ i915_gem_init_swizzling(dev);
+
+ ret = i915_gem_init_rings(dev);
+ if (ret)
+ return ret;
+
+ /*
+ * XXX: There was some w/a described somewhere suggesting loading
+ * contexts before PPGTT.
+ */
+ i915_gem_context_init(dev);
+ i915_gem_init_ppgtt(dev);
+
+ return 0;
}
int i915_gem_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- unsigned long gtt_size, mappable_size;
int ret;
- gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
- mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
-
mutex_lock(&dev->struct_mutex);
- if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
- /* PPGTT pdes are stolen from global gtt ptes, so shrink the
- * aperture accordingly when using aliasing ppgtt. */
- gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
-
- i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
- ret = i915_gem_init_aliasing_ppgtt(dev);
- if (ret) {
- mutex_unlock(&dev->struct_mutex);
- return ret;
- }
- } else {
- /* Let GEM Manage all of the aperture.
- *
- * However, leave one page at the end still bound to the scratch
- * page. There are a number of places where the hardware
- * apparently prefetches past the end of the object, and we've
- * seen multiple hangs with the GPU head pointer stuck in a
- * batchbuffer bound at the last page of the aperture. One page
- * should be enough to keep any prefetching inside of the
- * aperture.
- */
- i915_gem_init_global_gtt(dev, 0, mappable_size,
- gtt_size);
+ if (IS_VALLEYVIEW(dev)) {
+ /* VLVA0 (potential hack), BIOS isn't actually waking us */
+ I915_WRITE(VLV_GTLC_WAKE_CTRL, 1);
+ if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10))
+ DRM_DEBUG_DRIVER("allow wake ack timed out\n");
}
+ i915_gem_init_global_gtt(dev);
+
ret = i915_gem_init_hw(dev);
mutex_unlock(&dev->struct_mutex);
if (ret) {
if (drm_core_check_feature(dev, DRIVER_MODESET))
return 0;
- if (atomic_read(&dev_priv->mm.wedged)) {
+ if (i915_reset_in_progress(&dev_priv->gpu_error)) {
DRM_ERROR("Reenabling wedged hardware, good luck\n");
- atomic_set(&dev_priv->mm.wedged, 0);
+ atomic_set(&dev_priv->gpu_error.reset_counter, 0);
}
mutex_lock(&dev->struct_mutex);
void
i915_gem_load(struct drm_device *dev)
{
- int i;
drm_i915_private_t *dev_priv = dev->dev_private;
+ int i;
+
+ dev_priv->slab =
+ kmem_cache_create("i915_gem_object",
+ sizeof(struct drm_i915_gem_object), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
INIT_LIST_HEAD(&dev_priv->mm.active_list);
INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
i915_gem_retire_work_handler);
- init_completion(&dev_priv->error_completion);
+ init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
if (IS_GEN3(dev)) {
void i915_gem_detach_phys_object(struct drm_device *dev,
struct drm_i915_gem_object *obj)
{
- struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
+ struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
char *vaddr;
int i;
int page_count;
int id,
int align)
{
- struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
+ struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
drm_i915_private_t *dev_priv = dev->dev_private;
int ret = 0;
int page_count;
struct drm_file *file_priv)
{
void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
- char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
+ char __user *user_data = to_user_ptr(args->data_ptr);
if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
unsigned long unwritten;