drivers/gpu/drm/i915/i915_gem_tiling.c

   1 /*
   2  * Copyright © 2008 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 #include "drmP.h"
  29 #include "drm.h"
  30 #include "i915_drm.h"
  31 #include "i915_drv.h"
  32
  33 /** @file i915_gem_tiling.c
  34  *
  35  * Support for managing tiling state of buffer objects.
  36  *
  37  * The idea behind tiling is to increase cache hit rates by rearranging
  38  * pixel data so that a group of pixel accesses are in the same cacheline.
  39  * Performance improvements from doing this on the back/depth buffer are on
  40  * the order of 30%.
  41  *
  42  * Intel architectures make this somewhat more complicated, though, by
  43  * adjustments made to addressing of data when the memory is in interleaved
  44  * mode (matched pairs of DIMMS) to improve memory bandwidth.
  45  * For interleaved memory, the CPU sends every sequential 64 bytes
  46  * to an alternate memory channel so it can get the bandwidth from both.
  47  *
  48  * The GPU also rearranges its accesses for increased bandwidth to interleaved
  49  * memory, and it matches what the CPU does for non-tiled.  However, when tiled
  50  * it does it a little differently, since one walks addresses not just in the
  51  * X direction but also Y.  So, along with alternating channels when bit
  52  * 6 of the address flips, it also alternates when other bits flip --  Bits 9
  53  * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
  54  * are common to both the 915 and 965-class hardware.
  55  *
  56  * The CPU also sometimes XORs in higher bits as well, to improve
  57  * bandwidth doing strided access like we do so frequently in graphics.  This
  58  * is called "Channel XOR Randomization" in the MCH documentation.  The result
  59  * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
  60  * decode.
  61  *
  62  * All of this bit 6 XORing has an effect on our memory management,
  63  * as we need to make sure that the 3d driver can correctly address object
  64  * contents.
  65  *
  66  * If we don't have interleaved memory, all tiling is safe and no swizzling is
  67  * required.
  68  *
  69  * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
  70  * 17 is not just a page offset, so as we page an object out and back in,
  71  * individual pages in it will have different bit 17 addresses, resulting in
  72  * each 64 bytes being swapped with its neighbor!
  73  *
  74  * Otherwise, if interleaved, we have to tell the 3d driver what the address
  75  * swizzling it needs to do is, since it's writing with the CPU to the pages
  76  * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
  77  * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
  78  * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
  79  * to match what the GPU expects.
  80  */
  81
  82 /**
  83  * Detects bit 6 swizzling of address lookup between IGD access and CPU
  84  * access through main memory.
  85  */
  86 void
  87 i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
  88 {
  89         drm_i915_private_t *dev_priv = dev->dev_private;
  90         uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
  91         uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
  92
  93         if (!IS_I9XX(dev)) {
  94                 /* As far as we know, the 865 doesn't have these bit 6
  95                  * swizzling issues.
  96                  */
  97                 swizzle_x = I915_BIT_6_SWIZZLE_NONE;
  98                 swizzle_y = I915_BIT_6_SWIZZLE_NONE;
  99         } else if (IS_MOBILE(dev)) {
 100                 uint32_t dcc;
 101
 102                 /* On mobile 9xx chipsets, channel interleave by the CPU is
 103                  * determined by DCC.  For single-channel, neither the CPU
 104                  * nor the GPU do swizzling.  For dual channel interleaved,
 105                  * the GPU's interleave is bit 9 and 10 for X tiled, and bit
 106                  * 9 for Y tiled.  The CPU's interleave is independent, and
 107                  * can be based on either bit 11 (haven't seen this yet) or
 108                  * bit 17 (common).
 109                  */
 110                 dcc = I915_READ(DCC);
 111                 switch (dcc & DCC_ADDRESSING_MODE_MASK) {
 112                 case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
 113                 case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
 114                         swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 115                         swizzle_y = I915_BIT_6_SWIZZLE_NONE;
 116                         break;
 117                 case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
 118                         if (dcc & DCC_CHANNEL_XOR_DISABLE) {
 119                                 /* This is the base swizzling by the GPU for
 120                                  * tiled buffers.
 121                                  */
 122                                 swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 123                                 swizzle_y = I915_BIT_6_SWIZZLE_9;
 124                         } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
 125                                 /* Bit 11 swizzling by the CPU in addition. */
 126                                 swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
 127                                 swizzle_y = I915_BIT_6_SWIZZLE_9_11;
 128                         } else {
 129                                 /* Bit 17 swizzling by the CPU in addition. */
 130                                 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 131                                 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 132                         }
 133                         break;
 134                 }
 135                 if (dcc == 0xffffffff) {
 136                         DRM_ERROR("Couldn't read from MCHBAR.  "
 137                                   "Disabling tiling.\n");
 138                         swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 139                         swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 140                 }
 141         } else {
 142                 /* The 965, G33, and newer, have a very flexible memory
 143                  * configuration.  It will enable dual-channel mode
 144                  * (interleaving) on as much memory as it can, and the GPU
 145                  * will additionally sometimes enable different bit 6
 146                  * swizzling for tiled objects from the CPU.
 147                  *
 148                  * Here's what I found on the G965:
 149                  *    slot fill         memory size  swizzling
 150                  * 0A   0B   1A   1B    1-ch   2-ch
 151                  * 512  0    0    0     512    0     O
 152                  * 512  0    512  0     16     1008  X
 153                  * 512  0    0    512   16     1008  X
 154                  * 0    512  0    512   16     1008  X
 155                  * 1024 1024 1024 0     2048   1024  O
 156                  *
 157                  * We could probably detect this based on either the DRB
 158                  * matching, which was the case for the swizzling required in
 159                  * the table above, or from the 1-ch value being less than
 160                  * the minimum size of a rank.
 161                  */
 162                 if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
 163                         swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 164                         swizzle_y = I915_BIT_6_SWIZZLE_NONE;
 165                 } else {
 166                         swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 167                         swizzle_y = I915_BIT_6_SWIZZLE_9;
 168                 }
 169         }
 170
 171         dev_priv->mm.bit_6_swizzle_x = swizzle_x;
 172         dev_priv->mm.bit_6_swizzle_y = swizzle_y;
 173 }
 174
 175
 176 /**
 177  * Returns the size of the fence for a tiled object of the given size.
 178  */
 179 static int
 180 i915_get_fence_size(struct drm_device *dev, int size)
 181 {
 182         int i;
 183         int start;
 184
 185         if (IS_I965G(dev)) {
 186                 /* The 965 can have fences at any page boundary. */
 187                 return ALIGN(size, 4096);
 188         } else {
 189                 /* Align the size to a power of two greater than the smallest
 190                  * fence size.
 191                  */
 192                 if (IS_I9XX(dev))
 193                         start = 1024 * 1024;
 194                 else
 195                         start = 512 * 1024;
 196
 197                 for (i = start; i < size; i <<= 1)
 198                         ;
 199
 200                 return i;
 201         }
 202 }
 203
 204 /* Check pitch constriants for all chips & tiling formats */
 205 static bool
 206 i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 207 {
 208         int tile_width;
 209
 210         /* Linear is always fine */
 211         if (tiling_mode == I915_TILING_NONE)
 212                 return true;
 213
 214         if (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
 215                 tile_width = 128;
 216         else
 217                 tile_width = 512;
 218
 219         /* check maximum stride & object size */
 220         if (IS_I965G(dev)) {
 221                 /* i965 stores the end address of the gtt mapping in the fence
 222                  * reg, so dont bother to check the size */
 223                 if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
 224                         return false;
 225         } else if (IS_I9XX(dev)) {
 226                 if (stride / tile_width > I830_FENCE_MAX_PITCH_VAL ||
 227                     size > (I830_FENCE_MAX_SIZE_VAL << 20))
 228                         return false;
 229         } else {
 230                 if (stride / 128 > I830_FENCE_MAX_PITCH_VAL ||
 231                     size > (I830_FENCE_MAX_SIZE_VAL << 19))
 232                         return false;
 233         }
 234
 235         /* 965+ just needs multiples of tile width */
 236         if (IS_I965G(dev)) {
 237                 if (stride & (tile_width - 1))
 238                         return false;
 239                 return true;
 240         }
 241
 242         /* Pre-965 needs power of two tile widths */
 243         if (stride < tile_width)
 244                 return false;
 245
 246         if (stride & (stride - 1))
 247                 return false;
 248
 249         /* We don't handle the aperture area covered by the fence being bigger
 250          * than the object size.
 251          */
 252         if (i915_get_fence_size(dev, size) != size)
 253                 return false;
 254
 255         return true;
 256 }
 257
 258 /**
 259  * Sets the tiling mode of an object, returning the required swizzling of
 260  * bit 6 of addresses in the object.
 261  */
 262 int
 263 i915_gem_set_tiling(struct drm_device *dev, void *data,
 264                    struct drm_file *file_priv)
 265 {
 266         struct drm_i915_gem_set_tiling *args = data;
 267         drm_i915_private_t *dev_priv = dev->dev_private;
 268         struct drm_gem_object *obj;
 269         struct drm_i915_gem_object *obj_priv;
 270
 271         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
 272         if (obj == NULL)
 273                 return -EINVAL;
 274         obj_priv = obj->driver_private;
 275
 276         if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode)) {
 277                 drm_gem_object_unreference(obj);
 278                 return -EINVAL;
 279         }
 280
 281         mutex_lock(&dev->struct_mutex);
 282
 283         if (args->tiling_mode == I915_TILING_NONE) {
 284                 obj_priv->tiling_mode = I915_TILING_NONE;
 285                 args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
 286         } else {
 287                 if (args->tiling_mode == I915_TILING_X)
 288                         args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
 289                 else
 290                         args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
 291                 /* If we can't handle the swizzling, make it untiled. */
 292                 if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
 293                         args->tiling_mode = I915_TILING_NONE;
 294                         args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
 295                 }
 296         }
 297         if (args->tiling_mode != obj_priv->tiling_mode) {
 298                 int ret;
 299
 300                 /* Unbind the object, as switching tiling means we're
 301                  * switching the cache organization due to fencing, probably.
 302                  */
 303                 ret = i915_gem_object_unbind(obj);
 304                 if (ret != 0) {
 305                         WARN(ret != -ERESTARTSYS,
 306                              "failed to unbind object for tiling switch");
 307                         args->tiling_mode = obj_priv->tiling_mode;
 308                         mutex_unlock(&dev->struct_mutex);
 309                         drm_gem_object_unreference(obj);
 310
 311                         return ret;
 312                 }
 313                 obj_priv->tiling_mode = args->tiling_mode;
 314         }
 315         obj_priv->stride = args->stride;
 316
 317         drm_gem_object_unreference(obj);
 318         mutex_unlock(&dev->struct_mutex);
 319
 320         return 0;
 321 }
 322
 323 /**
 324  * Returns the current tiling mode and required bit 6 swizzling for the object.
 325  */
 326 int
 327 i915_gem_get_tiling(struct drm_device *dev, void *data,
 328                    struct drm_file *file_priv)
 329 {
 330         struct drm_i915_gem_get_tiling *args = data;
 331         drm_i915_private_t *dev_priv = dev->dev_private;
 332         struct drm_gem_object *obj;
 333         struct drm_i915_gem_object *obj_priv;
 334
 335         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
 336         if (obj == NULL)
 337                 return -EINVAL;
 338         obj_priv = obj->driver_private;
 339
 340         mutex_lock(&dev->struct_mutex);
 341
 342         args->tiling_mode = obj_priv->tiling_mode;
 343         switch (obj_priv->tiling_mode) {
 344         case I915_TILING_X:
 345                 args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
 346                 break;
 347         case I915_TILING_Y:
 348                 args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
 349                 break;
 350         case I915_TILING_NONE:
 351                 args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
 352                 break;
 353         default:
 354                 DRM_ERROR("unknown tiling mode\n");
 355         }
 356
 357         drm_gem_object_unreference(obj);
 358         mutex_unlock(&dev->struct_mutex);
 359
 360         return 0;
 361 }