drivers/gpu/drm/i915/i915_guc_submission.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  */
  24 #include <linux/firmware.h>
  25 #include <linux/circ_buf.h>
  26 #include "i915_drv.h"
  27 #include "intel_guc.h"
  28
  29 /**
  30  * DOC: GuC Client
  31  *
  32  * i915_guc_client:
  33  * We use the term client to avoid confusion with contexts. An i915_guc_client
  34  * is equivalent to the GuC object guc_context_desc. This context descriptor is
  35  * allocated from a pool of 1024 entries. The kernel driver allocates a doorbell
  36  * and a workqueue for it, as well as the process descriptor (guc_process_desc),
  37  * which is mapped to client space, so the client can write a Work Item and then
  38  * ring the doorbell.
  39  *
  40  * To simplify the implementation, we allocate one gem object that contains all
  41  * pages for doorbell, process descriptor and workqueue.
  42  *
  43  * The Scratch registers:
  44  * There are 16 MMIO-based registers starting at 0xC180. The kernel driver
  45  * writes a value to the action register (SOFT_SCRATCH_0) along with any data,
  46  * then triggers an interrupt on the GuC via another register write (0xC4C8).
  47  * The firmware writes a success/fail code back to the action register after
  48  * it processes the request. The kernel driver polls for this update and then
  49  * proceeds.
  50  * See host2guc_action()
  51  *
  52  * Doorbells:
  53  * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
  54  * mapped into process space.
  55  *
  56  * Work Items:
  57  * There are several types of work items that the host may place into a
  58  * workqueue, each with its own requirements and limitations. Currently only
  59  * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
  60  * represents in-order queue. The kernel driver packs ring tail pointer and an
  61  * ELSP context descriptor dword into Work Item.
  62  * See guc_add_workqueue_item()
  63  *
  64  */
  65
  66 /*
  67  * Read GuC command/status register (SOFT_SCRATCH_0)
  68  * Return true if it contains a response rather than a command
  69  */
  70 static inline bool host2guc_action_response(struct drm_i915_private *dev_priv,
  71                                             u32 *status)
  72 {
  73         u32 val = I915_READ(SOFT_SCRATCH(0));
  74         *status = val;
  75         return GUC2HOST_IS_RESPONSE(val);
  76 }
  77
  78 static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
  79 {
  80         struct drm_i915_private *dev_priv = guc_to_i915(guc);
  81         u32 status;
  82         int i;
  83         int ret;
  84
  85         if (WARN_ON(len < 1 || len > 15))
  86                 return -EINVAL;
  87
  88         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
  89         spin_lock(&dev_priv->guc.host2guc_lock);
  90
  91         dev_priv->guc.action_count += 1;
  92         dev_priv->guc.action_cmd = data[0];
  93
  94         for (i = 0; i < len; i++)
  95                 I915_WRITE(SOFT_SCRATCH(i), data[i]);
  96
  97         POSTING_READ(SOFT_SCRATCH(i - 1));
  98
  99         I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER);
 100
 101         /* No HOST2GUC command should take longer than 10ms */
 102         ret = wait_for_atomic(host2guc_action_response(dev_priv, &status), 10);
 103         if (status != GUC2HOST_STATUS_SUCCESS) {
 104                 /*
 105                  * Either the GuC explicitly returned an error (which
 106                  * we convert to -EIO here) or no response at all was
 107                  * received within the timeout limit (-ETIMEDOUT)
 108                  */
 109                 if (ret != -ETIMEDOUT)
 110                         ret = -EIO;
 111
 112                 DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d "
 113                                 "status=0x%08X response=0x%08X\n",
 114                                 data[0], ret, status,
 115                                 I915_READ(SOFT_SCRATCH(15)));
 116
 117                 dev_priv->guc.action_fail += 1;
 118                 dev_priv->guc.action_err = ret;
 119         }
 120         dev_priv->guc.action_status = status;
 121
 122         spin_unlock(&dev_priv->guc.host2guc_lock);
 123         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 124
 125         return ret;
 126 }
 127
 128 /*
 129  * Tell the GuC to allocate or deallocate a specific doorbell
 130  */
 131
 132 static int host2guc_allocate_doorbell(struct intel_guc *guc,
 133                                       struct i915_guc_client *client)
 134 {
 135         u32 data[2];
 136
 137         data[0] = HOST2GUC_ACTION_ALLOCATE_DOORBELL;
 138         data[1] = client->ctx_index;
 139
 140         return host2guc_action(guc, data, 2);
 141 }
 142
 143 static int host2guc_release_doorbell(struct intel_guc *guc,
 144                                      struct i915_guc_client *client)
 145 {
 146         u32 data[2];
 147
 148         data[0] = HOST2GUC_ACTION_DEALLOCATE_DOORBELL;
 149         data[1] = client->ctx_index;
 150
 151         return host2guc_action(guc, data, 2);
 152 }
 153
 154 /*
 155  * Initialise, update, or clear doorbell data shared with the GuC
 156  *
 157  * These functions modify shared data and so need access to the mapped
 158  * client object which contains the page being used for the doorbell
 159  */
 160
 161 static void guc_init_doorbell(struct intel_guc *guc,
 162                               struct i915_guc_client *client)
 163 {
 164         struct guc_doorbell_info *doorbell;
 165         void *base;
 166
 167         base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
 168         doorbell = base + client->doorbell_offset;
 169
 170         doorbell->db_status = 1;
 171         doorbell->cookie = 0;
 172
 173         kunmap_atomic(base);
 174 }
 175
 176 static int guc_ring_doorbell(struct i915_guc_client *gc)
 177 {
 178         struct guc_process_desc *desc;
 179         union guc_doorbell_qw db_cmp, db_exc, db_ret;
 180         union guc_doorbell_qw *db;
 181         void *base;
 182         int attempt = 2, ret = -EAGAIN;
 183
 184         base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
 185         desc = base + gc->proc_desc_offset;
 186
 187         /* Update the tail so it is visible to GuC */
 188         desc->tail = gc->wq_tail;
 189
 190         /* current cookie */
 191         db_cmp.db_status = GUC_DOORBELL_ENABLED;
 192         db_cmp.cookie = gc->cookie;
 193
 194         /* cookie to be updated */
 195         db_exc.db_status = GUC_DOORBELL_ENABLED;
 196         db_exc.cookie = gc->cookie + 1;
 197         if (db_exc.cookie == 0)
 198                 db_exc.cookie = 1;
 199
 200         /* pointer of current doorbell cacheline */
 201         db = base + gc->doorbell_offset;
 202
 203         while (attempt--) {
 204                 /* lets ring the doorbell */
 205                 db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
 206                         db_cmp.value_qw, db_exc.value_qw);
 207
 208                 /* if the exchange was successfully executed */
 209                 if (db_ret.value_qw == db_cmp.value_qw) {
 210                         /* db was successfully rung */
 211                         gc->cookie = db_exc.cookie;
 212                         ret = 0;
 213                         break;
 214                 }
 215
 216                 /* XXX: doorbell was lost and need to acquire it again */
 217                 if (db_ret.db_status == GUC_DOORBELL_DISABLED)
 218                         break;
 219
 220                 DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n",
 221                           db_cmp.cookie, db_ret.cookie);
 222
 223                 /* update the cookie to newly read cookie from GuC */
 224                 db_cmp.cookie = db_ret.cookie;
 225                 db_exc.cookie = db_ret.cookie + 1;
 226                 if (db_exc.cookie == 0)
 227                         db_exc.cookie = 1;
 228         }
 229
 230         kunmap_atomic(base);
 231         return ret;
 232 }
 233
 234 static void guc_disable_doorbell(struct intel_guc *guc,
 235                                  struct i915_guc_client *client)
 236 {
 237         struct drm_i915_private *dev_priv = guc_to_i915(guc);
 238         struct guc_doorbell_info *doorbell;
 239         void *base;
 240         int drbreg = GEN8_DRBREGL(client->doorbell_id);
 241         int value;
 242
 243         base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
 244         doorbell = base + client->doorbell_offset;
 245
 246         doorbell->db_status = 0;
 247
 248         kunmap_atomic(base);
 249
 250         I915_WRITE(drbreg, I915_READ(drbreg) & ~GEN8_DRB_VALID);
 251
 252         value = I915_READ(drbreg);
 253         WARN_ON((value & GEN8_DRB_VALID) != 0);
 254
 255         I915_WRITE(GEN8_DRBREGU(client->doorbell_id), 0);
 256         I915_WRITE(drbreg, 0);
 257
 258         /* XXX: wait for any interrupts */
 259         /* XXX: wait for workqueue to drain */
 260 }
 261
 262 /*
 263  * Select, assign and release doorbell cachelines
 264  *
 265  * These functions track which doorbell cachelines are in use.
 266  * The data they manipulate is protected by the host2guc lock.
 267  */
 268
 269 static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
 270 {
 271         const uint32_t cacheline_size = cache_line_size();
 272         uint32_t offset;
 273
 274         spin_lock(&guc->host2guc_lock);
 275
 276         /* Doorbell uses a single cache line within a page */
 277         offset = offset_in_page(guc->db_cacheline);
 278
 279         /* Moving to next cache line to reduce contention */
 280         guc->db_cacheline += cacheline_size;
 281
 282         spin_unlock(&guc->host2guc_lock);
 283
 284         DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
 285                         offset, guc->db_cacheline, cacheline_size);
 286
 287         return offset;
 288 }
 289
 290 static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
 291 {
 292         /*
 293          * The bitmap is split into two halves; the first half is used for
 294          * normal priority contexts, the second half for high-priority ones.
 295          * Note that logically higher priorities are numerically less than
 296          * normal ones, so the test below means "is it high-priority?"
 297          */
 298         const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH);
 299         const uint16_t half = GUC_MAX_DOORBELLS / 2;
 300         const uint16_t start = hi_pri ? half : 0;
 301         const uint16_t end = start + half;
 302         uint16_t id;
 303
 304         spin_lock(&guc->host2guc_lock);
 305         id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
 306         if (id == end)
 307                 id = GUC_INVALID_DOORBELL_ID;
 308         else
 309                 bitmap_set(guc->doorbell_bitmap, id, 1);
 310         spin_unlock(&guc->host2guc_lock);
 311
 312         DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
 313                         hi_pri ? "high" : "normal", id);
 314
 315         return id;
 316 }
 317
 318 static void release_doorbell(struct intel_guc *guc, uint16_t id)
 319 {
 320         spin_lock(&guc->host2guc_lock);
 321         bitmap_clear(guc->doorbell_bitmap, id, 1);
 322         spin_unlock(&guc->host2guc_lock);
 323 }
 324
 325 /*
 326  * Initialise the process descriptor shared with the GuC firmware.
 327  */
 328 static void guc_init_proc_desc(struct intel_guc *guc,
 329                                struct i915_guc_client *client)
 330 {
 331         struct guc_process_desc *desc;
 332         void *base;
 333
 334         base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
 335         desc = base + client->proc_desc_offset;
 336
 337         memset(desc, 0, sizeof(*desc));
 338
 339         /*
 340          * XXX: pDoorbell and WQVBaseAddress are pointers in process address
 341          * space for ring3 clients (set them as in mmap_ioctl) or kernel
 342          * space for kernel clients (map on demand instead? May make debug
 343          * easier to have it mapped).
 344          */
 345         desc->wq_base_addr = 0;
 346         desc->db_base_addr = 0;
 347
 348         desc->context_id = client->ctx_index;
 349         desc->wq_size_bytes = client->wq_size;
 350         desc->wq_status = WQ_STATUS_ACTIVE;
 351         desc->priority = client->priority;
 352
 353         kunmap_atomic(base);
 354 }
 355
 356 /*
 357  * Initialise/clear the context descriptor shared with the GuC firmware.
 358  *
 359  * This descriptor tells the GuC where (in GGTT space) to find the important
 360  * data structures relating to this client (doorbell, process descriptor,
 361  * write queue, etc).
 362  */
 363
 364 static void guc_init_ctx_desc(struct intel_guc *guc,
 365                               struct i915_guc_client *client)
 366 {
 367         struct intel_context *ctx = client->owner;
 368         struct guc_context_desc desc;
 369         struct sg_table *sg;
 370         int i;
 371
 372         memset(&desc, 0, sizeof(desc));
 373
 374         desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL;
 375         desc.context_id = client->ctx_index;
 376         desc.priority = client->priority;
 377         desc.db_id = client->doorbell_id;
 378
 379         for (i = 0; i < I915_NUM_RINGS; i++) {
 380                 struct guc_execlist_context *lrc = &desc.lrc[i];
 381                 struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
 382                 struct intel_engine_cs *ring;
 383                 struct drm_i915_gem_object *obj;
 384                 uint64_t ctx_desc;
 385
 386                 /* TODO: We have a design issue to be solved here. Only when we
 387                  * receive the first batch, we know which engine is used by the
 388                  * user. But here GuC expects the lrc and ring to be pinned. It
 389                  * is not an issue for default context, which is the only one
 390                  * for now who owns a GuC client. But for future owner of GuC
 391                  * client, need to make sure lrc is pinned prior to enter here.
 392                  */
 393                 obj = ctx->engine[i].state;
 394                 if (!obj)
 395                         break;  /* XXX: continue? */
 396
 397                 ring = ringbuf->ring;
 398                 ctx_desc = intel_lr_context_descriptor(ctx, ring);
 399                 lrc->context_desc = (u32)ctx_desc;
 400
 401                 /* The state page is after PPHWSP */
 402                 lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
 403                                 LRC_STATE_PN * PAGE_SIZE;
 404                 lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
 405                                 (ring->id << GUC_ELC_ENGINE_OFFSET);
 406
 407                 obj = ringbuf->obj;
 408
 409                 lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
 410                 lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
 411                 lrc->ring_next_free_location = lrc->ring_begin;
 412                 lrc->ring_current_tail_pointer_value = 0;
 413
 414                 desc.engines_used |= (1 << ring->id);
 415         }
 416
 417         WARN_ON(desc.engines_used == 0);
 418
 419         /*
 420          * The CPU address is only needed at certain points, so kmap_atomic on
 421          * demand instead of storing it in the ctx descriptor.
 422          * XXX: May make debug easier to have it mapped
 423          */
 424         desc.db_trigger_cpu = 0;
 425         desc.db_trigger_uk = client->doorbell_offset +
 426                 i915_gem_obj_ggtt_offset(client->client_obj);
 427         desc.db_trigger_phy = client->doorbell_offset +
 428                 sg_dma_address(client->client_obj->pages->sgl);
 429
 430         desc.process_desc = client->proc_desc_offset +
 431                 i915_gem_obj_ggtt_offset(client->client_obj);
 432
 433         desc.wq_addr = client->wq_offset +
 434                 i915_gem_obj_ggtt_offset(client->client_obj);
 435
 436         desc.wq_size = client->wq_size;
 437
 438         /*
 439          * XXX: Take LRCs from an existing intel_context if this is not an
 440          * IsKMDCreatedContext client
 441          */
 442         desc.desc_private = (uintptr_t)client;
 443
 444         /* Pool context is pinned already */
 445         sg = guc->ctx_pool_obj->pages;
 446         sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 447                              sizeof(desc) * client->ctx_index);
 448 }
 449
 450 static void guc_fini_ctx_desc(struct intel_guc *guc,
 451                               struct i915_guc_client *client)
 452 {
 453         struct guc_context_desc desc;
 454         struct sg_table *sg;
 455
 456         memset(&desc, 0, sizeof(desc));
 457
 458         sg = guc->ctx_pool_obj->pages;
 459         sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 460                              sizeof(desc) * client->ctx_index);
 461 }
 462
 463 /* Get valid workqueue item and return it back to offset */
 464 static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
 465 {
 466         struct guc_process_desc *desc;
 467         void *base;
 468         u32 size = sizeof(struct guc_wq_item);
 469         int ret = 0, timeout_counter = 200;
 470
 471         base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
 472         desc = base + gc->proc_desc_offset;
 473
 474         while (timeout_counter-- > 0) {
 475                 ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
 476                                 gc->wq_size) >= size, 1);
 477
 478                 if (!ret) {
 479                         *offset = gc->wq_tail;
 480
 481                         /* advance the tail for next workqueue item */
 482                         gc->wq_tail += size;
 483                         gc->wq_tail &= gc->wq_size - 1;
 484
 485                         /* this will break the loop */
 486                         timeout_counter = 0;
 487                 }
 488         };
 489
 490         kunmap_atomic(base);
 491
 492         return ret;
 493 }
 494
 495 static int guc_add_workqueue_item(struct i915_guc_client *gc,
 496                                   struct drm_i915_gem_request *rq)
 497 {
 498         enum intel_ring_id ring_id = rq->ring->id;
 499         struct guc_wq_item *wqi;
 500         void *base;
 501         u32 tail, wq_len, wq_off = 0;
 502         int ret;
 503
 504         ret = guc_get_workqueue_space(gc, &wq_off);
 505         if (ret)
 506                 return ret;
 507
 508         /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
 509          * should not have the case where structure wqi is across page, neither
 510          * wrapped to the beginning. This simplifies the implementation below.
 511          *
 512          * XXX: if not the case, we need save data to a temp wqi and copy it to
 513          * workqueue buffer dw by dw.
 514          */
 515         WARN_ON(sizeof(struct guc_wq_item) != 16);
 516         WARN_ON(wq_off & 3);
 517
 518         /* wq starts from the page after doorbell / process_desc */
 519         base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
 520                         (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
 521         wq_off &= PAGE_SIZE - 1;
 522         wqi = (struct guc_wq_item *)((char *)base + wq_off);
 523
 524         /* len does not include the header */
 525         wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
 526         wqi->header = WQ_TYPE_INORDER |
 527                         (wq_len << WQ_LEN_SHIFT) |
 528                         (ring_id << WQ_TARGET_SHIFT) |
 529                         WQ_NO_WCFLUSH_WAIT;
 530
 531         /* The GuC wants only the low-order word of the context descriptor */
 532         wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, rq->ring);
 533
 534         /* The GuC firmware wants the tail index in QWords, not bytes */
 535         tail = rq->ringbuf->tail >> 3;
 536         wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
 537         wqi->fence_id = 0; /*XXX: what fence to be here */
 538
 539         kunmap_atomic(base);
 540
 541         return 0;
 542 }
 543
 544 #define CTX_RING_BUFFER_START           0x08
 545
 546 /* Update the ringbuffer pointer in a saved context image */
 547 static void lr_context_update(struct drm_i915_gem_request *rq)
 548 {
 549         enum intel_ring_id ring_id = rq->ring->id;
 550         struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring_id].state;
 551         struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
 552         struct page *page;
 553         uint32_t *reg_state;
 554
 555         BUG_ON(!ctx_obj);
 556         WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
 557         WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
 558
 559         page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
 560         reg_state = kmap_atomic(page);
 561
 562         reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
 563
 564         kunmap_atomic(reg_state);
 565 }
 566
 567 /**
 568  * i915_guc_submit() - Submit commands through GuC
 569  * @client:     the guc client where commands will go through
 570  * @ctx:        LRC where commands come from
 571  * @ring:       HW engine that will excute the commands
 572  *
 573  * Return:      0 if succeed
 574  */
 575 int i915_guc_submit(struct i915_guc_client *client,
 576                     struct drm_i915_gem_request *rq)
 577 {
 578         struct intel_guc *guc = client->guc;
 579         enum intel_ring_id ring_id = rq->ring->id;
 580         unsigned long flags;
 581         int q_ret, b_ret;
 582
 583         /* Need this because of the deferred pin ctx and ring */
 584         /* Shall we move this right after ring is pinned? */
 585         lr_context_update(rq);
 586
 587         spin_lock_irqsave(&client->wq_lock, flags);
 588
 589         q_ret = guc_add_workqueue_item(client, rq);
 590         if (q_ret == 0)
 591                 b_ret = guc_ring_doorbell(client);
 592
 593         client->submissions[ring_id] += 1;
 594         if (q_ret) {
 595                 client->q_fail += 1;
 596                 client->retcode = q_ret;
 597         } else if (b_ret) {
 598                 client->b_fail += 1;
 599                 client->retcode = q_ret = b_ret;
 600         } else {
 601                 client->retcode = 0;
 602         }
 603         spin_unlock_irqrestore(&client->wq_lock, flags);
 604
 605         spin_lock(&guc->host2guc_lock);
 606         guc->submissions[ring_id] += 1;
 607         guc->last_seqno[ring_id] = rq->seqno;
 608         spin_unlock(&guc->host2guc_lock);
 609
 610         return q_ret;
 611 }
 612
 613 /*
 614  * Everything below here is concerned with setup & teardown, and is
 615  * therefore not part of the somewhat time-critical batch-submission
 616  * path of i915_guc_submit() above.
 617  */
 618
 619 /**
 620  * gem_allocate_guc_obj() - Allocate gem object for GuC usage
 621  * @dev:        drm device
 622  * @size:       size of object
 623  *
 624  * This is a wrapper to create a gem obj. In order to use it inside GuC, the
 625  * object needs to be pinned lifetime. Also we must pin it to gtt space other
 626  * than [0, GUC_WOPCM_TOP) because this range is reserved inside GuC.
 627  *
 628  * Return:      A drm_i915_gem_object if successful, otherwise NULL.
 629  */
 630 static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev,
 631                                                         u32 size)
 632 {
 633         struct drm_i915_private *dev_priv = dev->dev_private;
 634         struct drm_i915_gem_object *obj;
 635
 636         obj = i915_gem_alloc_object(dev, size);
 637         if (!obj)
 638                 return NULL;
 639
 640         if (i915_gem_object_get_pages(obj)) {
 641                 drm_gem_object_unreference(&obj->base);
 642                 return NULL;
 643         }
 644
 645         if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
 646                         PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
 647                 drm_gem_object_unreference(&obj->base);
 648                 return NULL;
 649         }
 650
 651         /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
 652         I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 653
 654         return obj;
 655 }
 656
 657 /**
 658  * gem_release_guc_obj() - Release gem object allocated for GuC usage
 659  * @obj:        gem obj to be released
 660   */
 661 static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
 662 {
 663         if (!obj)
 664                 return;
 665
 666         if (i915_gem_obj_is_pinned(obj))
 667                 i915_gem_object_ggtt_unpin(obj);
 668
 669         drm_gem_object_unreference(&obj->base);
 670 }
 671
 672 static void guc_client_free(struct drm_device *dev,
 673                             struct i915_guc_client *client)
 674 {
 675         struct drm_i915_private *dev_priv = dev->dev_private;
 676         struct intel_guc *guc = &dev_priv->guc;
 677
 678         if (!client)
 679                 return;
 680
 681         if (client->doorbell_id != GUC_INVALID_DOORBELL_ID) {
 682                 /*
 683                  * First disable the doorbell, then tell the GuC we've
 684                  * finished with it, finally deallocate it in our bitmap
 685                  */
 686                 guc_disable_doorbell(guc, client);
 687                 host2guc_release_doorbell(guc, client);
 688                 release_doorbell(guc, client->doorbell_id);
 689         }
 690
 691         /*
 692          * XXX: wait for any outstanding submissions before freeing memory.
 693          * Be sure to drop any locks
 694          */
 695
 696         gem_release_guc_obj(client->client_obj);
 697
 698         if (client->ctx_index != GUC_INVALID_CTX_ID) {
 699                 guc_fini_ctx_desc(guc, client);
 700                 ida_simple_remove(&guc->ctx_ids, client->ctx_index);
 701         }
 702
 703         kfree(client);
 704 }
 705
 706 /**
 707  * guc_client_alloc() - Allocate an i915_guc_client
 708  * @dev:        drm device
 709  * @priority:   four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW
 710  *              The kernel client to replace ExecList submission is created with
 711  *              NORMAL priority. Priority of a client for scheduler can be HIGH,
 712  *              while a preemption context can use CRITICAL.
 713  * @ctx         the context to own the client (we use the default render context)
 714  *
 715  * Return:      An i915_guc_client object if success.
 716  */
 717 static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
 718                                                 uint32_t priority,
 719                                                 struct intel_context *ctx)
 720 {
 721         struct i915_guc_client *client;
 722         struct drm_i915_private *dev_priv = dev->dev_private;
 723         struct intel_guc *guc = &dev_priv->guc;
 724         struct drm_i915_gem_object *obj;
 725
 726         client = kzalloc(sizeof(*client), GFP_KERNEL);
 727         if (!client)
 728                 return NULL;
 729
 730         client->doorbell_id = GUC_INVALID_DOORBELL_ID;
 731         client->priority = priority;
 732         client->owner = ctx;
 733         client->guc = guc;
 734
 735         client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
 736                         GUC_MAX_GPU_CONTEXTS, GFP_KERNEL);
 737         if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) {
 738                 client->ctx_index = GUC_INVALID_CTX_ID;
 739                 goto err;
 740         }
 741
 742         /* The first page is doorbell/proc_desc. Two followed pages are wq. */
 743         obj = gem_allocate_guc_obj(dev, GUC_DB_SIZE + GUC_WQ_SIZE);
 744         if (!obj)
 745                 goto err;
 746
 747         client->client_obj = obj;
 748         client->wq_offset = GUC_DB_SIZE;
 749         client->wq_size = GUC_WQ_SIZE;
 750         spin_lock_init(&client->wq_lock);
 751
 752         client->doorbell_offset = select_doorbell_cacheline(guc);
 753
 754         /*
 755          * Since the doorbell only requires a single cacheline, we can save
 756          * space by putting the application process descriptor in the same
 757          * page. Use the half of the page that doesn't include the doorbell.
 758          */
 759         if (client->doorbell_offset >= (GUC_DB_SIZE / 2))
 760                 client->proc_desc_offset = 0;
 761         else
 762                 client->proc_desc_offset = (GUC_DB_SIZE / 2);
 763
 764         client->doorbell_id = assign_doorbell(guc, client->priority);
 765         if (client->doorbell_id == GUC_INVALID_DOORBELL_ID)
 766                 /* XXX: evict a doorbell instead */
 767                 goto err;
 768
 769         guc_init_proc_desc(guc, client);
 770         guc_init_ctx_desc(guc, client);
 771         guc_init_doorbell(guc, client);
 772
 773         /* XXX: Any cache flushes needed? General domain mgmt calls? */
 774
 775         if (host2guc_allocate_doorbell(guc, client))
 776                 goto err;
 777
 778         DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u db_id %u\n",
 779                 priority, client, client->ctx_index, client->doorbell_id);
 780
 781         return client;
 782
 783 err:
 784         DRM_ERROR("FAILED to create priority %u GuC client!\n", priority);
 785
 786         guc_client_free(dev, client);
 787         return NULL;
 788 }
 789
 790 static void guc_create_log(struct intel_guc *guc)
 791 {
 792         struct drm_i915_private *dev_priv = guc_to_i915(guc);
 793         struct drm_i915_gem_object *obj;
 794         unsigned long offset;
 795         uint32_t size, flags;
 796
 797         if (i915.guc_log_level < GUC_LOG_VERBOSITY_MIN)
 798                 return;
 799
 800         if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX)
 801                 i915.guc_log_level = GUC_LOG_VERBOSITY_MAX;
 802
 803         /* The first page is to save log buffer state. Allocate one
 804          * extra page for others in case for overlap */
 805         size = (1 + GUC_LOG_DPC_PAGES + 1 +
 806                 GUC_LOG_ISR_PAGES + 1 +
 807                 GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
 808
 809         obj = guc->log_obj;
 810         if (!obj) {
 811                 obj = gem_allocate_guc_obj(dev_priv->dev, size);
 812                 if (!obj) {
 813                         /* logging will be off */
 814                         i915.guc_log_level = -1;
 815                         return;
 816                 }
 817
 818                 guc->log_obj = obj;
 819         }
 820
 821         /* each allocated unit is a page */
 822         flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL |
 823                 (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) |
 824                 (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
 825                 (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
 826
 827         offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */
 828         guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
 829 }
 830
 831 /*
 832  * Set up the memory resources to be shared with the GuC.  At this point,
 833  * we require just one object that can be mapped through the GGTT.
 834  */
 835 int i915_guc_submission_init(struct drm_device *dev)
 836 {
 837         struct drm_i915_private *dev_priv = dev->dev_private;
 838         const size_t ctxsize = sizeof(struct guc_context_desc);
 839         const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
 840         const size_t gemsize = round_up(poolsize, PAGE_SIZE);
 841         struct intel_guc *guc = &dev_priv->guc;
 842
 843         if (!i915.enable_guc_submission)
 844                 return 0; /* not enabled  */
 845
 846         if (guc->ctx_pool_obj)
 847                 return 0; /* already allocated */
 848
 849         guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv->dev, gemsize);
 850         if (!guc->ctx_pool_obj)
 851                 return -ENOMEM;
 852
 853         spin_lock_init(&dev_priv->guc.host2guc_lock);
 854
 855         ida_init(&guc->ctx_ids);
 856
 857         guc_create_log(guc);
 858
 859         return 0;
 860 }
 861
 862 int i915_guc_submission_enable(struct drm_device *dev)
 863 {
 864         struct drm_i915_private *dev_priv = dev->dev_private;
 865         struct intel_guc *guc = &dev_priv->guc;
 866         struct intel_context *ctx = dev_priv->ring[RCS].default_context;
 867         struct i915_guc_client *client;
 868
 869         /* client for execbuf submission */
 870         client = guc_client_alloc(dev, GUC_CTX_PRIORITY_KMD_NORMAL, ctx);
 871         if (!client) {
 872                 DRM_ERROR("Failed to create execbuf guc_client\n");
 873                 return -ENOMEM;
 874         }
 875
 876         guc->execbuf_client = client;
 877         return 0;
 878 }
 879
 880 void i915_guc_submission_disable(struct drm_device *dev)
 881 {
 882         struct drm_i915_private *dev_priv = dev->dev_private;
 883         struct intel_guc *guc = &dev_priv->guc;
 884
 885         guc_client_free(dev, guc->execbuf_client);
 886         guc->execbuf_client = NULL;
 887 }
 888
 889 void i915_guc_submission_fini(struct drm_device *dev)
 890 {
 891         struct drm_i915_private *dev_priv = dev->dev_private;
 892         struct intel_guc *guc = &dev_priv->guc;
 893
 894         gem_release_guc_obj(dev_priv->guc.log_obj);
 895         guc->log_obj = NULL;
 896
 897         if (guc->ctx_pool_obj)
 898                 ida_destroy(&guc->ctx_ids);
 899         gem_release_guc_obj(guc->ctx_pool_obj);
 900         guc->ctx_pool_obj = NULL;
 901 }