/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <drm/drmP.h>
#include "gpu_scheduler.h"

/* Initialize a given run queue struct */
static void init_rq(struct amd_run_queue *rq)
{
	INIT_LIST_HEAD(&rq->head.list);
	rq->head.belongto_rq = rq;
	mutex_init(&rq->lock);
	atomic_set(&rq->nr_entity, 0);
	rq->current_entity = &rq->head;
}

/* Note: the caller must hold the lock or be in an atomic context */
static void rq_remove_entity(struct amd_run_queue *rq,
			     struct amd_sched_entity *entity)
{
	if (rq->current_entity == entity)
		rq->current_entity = list_entry(entity->list.prev,
						typeof(*entity), list);
	list_del_init(&entity->list);
	atomic_dec(&rq->nr_entity);
}

static void rq_add_entity(struct amd_run_queue *rq,
			  struct amd_sched_entity *entity)
{
	list_add_tail(&entity->list, &rq->head.list);
	atomic_inc(&rq->nr_entity);
}

/**
 * Select the next entity from a specified run queue with round robin policy.
 * It could return the same entity as the current one if it is the only
 * available one in the queue. Return NULL if nothing is available.
 */
static struct amd_sched_entity *rq_select_entity(struct amd_run_queue *rq)
{
	struct amd_sched_entity *p = rq->current_entity;
	int i = atomic_read(&rq->nr_entity) + 1; /* real count + dummy head */

	while (i) {
		p = list_entry(p->list.next, typeof(*p), list);
		if (!rq->check_entity_status(p)) {
			rq->current_entity = p;
			break;
		}
		i--;
	}
	return i ? p : NULL;
}

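/*
 * Design note (added for clarity): rq->head is a dummy sentinel entity. It is
 * linked into its own list by init_rq() and rejected by check_entity_status(),
 * so the round robin walk in rq_select_entity() always has a node to start
 * from even when the run queue holds no real entities.
 */
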
static bool context_entity_is_waiting(struct amd_context_entity *entity)
{
	/* TODO: sync obj for multi-ring synchronization */
	return false;
}

static int gpu_entity_check_status(struct amd_sched_entity *entity)
{
	struct amd_context_entity *tmp = NULL;

	if (entity == &entity->belongto_rq->head)
		return -1;

	tmp = container_of(entity, typeof(*tmp), generic_entity);
	if (kfifo_is_empty(&tmp->job_queue) ||
	    context_entity_is_waiting(tmp))
		return -1;

	return 0;
}

/**
 * Note: This function should only be called inside the scheduler main
 * function for thread safety, there is no other protection here.
 * Return true if the scheduler has something ready to run.
 *
 * For active_hw_rq, there is only one producer (scheduler thread) and
 * one consumer (ISR). It should be safe to use this function in the
 * scheduler main thread to decide whether to continue emitting more IBs.
 */
static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
{
	return !kfifo_is_full(&sched->active_hw_rq);
}

/**
 * Select the next entity from the kernel run queue; if none is available,
 * return NULL.
 */
static struct amd_context_entity *kernel_rq_select_context(
	struct amd_gpu_scheduler *sched)
{
	struct amd_sched_entity *sched_entity = NULL;
	struct amd_context_entity *tmp = NULL;
	struct amd_run_queue *rq = &sched->kernel_rq;

	mutex_lock(&rq->lock);
	sched_entity = rq_select_entity(rq);
	if (sched_entity)
		tmp = container_of(sched_entity,
				   typeof(*tmp), generic_entity);
	mutex_unlock(&rq->lock);
	return tmp;
}

/**
 * Select the next entity containing real IB submissions
 */
static struct amd_context_entity *select_context(
	struct amd_gpu_scheduler *sched)
{
	struct amd_context_entity *wake_entity = NULL;
	struct amd_context_entity *tmp;
	struct amd_run_queue *rq;

	if (!is_scheduler_ready(sched))
		return NULL;

	/* Kernel run queue has higher priority than normal run queue */
	tmp = kernel_rq_select_context(sched);
	if (tmp)
		goto exit;

	WARN_ON(offsetof(struct amd_context_entity, generic_entity) != 0);

	rq = &sched->sched_rq;
	mutex_lock(&rq->lock);
	tmp = container_of(rq_select_entity(rq),
			   typeof(*tmp), generic_entity);
	mutex_unlock(&rq->lock);
exit:
	if (sched->current_entity && (sched->current_entity != tmp))
		wake_entity = sched->current_entity;
	sched->current_entity = tmp;
	if (wake_entity)
		wake_up(&wake_entity->wait_queue);
	return tmp;
}

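/*
 * Design note (added for clarity): select_context() always tries the kernel
 * run queue first, and when the scheduler switches away from the previously
 * running entity it wakes that entity's wait_queue so waiters such as
 * amd_context_entity_fini() can re-check whether the entity has gone idle.
 */
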
/**
 * Init a context entity used by the scheduler when submitting to the HW ring.
 *
 * @sched	The pointer to the scheduler
 * @entity	The pointer to a valid amd_context_entity
 * @parent	The parent entity of this amd_context_entity
 * @rq		The run queue this entity belongs to
 * @context_id	The context id for this entity
 *
 * return 0 on success, negative error code on failure
 */
int amd_context_entity_init(struct amd_gpu_scheduler *sched,
			    struct amd_context_entity *entity,
			    struct amd_sched_entity *parent,
			    struct amd_run_queue *rq,
			    uint32_t context_id)
{
	uint64_t seq_ring = 0;

	if (!(sched && entity && rq))
		return -EINVAL;

	memset(entity, 0, sizeof(struct amd_context_entity));
	seq_ring = ((uint64_t)sched->ring_id) << 60;
	spin_lock_init(&entity->lock);
	entity->generic_entity.belongto_rq = rq;
	entity->generic_entity.parent = parent;
	entity->scheduler = sched;
	init_waitqueue_head(&entity->wait_queue);
	init_waitqueue_head(&entity->wait_emit);
	if (kfifo_alloc(&entity->job_queue,
			AMD_MAX_JOB_ENTRY_PER_CONTEXT * sizeof(void *),
			GFP_KERNEL))
		return -EINVAL;

	spin_lock_init(&entity->queue_lock);
	entity->tgid = (context_id == AMD_KERNEL_CONTEXT_ID) ?
		AMD_KERNEL_PROCESS_ID : current->tgid;
	entity->context_id = context_id;
	atomic64_set(&entity->last_emitted_v_seq, seq_ring);
	atomic64_set(&entity->last_queued_v_seq, seq_ring);
	atomic64_set(&entity->last_signaled_v_seq, seq_ring);

	/* Add the entity to the run queue */
	mutex_lock(&rq->lock);
	rq_add_entity(rq, &entity->generic_entity);
	mutex_unlock(&rq->lock);
	return 0;
}

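/*
 * Illustrative only: a minimal sketch of how a driver might register a
 * context entity with the normal run queue. The my_sched, my_entity and
 * my_ctx_id names are placeholders, not part of this file.
 *
 *	int r = amd_context_entity_init(my_sched, my_entity,
 *					NULL,
 *					&my_sched->sched_rq,
 *					my_ctx_id);
 *	if (r)
 *		return r;
 */
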
/**
 * Query if entity is initialized
 *
 * @sched	Pointer to scheduler instance
 * @entity	The pointer to a valid scheduler entity
 *
 * return true if entity is initialized, false otherwise
 */
static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched,
					  struct amd_context_entity *entity)
{
	return entity->scheduler == sched &&
		entity->generic_entity.belongto_rq != NULL;
}

static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
				   struct amd_context_entity *entity)
{
	/**
	 * Idle means no pending IBs, and the entity is not
	 * currently being used.
	 */
	if ((sched->current_entity != entity) &&
	    kfifo_is_empty(&entity->job_queue))
		return true;

	return false;
}

/**
 * Destroy a context entity
 *
 * @sched	Pointer to scheduler instance
 * @entity	The pointer to a valid scheduler entity
 *
 * return 0 on success, negative error code on failure
 */
int amd_context_entity_fini(struct amd_gpu_scheduler *sched,
			    struct amd_context_entity *entity)
{
	int r = 0;
	struct amd_run_queue *rq = entity->generic_entity.belongto_rq;

	if (!is_context_entity_initialized(sched, entity))
		return 0;

	/**
	 * The client will not queue more IBs during this fini, consume
	 * the existing queued IBs.
	 */
	r = wait_event_timeout(
		entity->wait_queue,
		is_context_entity_idle(sched, entity),
		msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS)
		) ? 0 : -1;

	if (r) {
		if (entity->is_pending)
			DRM_INFO("Entity %u is in waiting state during fini, "
				 "all pending IBs will be canceled.\n",
				 entity->context_id);
	}

	mutex_lock(&rq->lock);
	rq_remove_entity(rq, &entity->generic_entity);
	mutex_unlock(&rq->lock);
	kfifo_free(&entity->job_queue);
	return r;
}

/**
 * Submit a normal job to the job queue
 *
 * @sched	The pointer to the scheduler
 * @c_entity	The pointer to amd_context_entity
 * @job		The pointer to the job required to submit
 *
 * return 0 if succeeded, -1 if failed.
 *        -2 indicates the queue is full for this client, the client should
 *        wait until the scheduler consumes some queued commands.
 */
int amd_sched_push_job(struct amd_gpu_scheduler *sched,
		       struct amd_context_entity *c_entity,
		       void *job)
{
	while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *),
				   &c_entity->queue_lock) != sizeof(void *)) {
		/**
		 * The current context used up all its IB slots;
		 * wait here, or we need to check whether the GPU is hung.
		 */
		schedule();
	}

	wake_up_interruptible(&sched->wait_queue);
	return 0;
}

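/*
 * Illustrative only: a hedged sketch of the submission path. After queuing a
 * job the caller can block until the scheduler has emitted it to the HW ring
 * by waiting on the job's virtual sequence number; my_job and my_seq are
 * placeholder names for driver-owned state.
 *
 *	r = amd_sched_push_job(sched, c_entity, my_job);
 *	if (!r)
 *		r = amd_sched_wait_emit(c_entity, my_seq, true, -1);
 */
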
/**
 * Check the virtual sequence number for the specified context
 *
 * @seq		The virtual sequence number to check
 * @c_entity	The pointer to a valid amd_context_entity
 *
 * return 0 if signaled, -1 otherwise.
 */
int amd_sched_check_ts(struct amd_context_entity *c_entity, uint64_t seq)
{
	return (seq <= atomic64_read(&c_entity->last_signaled_v_seq)) ? 0 : -1;
}

/**
 * Wait for a virtual sequence number to be signaled or for a timeout
 *
 * @c_entity	The pointer to a valid context entity
 * @seq		The virtual sequence number to wait for
 * @intr	Interruptible or not
 * @timeout	Timeout in ms, wait infinitely if <0
 * @emit	wait for emit or signal
 *
 * return =0 signaled, <0 failed
 */
static int amd_sched_wait(struct amd_context_entity *c_entity,
			  uint64_t seq,
			  bool intr,
			  long timeout,
			  bool emit)
{
	atomic64_t *v_seq = emit ? &c_entity->last_emitted_v_seq :
		&c_entity->last_signaled_v_seq;
	wait_queue_head_t *wait_queue = emit ? &c_entity->wait_emit :
		&c_entity->wait_queue;

	if (intr && (timeout < 0)) {
		wait_event_interruptible(
			*wait_queue,
			seq <= atomic64_read(v_seq));
		return 0;
	} else if (intr && (timeout >= 0)) {
		wait_event_interruptible_timeout(
			*wait_queue,
			seq <= atomic64_read(v_seq),
			msecs_to_jiffies(timeout));
		return (seq <= atomic64_read(v_seq)) ?
			0 : -1;
	} else if (!intr && (timeout < 0)) {
		wait_event(
			*wait_queue,
			seq <= atomic64_read(v_seq));
		return 0;
	} else if (!intr && (timeout >= 0)) {
		wait_event_timeout(
			*wait_queue,
			seq <= atomic64_read(v_seq),
			msecs_to_jiffies(timeout));
		return (seq <= atomic64_read(v_seq)) ?
			0 : -1;
	}
	return 0;
}

int amd_sched_wait_signal(struct amd_context_entity *c_entity,
			  uint64_t seq,
			  bool intr,
			  long timeout)
{
	return amd_sched_wait(c_entity, seq, intr, timeout, false);
}

int amd_sched_wait_emit(struct amd_context_entity *c_entity,
			uint64_t seq,
			bool intr,
			long timeout)
{
	return amd_sched_wait(c_entity, seq, intr, timeout, true);
}

static int amd_sched_main(void *param)
{
	int r;
	void *job;
	struct sched_param sparam = {.sched_priority = 1};
	struct amd_context_entity *c_entity = NULL;
	struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;

	sched_setscheduler(current, SCHED_FIFO, &sparam);

	while (!kthread_should_stop()) {
		wait_event_interruptible(sched->wait_queue,
					 is_scheduler_ready(sched) &&
					 (c_entity = select_context(sched)));
		r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
		if (r != sizeof(void *))
			continue;
		r = sched->ops->prepare_job(sched, c_entity, job);
		if (!r)
			WARN_ON(kfifo_in_spinlocked(
					&sched->active_hw_rq,
					&job, sizeof(void *),
					&sched->queue_lock) != sizeof(void *));
		mutex_lock(&sched->sched_lock);
		sched->ops->run_job(sched, c_entity, job);
		mutex_unlock(&sched->sched_lock);
	}
	return 0;
}

uint64_t amd_sched_get_handled_seq(struct amd_gpu_scheduler *sched)
{
	return sched->last_handled_seq;
}

/**
 * ISR to handle EOP interrupts
 *
 * @sched: gpu scheduler
 */
void amd_sched_isr(struct amd_gpu_scheduler *sched)
{
	int r;
	void *job;

	r = kfifo_out_spinlocked(&sched->active_hw_rq,
				 &job, sizeof(void *),
				 &sched->queue_lock);

	if (r != sizeof(void *))
		job = NULL;

	sched->ops->process_job(sched, job);
	sched->last_handled_seq++;
	wake_up_interruptible(&sched->wait_queue);
}

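/*
 * Note (added for clarity): this ISR is the single consumer of active_hw_rq;
 * the only producer is the scheduler thread in amd_sched_main(). That pairing
 * is what makes the unlocked kfifo_is_full() check in is_scheduler_ready()
 * safe.
 */
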
/**
 * Create a gpu scheduler
 *
 * @device	The device context for this scheduler
 * @ops		The backend operations for this scheduler.
 * @ring	The scheduler is per ring, this is the ring id.
 * @granularity	The minimum ms unit the scheduler will schedule.
 * @preemption	Indicates whether this ring supports preemption, 0 means no.
 *
 * return the pointer to the scheduler on success, otherwise return NULL
 */
struct amd_gpu_scheduler *amd_sched_create(void *device,
					   struct amd_sched_backend_ops *ops,
					   unsigned ring,
					   unsigned granularity,
					   unsigned preemption)
{
	struct amd_gpu_scheduler *sched;
	char name[20] = "gpu_sched[0]";

	sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
	if (!sched)
		return NULL;

	sched->device = device;
	sched->ops = ops;
	sched->granularity = granularity;
	sched->ring_id = ring;
	sched->preemption = preemption;
	sched->last_handled_seq = 0;

	snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
	mutex_init(&sched->sched_lock);
	spin_lock_init(&sched->queue_lock);
	init_rq(&sched->sched_rq);
	sched->sched_rq.check_entity_status = gpu_entity_check_status;

	init_rq(&sched->kernel_rq);
	sched->kernel_rq.check_entity_status = gpu_entity_check_status;

	init_waitqueue_head(&sched->wait_queue);
	if (kfifo_alloc(&sched->active_hw_rq,
			AMD_MAX_ACTIVE_HW_SUBMISSION * sizeof(void *),
			GFP_KERNEL)) {
		kfree(sched);
		return NULL;
	}

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_create(amd_sched_main, sched, name);
	if (sched->thread) {
		wake_up_process(sched->thread);
		DRM_INFO("Create gpu scheduler for id %d successfully.\n",
			 ring);
		return sched;
	}

	DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
	kfifo_free(&sched->active_hw_rq);
	kfree(sched);
	return NULL;
}

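/*
 * Illustrative only: a hedged sketch of per-ring setup. A ring driver supplies
 * an amd_sched_backend_ops table (prepare_job/run_job/process_job as used by
 * this file) and creates one scheduler per ring; my_ops, my_device and my_ring
 * are placeholder names.
 *
 *	static struct amd_sched_backend_ops my_ops = {
 *		.prepare_job = my_prepare_job,
 *		.run_job     = my_run_job,
 *		.process_job = my_process_job,
 *	};
 *
 *	sched = amd_sched_create(my_device, &my_ops, my_ring, 1, 0);
 *	if (!sched)
 *		return -ENOMEM;
 *	...
 *	amd_sched_destroy(sched);
 */
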
/**
 * Destroy a gpu scheduler
 *
 * @sched	The pointer to the scheduler
 *
 * return 0 if succeeded, -1 if failed.
 */
int amd_sched_destroy(struct amd_gpu_scheduler *sched)
{
	kthread_stop(sched->thread);
	kfifo_free(&sched->active_hw_rq);
	kfree(sched);
	return 0;
}