fs/orangefs/waitqueue.c

   1 /*
   2  * (C) 2001 Clemson University and The University of Chicago
   3  * (C) 2011 Omnibond Systems
   4  *
   5  * Changes by Acxiom Corporation to implement generic service_operation()
   6  * function, Copyright Acxiom Corporation, 2005.
   7  *
   8  * See COPYING in top-level directory.
   9  */
  10
  11 /*
  12  *  In-kernel waitqueue operations.
  13  */
  14
  15 #include "protocol.h"
  16 #include "orangefs-kernel.h"
  17 #include "orangefs-bufmap.h"
  18
  19 static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *);
  20 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *);
  21
  22 /*
  23  * What we do in this function is to walk the list of operations that are
  24  * present in the request queue and mark them as purged.
  25  * NOTE: This is called from the device close after client-core has
  26  * guaranteed that no new operations could appear on the list since the
  27  * client-core is anyway going to exit.
  28  */
  29 void purge_waiting_ops(void)
  30 {
  31         struct orangefs_kernel_op_s *op;
  32
  33         spin_lock(&orangefs_request_list_lock);
  34         list_for_each_entry(op, &orangefs_request_list, list) {
  35                 gossip_debug(GOSSIP_WAIT_DEBUG,
  36                              "pvfs2-client-core: purging op tag %llu %s\n",
  37                              llu(op->tag),
  38                              get_opname_string(op));
  39                 spin_lock(&op->lock);
  40                 set_op_state_purged(op);
  41                 spin_unlock(&op->lock);
  42         }
  43         spin_unlock(&orangefs_request_list_lock);
  44 }
  45
  46 static inline void
  47 add_op_to_request_list(struct orangefs_kernel_op_s *op)
  48 {
  49         spin_lock(&orangefs_request_list_lock);
  50         spin_lock(&op->lock);
  51         set_op_state_waiting(op);
  52         list_add_tail(&op->list, &orangefs_request_list);
  53         spin_unlock(&orangefs_request_list_lock);
  54         spin_unlock(&op->lock);
  55         wake_up_interruptible(&orangefs_request_list_waitq);
  56 }
  57
  58 static inline
  59 void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op)
  60 {
  61         spin_lock(&orangefs_request_list_lock);
  62         spin_lock(&op->lock);
  63         set_op_state_waiting(op);
  64
  65         list_add(&op->list, &orangefs_request_list);
  66         spin_unlock(&orangefs_request_list_lock);
  67         spin_unlock(&op->lock);
  68         wake_up_interruptible(&orangefs_request_list_waitq);
  69 }
  70
  71 /*
  72  * submits a ORANGEFS operation and waits for it to complete
  73  *
  74  * Note op->downcall.status will contain the status of the operation (in
  75  * errno format), whether provided by pvfs2-client or a result of failure to
  76  * service the operation.  If the caller wishes to distinguish, then
  77  * op->state can be checked to see if it was serviced or not.
  78  *
  79  * Returns contents of op->downcall.status for convenience
  80  */
  81 int service_operation(struct orangefs_kernel_op_s *op,
  82                       const char *op_name,
  83                       int flags)
  84 {
  85         /* flags to modify behavior */
  86         sigset_t orig_sigset;
  87         int ret = 0;
  88
  89         /* irqflags and wait_entry are only used IF the client-core aborts */
  90         unsigned long irqflags;
  91
  92         DEFINE_WAIT(wait_entry);
  93
  94         op->upcall.tgid = current->tgid;
  95         op->upcall.pid = current->pid;
  96
  97 retry_servicing:
  98         op->downcall.status = 0;
  99         gossip_debug(GOSSIP_WAIT_DEBUG,
 100                      "orangefs: service_operation: %s %p\n",
 101                      op_name,
 102                      op);
 103         gossip_debug(GOSSIP_WAIT_DEBUG,
 104                      "orangefs: operation posted by process: %s, pid: %i\n",
 105                      current->comm,
 106                      current->pid);
 107
 108         /* mask out signals if this operation is not to be interrupted */
 109         if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
 110                 orangefs_block_signals(&orig_sigset);
 111
 112         if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) {
 113                 ret = mutex_lock_interruptible(&request_mutex);
 114                 /*
 115                  * check to see if we were interrupted while waiting for
 116                  * semaphore
 117                  */
 118                 if (ret < 0) {
 119                         if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
 120                                 orangefs_set_signals(&orig_sigset);
 121                         op->downcall.status = ret;
 122                         gossip_debug(GOSSIP_WAIT_DEBUG,
 123                                      "orangefs: service_operation interrupted.\n");
 124                         return ret;
 125                 }
 126         }
 127
 128         gossip_debug(GOSSIP_WAIT_DEBUG,
 129                      "%s:About to call is_daemon_in_service().\n",
 130                      __func__);
 131
 132         if (is_daemon_in_service() < 0) {
 133                 /*
 134                  * By incrementing the per-operation attempt counter, we
 135                  * directly go into the timeout logic while waiting for
 136                  * the matching downcall to be read
 137                  */
 138                 gossip_debug(GOSSIP_WAIT_DEBUG,
 139                              "%s:client core is NOT in service(%d).\n",
 140                              __func__,
 141                              is_daemon_in_service());
 142                 op->attempts++;
 143         }
 144
 145         /* queue up the operation */
 146         if (flags & ORANGEFS_OP_PRIORITY) {
 147                 add_priority_op_to_request_list(op);
 148         } else {
 149                 gossip_debug(GOSSIP_WAIT_DEBUG,
 150                              "%s:About to call add_op_to_request_list().\n",
 151                              __func__);
 152                 add_op_to_request_list(op);
 153         }
 154
 155         if (!(flags & ORANGEFS_OP_NO_SEMAPHORE))
 156                 mutex_unlock(&request_mutex);
 157
 158         /*
 159          * If we are asked to service an asynchronous operation from
 160          * VFS perspective, we are done.
 161          */
 162         if (flags & ORANGEFS_OP_ASYNC)
 163                 return 0;
 164
 165         if (flags & ORANGEFS_OP_CANCELLATION) {
 166                 gossip_debug(GOSSIP_WAIT_DEBUG,
 167                              "%s:"
 168                              "About to call wait_for_cancellation_downcall.\n",
 169                              __func__);
 170                 ret = wait_for_cancellation_downcall(op);
 171         } else {
 172                 ret = wait_for_matching_downcall(op);
 173         }
 174
 175         if (ret < 0) {
 176                 /* failed to get matching downcall */
 177                 if (ret == -ETIMEDOUT) {
 178                         gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n",
 179                                    op_name);
 180                 }
 181                 op->downcall.status = ret;
 182         } else {
 183                 /* got matching downcall; make sure status is in errno format */
 184                 op->downcall.status =
 185                     orangefs_normalize_to_errno(op->downcall.status);
 186                 ret = op->downcall.status;
 187         }
 188
 189         if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
 190                 orangefs_set_signals(&orig_sigset);
 191
 192         BUG_ON(ret != op->downcall.status);
 193         /* retry if operation has not been serviced and if requested */
 194         if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) {
 195                 gossip_debug(GOSSIP_WAIT_DEBUG,
 196                              "orangefs: tag %llu (%s)"
 197                              " -- operation to be retried (%d attempt)\n",
 198                              llu(op->tag),
 199                              op_name,
 200                              op->attempts + 1);
 201
 202                 if (!op->uses_shared_memory)
 203                         /*
 204                          * this operation doesn't use the shared memory
 205                          * system
 206                          */
 207                         goto retry_servicing;
 208
 209                 /* op uses shared memory */
 210                 if (orangefs_get_bufmap_init() == 0) {
 211                         /*
 212                          * This operation uses the shared memory system AND
 213                          * the system is not yet ready. This situation occurs
 214                          * when the client-core is restarted AND there were
 215                          * operations waiting to be processed or were already
 216                          * in process.
 217                          */
 218                         gossip_debug(GOSSIP_WAIT_DEBUG,
 219                                      "uses_shared_memory is true.\n");
 220                         gossip_debug(GOSSIP_WAIT_DEBUG,
 221                                      "Client core in-service status(%d).\n",
 222                                      is_daemon_in_service());
 223                         gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n",
 224                                      orangefs_get_bufmap_init());
 225                         gossip_debug(GOSSIP_WAIT_DEBUG,
 226                                      "operation's status is 0x%0x.\n",
 227                                      op->op_state);
 228
 229                         /*
 230                          * let process sleep for a few seconds so shared
 231                          * memory system can be initialized.
 232                          */
 233                         spin_lock_irqsave(&op->lock, irqflags);
 234                         prepare_to_wait(&orangefs_bufmap_init_waitq,
 235                                         &wait_entry,
 236                                         TASK_INTERRUPTIBLE);
 237                         spin_unlock_irqrestore(&op->lock, irqflags);
 238
 239                         /*
 240                          * Wait for orangefs_bufmap_initialize() to wake me up
 241                          * within the allotted time.
 242                          */
 243                         ret = schedule_timeout(MSECS_TO_JIFFIES
 244                                 (1000 * ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS));
 245
 246                         gossip_debug(GOSSIP_WAIT_DEBUG,
 247                                      "Value returned from schedule_timeout:"
 248                                      "%d.\n",
 249                                      ret);
 250                         gossip_debug(GOSSIP_WAIT_DEBUG,
 251                                      "Is shared memory available? (%d).\n",
 252                                      orangefs_get_bufmap_init());
 253
 254                         spin_lock_irqsave(&op->lock, irqflags);
 255                         finish_wait(&orangefs_bufmap_init_waitq, &wait_entry);
 256                         spin_unlock_irqrestore(&op->lock, irqflags);
 257
 258                         if (orangefs_get_bufmap_init() == 0) {
 259                                 gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted.  Aborting user's request(%s).\n",
 260                                            __func__,
 261                                            ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS,
 262                                            get_opname_string(op));
 263                                 return -EIO;
 264                         }
 265
 266                         /*
 267                          * Return to the calling function and re-populate a
 268                          * shared memory buffer.
 269                          */
 270                         return -EAGAIN;
 271                 }
 272         }
 273
 274         gossip_debug(GOSSIP_WAIT_DEBUG,
 275                      "orangefs: service_operation %s returning: %d for %p.\n",
 276                      op_name,
 277                      ret,
 278                      op);
 279         return ret;
 280 }
 281
 282 static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
 283 {
 284         /*
 285          * handle interrupted cases depending on what state we were in when
 286          * the interruption is detected.  there is a coarse grained lock
 287          * across the operation.
 288          *
 289          * NOTE: be sure not to reverse lock ordering by locking an op lock
 290          * while holding the request_list lock.  Here, we first lock the op
 291          * and then lock the appropriate list.
 292          */
 293         if (!op) {
 294                 gossip_debug(GOSSIP_WAIT_DEBUG,
 295                             "%s: op is null, ignoring\n",
 296                              __func__);
 297                 return;
 298         }
 299
 300         /*
 301          * one more sanity check, make sure it's in one of the possible states
 302          * or don't try to cancel it
 303          */
 304         if (!(op_state_waiting(op) ||
 305               op_state_in_progress(op) ||
 306               op_state_serviced(op) ||
 307               op_state_purged(op))) {
 308                 gossip_debug(GOSSIP_WAIT_DEBUG,
 309                              "%s: op %p not in a valid state (%0x), "
 310                              "ignoring\n",
 311                              __func__,
 312                              op,
 313                              op->op_state);
 314                 return;
 315         }
 316
 317         spin_lock(&op->lock);
 318         op->op_state |= OP_VFS_STATE_GIVEN_UP;
 319
 320         if (op_state_waiting(op)) {
 321                 /*
 322                  * upcall hasn't been read; remove op from upcall request
 323                  * list.
 324                  */
 325                 spin_unlock(&op->lock);
 326                 spin_lock(&orangefs_request_list_lock);
 327                 list_del(&op->list);
 328                 spin_unlock(&orangefs_request_list_lock);
 329                 gossip_debug(GOSSIP_WAIT_DEBUG,
 330                              "Interrupted: Removed op %p from request_list\n",
 331                              op);
 332         } else if (op_state_in_progress(op)) {
 333                 /* op must be removed from the in progress htable */
 334                 spin_unlock(&op->lock);
 335                 spin_lock(&htable_ops_in_progress_lock);
 336                 list_del(&op->list);
 337                 spin_unlock(&htable_ops_in_progress_lock);
 338                 gossip_debug(GOSSIP_WAIT_DEBUG,
 339                              "Interrupted: Removed op %p"
 340                              " from htable_ops_in_progress\n",
 341                              op);
 342         } else if (!op_state_serviced(op)) {
 343                 spin_unlock(&op->lock);
 344                 gossip_err("interrupted operation is in a weird state 0x%x\n",
 345                            op->op_state);
 346         } else {
 347                 /*
 348                  * It is not intended for execution to flow here,
 349                  * but having this unlock here makes sparse happy.
 350                  */
 351                 gossip_err("%s: can't get here.\n", __func__);
 352                 spin_unlock(&op->lock);
 353         }
 354 }
 355
 356 /*
 357  * sleeps on waitqueue waiting for matching downcall.
 358  * if client-core finishes servicing, then we are good to go.
 359  * else if client-core exits, we get woken up here, and retry with a timeout
 360  *
 361  * Post when this call returns to the caller, the specified op will no
 362  * longer be on any list or htable.
 363  *
 364  * Returns 0 on success and -errno on failure
 365  * Errors are:
 366  * EAGAIN in case we want the caller to requeue and try again..
 367  * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
 368  * operation since client-core seems to be exiting too often
 369  * or if we were interrupted.
 370  */
 371 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
 372 {
 373         int ret = -EINVAL;
 374         DEFINE_WAIT(wait_entry);
 375
 376         while (1) {
 377                 spin_lock(&op->lock);
 378                 prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
 379                 if (op_state_serviced(op)) {
 380                         spin_unlock(&op->lock);
 381                         ret = 0;
 382                         break;
 383                 }
 384                 spin_unlock(&op->lock);
 385
 386                 if (!signal_pending(current)) {
 387                         /*
 388                          * if this was our first attempt and client-core
 389                          * has not purged our operation, we are happy to
 390                          * simply wait
 391                          */
 392                         spin_lock(&op->lock);
 393                         if (op->attempts == 0 && !op_state_purged(op)) {
 394                                 spin_unlock(&op->lock);
 395                                 schedule();
 396                         } else {
 397                                 spin_unlock(&op->lock);
 398                                 /*
 399                                  * subsequent attempts, we retry exactly once
 400                                  * with timeouts
 401                                  */
 402                                 if (!schedule_timeout(MSECS_TO_JIFFIES
 403                                       (1000 * op_timeout_secs))) {
 404                                         gossip_debug(GOSSIP_WAIT_DEBUG,
 405                                                      "*** %s:"
 406                                                      " operation timed out (tag"
 407                                                      " %llu, %p, att %d)\n",
 408                                                      __func__,
 409                                                      llu(op->tag),
 410                                                      op,
 411                                                      op->attempts);
 412                                         ret = -ETIMEDOUT;
 413                                         orangefs_clean_up_interrupted_operation
 414                                             (op);
 415                                         break;
 416                                 }
 417                         }
 418                         spin_lock(&op->lock);
 419                         op->attempts++;
 420                         /*
 421                          * if the operation was purged in the meantime, it
 422                          * is better to requeue it afresh but ensure that
 423                          * we have not been purged repeatedly. This could
 424                          * happen if client-core crashes when an op
 425                          * is being serviced, so we requeue the op, client
 426                          * core crashes again so we requeue the op, client
 427                          * core starts, and so on...
 428                          */
 429                         if (op_state_purged(op)) {
 430                                 ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
 431                                          -EAGAIN :
 432                                          -EIO;
 433                                 spin_unlock(&op->lock);
 434                                 gossip_debug(GOSSIP_WAIT_DEBUG,
 435                                              "*** %s:"
 436                                              " operation purged (tag "
 437                                              "%llu, %p, att %d)\n",
 438                                              __func__,
 439                                              llu(op->tag),
 440                                              op,
 441                                              op->attempts);
 442                                 orangefs_clean_up_interrupted_operation(op);
 443                                 break;
 444                         }
 445                         spin_unlock(&op->lock);
 446                         continue;
 447                 }
 448
 449                 gossip_debug(GOSSIP_WAIT_DEBUG,
 450                              "*** %s:"
 451                              " operation interrupted by a signal (tag "
 452                              "%llu, op %p)\n",
 453                              __func__,
 454                              llu(op->tag),
 455                              op);
 456                 orangefs_clean_up_interrupted_operation(op);
 457                 ret = -EINTR;
 458                 break;
 459         }
 460
 461         spin_lock(&op->lock);
 462         finish_wait(&op->waitq, &wait_entry);
 463         spin_unlock(&op->lock);
 464
 465         return ret;
 466 }
 467
 468 /*
 469  * similar to wait_for_matching_downcall(), but used in the special case
 470  * of I/O cancellations.
 471  *
 472  * Note we need a special wait function because if this is called we already
 473  *      know that a signal is pending in current and need to service the
 474  *      cancellation upcall anyway.  the only way to exit this is to either
 475  *      timeout or have the cancellation be serviced properly.
 476  */
 477 static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op)
 478 {
 479         int ret = -EINVAL;
 480         DEFINE_WAIT(wait_entry);
 481
 482         while (1) {
 483                 spin_lock(&op->lock);
 484                 prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
 485                 if (op_state_serviced(op)) {
 486                         gossip_debug(GOSSIP_WAIT_DEBUG,
 487                                      "%s:op-state is SERVICED.\n",
 488                                      __func__);
 489                         spin_unlock(&op->lock);
 490                         ret = 0;
 491                         break;
 492                 }
 493                 spin_unlock(&op->lock);
 494
 495                 if (signal_pending(current)) {
 496                         gossip_debug(GOSSIP_WAIT_DEBUG,
 497                                      "%s:operation interrupted by a signal (tag"
 498                                      " %llu, op %p)\n",
 499                                      __func__,
 500                                      llu(op->tag),
 501                                      op);
 502                         orangefs_clean_up_interrupted_operation(op);
 503                         ret = -EINTR;
 504                         break;
 505                 }
 506
 507                 gossip_debug(GOSSIP_WAIT_DEBUG,
 508                              "%s:About to call schedule_timeout.\n",
 509                              __func__);
 510                 ret =
 511                     schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs));
 512
 513                 gossip_debug(GOSSIP_WAIT_DEBUG,
 514                              "%s:Value returned from schedule_timeout(%d).\n",
 515                              __func__,
 516                              ret);
 517                 if (!ret) {
 518                         gossip_debug(GOSSIP_WAIT_DEBUG,
 519                                      "%s:*** operation timed out: %p\n",
 520                                      __func__,
 521                                      op);
 522                         orangefs_clean_up_interrupted_operation(op);
 523                         ret = -ETIMEDOUT;
 524                         break;
 525                 }
 526
 527                 gossip_debug(GOSSIP_WAIT_DEBUG,
 528                              "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
 529                              __func__);
 530                 ret = -ETIMEDOUT;
 531                 break;
 532         }
 533
 534         spin_lock(&op->lock);
 535         finish_wait(&op->waitq, &wait_entry);
 536         spin_unlock(&op->lock);
 537
 538         gossip_debug(GOSSIP_WAIT_DEBUG,
 539                      "%s:returning ret(%d)\n",
 540                      __func__,
 541                      ret);
 542
 543         return ret;
 544 }