fs/orangefs/waitqueue.c
/*
 * (C) 2001 Clemson University and The University of Chicago
 * (C) 2011 Omnibond Systems
 *
 * Changes by Acxiom Corporation to implement generic service_operation()
 * function, Copyright Acxiom Corporation, 2005.
 *
 * See COPYING in top-level directory.
 */

/*
 * In-kernel waitqueue operations.
 */

#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"

static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *);
static int wait_for_matching_downcall(struct orangefs_kernel_op_s *);

/*
 * Walk the list of operations in the request queue and mark them purged.
 * NOTE: This is called from the device close path after the client-core has
 * guaranteed that no new operations can appear on the list, since the
 * client-core is going to exit anyway.
 */
void purge_waiting_ops(void)
{
        struct orangefs_kernel_op_s *op;

        spin_lock(&orangefs_request_list_lock);
        list_for_each_entry(op, &orangefs_request_list, list) {
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "pvfs2-client-core: purging op tag %llu %s\n",
                             llu(op->tag),
                             get_opname_string(op));
                spin_lock(&op->lock);
                set_op_state_purged(op);
                spin_unlock(&op->lock);
        }
        spin_unlock(&orangefs_request_list_lock);
}

static inline void
add_op_to_request_list(struct orangefs_kernel_op_s *op)
{
        spin_lock(&orangefs_request_list_lock);
        spin_lock(&op->lock);
        set_op_state_waiting(op);
        list_add_tail(&op->list, &orangefs_request_list);
        spin_unlock(&orangefs_request_list_lock);
        spin_unlock(&op->lock);
        wake_up_interruptible(&orangefs_request_list_waitq);
}

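/*
 * Ops queued with ORANGEFS_OP_PRIORITY are added at the head of the
 * request list rather than the tail, so the client-core sees them ahead
 * of already-queued ordinary requests.
 */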
static inline
void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op)
{
        spin_lock(&orangefs_request_list_lock);
        spin_lock(&op->lock);
        set_op_state_waiting(op);

        list_add(&op->list, &orangefs_request_list);
        spin_unlock(&orangefs_request_list_lock);
        spin_unlock(&op->lock);
        wake_up_interruptible(&orangefs_request_list_waitq);
}

/*
 * Submits an ORANGEFS operation and waits for it to complete.
 *
 * Note op->downcall.status will contain the status of the operation (in
 * errno format), whether provided by pvfs2-client or as a result of a
 * failure to service the operation. If the caller wishes to distinguish,
 * op->state can be checked to see whether it was actually serviced.
 *
 * Returns the contents of op->downcall.status for convenience.
 */
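/*
 * A rough sketch of a typical caller (illustrative only; op_alloc(),
 * op_release(), the upcall/downcall request fields and the op type used
 * here live elsewhere in the orangefs module):
 *
 *      new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
 *      if (!new_op)
 *              return -ENOMEM;
 *      ... fill in new_op->upcall.req ...
 *      ret = service_operation(new_op, __func__, ORANGEFS_OP_INTERRUPTIBLE);
 *      if (ret == 0)
 *              ... copy results out of new_op->downcall ...
 *      op_release(new_op);
 */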
int service_operation(struct orangefs_kernel_op_s *op,
                      const char *op_name,
                      int flags)
{
        /* flags to modify behavior */
        sigset_t orig_sigset;
        int ret = 0;

        /* irqflags and wait_entry are only used IF the client-core aborts */
        unsigned long irqflags;

        DEFINE_WAIT(wait_entry);

        op->upcall.tgid = current->tgid;
        op->upcall.pid = current->pid;

retry_servicing:
        op->downcall.status = 0;
        gossip_debug(GOSSIP_WAIT_DEBUG,
                     "orangefs: service_operation: %s %p\n",
                     op_name,
                     op);
        gossip_debug(GOSSIP_WAIT_DEBUG,
                     "orangefs: operation posted by process: %s, pid: %i\n",
                     current->comm,
                     current->pid);

        /* mask out signals if this operation is not to be interrupted */
        if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
                orangefs_block_signals(&orig_sigset);

        if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) {
                ret = mutex_lock_interruptible(&request_mutex);
                /*
                 * check to see if we were interrupted while waiting for
                 * the request mutex
                 */
                if (ret < 0) {
                        if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
                                orangefs_set_signals(&orig_sigset);
                        op->downcall.status = ret;
                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                     "orangefs: service_operation interrupted.\n");
                        return ret;
                }
        }

        gossip_debug(GOSSIP_WAIT_DEBUG,
                     "%s:About to call is_daemon_in_service().\n",
                     __func__);

        if (is_daemon_in_service() < 0) {
                /*
                 * By incrementing the per-operation attempt counter, we
                 * directly go into the timeout logic while waiting for
                 * the matching downcall to be read
                 */
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "%s:client core is NOT in service(%d).\n",
                             __func__,
                             is_daemon_in_service());
                op->attempts++;
        }

        /* queue up the operation */
        if (flags & ORANGEFS_OP_PRIORITY) {
                add_priority_op_to_request_list(op);
        } else {
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "%s:About to call add_op_to_request_list().\n",
                             __func__);
                add_op_to_request_list(op);
        }

        if (!(flags & ORANGEFS_OP_NO_SEMAPHORE))
                mutex_unlock(&request_mutex);

        /*
         * If we are asked to service an asynchronous operation from
         * the VFS perspective, we are done.
         */
        if (flags & ORANGEFS_OP_ASYNC)
                return 0;

        if (flags & ORANGEFS_OP_CANCELLATION) {
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "%s:"
                             "About to call wait_for_cancellation_downcall.\n",
                             __func__);
                ret = wait_for_cancellation_downcall(op);
        } else {
                ret = wait_for_matching_downcall(op);
        }

        if (ret < 0) {
                /* failed to get matching downcall */
                if (ret == -ETIMEDOUT) {
                        gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n",
                                   op_name);
                }
                op->downcall.status = ret;
        } else {
                /* got matching downcall; make sure status is in errno format */
                op->downcall.status =
                    orangefs_normalize_to_errno(op->downcall.status);
                ret = op->downcall.status;
        }

        if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
                orangefs_set_signals(&orig_sigset);

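        /*
         * Both branches above leave ret equal to op->downcall.status,
         * so this check should never fire.
         */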
        BUG_ON(ret != op->downcall.status);
        /* retry if operation has not been serviced and if requested */
        if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) {
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "orangefs: tag %llu (%s)"
                             " -- operation to be retried (%d attempt)\n",
                             llu(op->tag),
                             op_name,
                             op->attempts + 1);

                if (!op->uses_shared_memory)
                        /*
                         * this operation doesn't use the shared memory
                         * system
                         */
                        goto retry_servicing;

                /* op uses shared memory */
                if (orangefs_get_bufmap_init() == 0) {
                        /*
                         * This operation uses the shared memory system AND
                         * the system is not yet ready. This situation occurs
                         * when the client-core is restarted AND there were
                         * operations waiting to be processed or already in
                         * progress.
                         */
                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                     "uses_shared_memory is true.\n");
                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                     "Client core in-service status(%d).\n",
                                     is_daemon_in_service());
                        gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n",
                                     orangefs_get_bufmap_init());
                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                     "operation's status is 0x%0x.\n",
                                     op->op_state);

                        /*
                         * let process sleep for a few seconds so shared
                         * memory system can be initialized.
                         */
                        spin_lock_irqsave(&op->lock, irqflags);
                        prepare_to_wait(&orangefs_bufmap_init_waitq,
                                        &wait_entry,
                                        TASK_INTERRUPTIBLE);
                        spin_unlock_irqrestore(&op->lock, irqflags);

                        /*
                         * Wait for orangefs_bufmap_initialize() to wake me up
                         * within the allotted time.
                         */
                        ret = schedule_timeout(MSECS_TO_JIFFIES
                                (1000 * ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS));

                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                     "Value returned from schedule_timeout:"
                                     "%d.\n",
                                     ret);
                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                     "Is shared memory available? (%d).\n",
                                     orangefs_get_bufmap_init());

                        spin_lock_irqsave(&op->lock, irqflags);
                        finish_wait(&orangefs_bufmap_init_waitq, &wait_entry);
                        spin_unlock_irqrestore(&op->lock, irqflags);

                        if (orangefs_get_bufmap_init() == 0) {
                                gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted. Aborting user's request(%s).\n",
                                           __func__,
                                           ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS,
                                           get_opname_string(op));
                                return -EIO;
                        }

                        /*
                         * Return to the calling function and re-populate a
                         * shared memory buffer.
                         */
                        return -EAGAIN;
                }
        }

        gossip_debug(GOSSIP_WAIT_DEBUG,
                     "orangefs: service_operation %s returning: %d for %p.\n",
                     op_name,
                     ret,
                     op);
        return ret;
}

static inline void remove_op_from_request_list(struct orangefs_kernel_op_s *op)
{
        struct list_head *tmp = NULL;
        struct list_head *tmp_safe = NULL;
        struct orangefs_kernel_op_s *tmp_op = NULL;

        spin_lock(&orangefs_request_list_lock);
        list_for_each_safe(tmp, tmp_safe, &orangefs_request_list) {
                tmp_op = list_entry(tmp,
                                    struct orangefs_kernel_op_s,
                                    list);
                if (tmp_op && (tmp_op == op)) {
                        list_del(&tmp_op->list);
                        break;
                }
        }
        spin_unlock(&orangefs_request_list_lock);
}

static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
{
        /*
         * Handle interrupted cases depending on what state we were in when
         * the interruption was detected. There is a coarse-grained lock
         * across the operation.
         *
         * NOTE: be sure not to reverse lock ordering by locking an op lock
         * while holding the request_list lock. Here, we first lock the op
         * and then lock the appropriate list.
         */
        if (!op) {
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "%s: op is null, ignoring\n",
                             __func__);
                return;
        }

        /*
         * one more sanity check, make sure it's in one of the possible
         * states or don't try to cancel it
         */
        if (!(op_state_waiting(op) ||
              op_state_in_progress(op) ||
              op_state_serviced(op) ||
              op_state_purged(op))) {
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "%s: op %p not in a valid state (%0x), "
                             "ignoring\n",
                             __func__,
                             op,
                             op->op_state);
                return;
        }

        spin_lock(&op->lock);

        if (op_state_waiting(op)) {
                /*
                 * upcall hasn't been read; remove op from upcall request
                 * list.
                 */
                spin_unlock(&op->lock);
                remove_op_from_request_list(op);
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "Interrupted: Removed op %p from request_list\n",
                             op);
        } else if (op_state_in_progress(op)) {
                /* op must be removed from the in progress htable */
                spin_unlock(&op->lock);
                spin_lock(&htable_ops_in_progress_lock);
                list_del(&op->list);
                spin_unlock(&htable_ops_in_progress_lock);
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "Interrupted: Removed op %p"
                             " from htable_ops_in_progress\n",
                             op);
        } else if (!op_state_serviced(op)) {
                spin_unlock(&op->lock);
                gossip_err("interrupted operation is in a weird state 0x%x\n",
                           op->op_state);
        } else {
                /*
                 * It is not intended for execution to flow here,
                 * but having this unlock here makes sparse happy.
                 */
                gossip_err("%s: can't get here.\n", __func__);
                spin_unlock(&op->lock);
        }
}

/*
 * Sleeps on a waitqueue waiting for the matching downcall.
 * If the client-core finishes servicing, then we are good to go.
 * Else if the client-core exits, we get woken up here and retry with a
 * timeout.
 *
 * Postcondition: when this call returns to the caller, the specified op
 * will no longer be on any list or htable.
 *
 * Returns 0 on success and -errno on failure.
 * Errors are:
 *   EAGAIN in case we want the caller to requeue and try again.
 *   EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
 *     operation since the client-core seems to be exiting too often
 *     or because we were interrupted.
 */
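/*
 * Note that an -EAGAIN result feeds the retry logic at the bottom of
 * service_operation() above: either an immediate jump back to
 * retry_servicing or, for ops that use the shared memory system, a
 * return of -EAGAIN so the caller can repopulate its buffer.
 */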
static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
{
        int ret = -EINVAL;
        DEFINE_WAIT(wait_entry);

        while (1) {
                spin_lock(&op->lock);
                prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
                if (op_state_serviced(op)) {
                        spin_unlock(&op->lock);
                        ret = 0;
                        break;
                }
                spin_unlock(&op->lock);

                if (!signal_pending(current)) {
                        /*
                         * if this was our first attempt and client-core
                         * has not purged our operation, we are happy to
                         * simply wait
                         */
                        spin_lock(&op->lock);
                        if (op->attempts == 0 && !op_state_purged(op)) {
                                spin_unlock(&op->lock);
                                schedule();
                        } else {
                                spin_unlock(&op->lock);
                                /*
                                 * on subsequent attempts, we retry exactly
                                 * once with a timeout
                                 */
                                if (!schedule_timeout(MSECS_TO_JIFFIES
                                      (1000 * op_timeout_secs))) {
                                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                                     "*** %s:"
                                                     " operation timed out (tag"
                                                     " %llu, %p, att %d)\n",
                                                     __func__,
                                                     llu(op->tag),
                                                     op,
                                                     op->attempts);
                                        ret = -ETIMEDOUT;
                                        orangefs_clean_up_interrupted_operation
                                            (op);
                                        break;
                                }
                        }
                        spin_lock(&op->lock);
                        op->attempts++;
                        /*
                         * if the operation was purged in the meantime, it
                         * is better to requeue it afresh but ensure that
                         * we have not been purged repeatedly. This could
                         * happen if client-core crashes when an op
                         * is being serviced, so we requeue the op, client
                         * core crashes again so we requeue the op, client
                         * core starts, and so on...
                         */
                        if (op_state_purged(op)) {
                                ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
                                         -EAGAIN :
                                         -EIO;
                                spin_unlock(&op->lock);
                                gossip_debug(GOSSIP_WAIT_DEBUG,
                                             "*** %s:"
                                             " operation purged (tag "
                                             "%llu, %p, att %d)\n",
                                             __func__,
                                             llu(op->tag),
                                             op,
                                             op->attempts);
                                orangefs_clean_up_interrupted_operation(op);
                                break;
                        }
                        spin_unlock(&op->lock);
                        continue;
                }

                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "*** %s:"
                             " operation interrupted by a signal (tag "
                             "%llu, op %p)\n",
                             __func__,
                             llu(op->tag),
                             op);
                orangefs_clean_up_interrupted_operation(op);
                ret = -EINTR;
                break;
        }

        spin_lock(&op->lock);
        finish_wait(&op->waitq, &wait_entry);
        spin_unlock(&op->lock);

        return ret;
}

/*
 * Similar to wait_for_matching_downcall(), but used in the special case
 * of I/O cancellations.
 *
 * Note we need a special wait function because if this is called we already
 * know that a signal is pending in current and need to service the
 * cancellation upcall anyway. The only way to exit this is to either
 * time out or have the cancellation be serviced properly.
 */
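/*
 * Note that every path through the body of the loop below ends in a break,
 * so the body executes at most once per call; the while (1) is structural.
 */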
static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op)
{
        int ret = -EINVAL;
        DEFINE_WAIT(wait_entry);

        while (1) {
                spin_lock(&op->lock);
                prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
                if (op_state_serviced(op)) {
                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                     "%s:op-state is SERVICED.\n",
                                     __func__);
                        spin_unlock(&op->lock);
                        ret = 0;
                        break;
                }
                spin_unlock(&op->lock);

                if (signal_pending(current)) {
                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                     "%s:operation interrupted by a signal (tag"
                                     " %llu, op %p)\n",
                                     __func__,
                                     llu(op->tag),
                                     op);
                        orangefs_clean_up_interrupted_operation(op);
                        ret = -EINTR;
                        break;
                }

                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "%s:About to call schedule_timeout.\n",
                             __func__);
                ret =
                    schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs));

                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "%s:Value returned from schedule_timeout(%d).\n",
                             __func__,
                             ret);
                if (!ret) {
                        gossip_debug(GOSSIP_WAIT_DEBUG,
                                     "%s:*** operation timed out: %p\n",
                                     __func__,
                                     op);
                        orangefs_clean_up_interrupted_operation(op);
                        ret = -ETIMEDOUT;
                        break;
                }

                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
                             __func__);
                ret = -ETIMEDOUT;
                break;
        }

        spin_lock(&op->lock);
        finish_wait(&op->waitq, &wait_entry);
        spin_unlock(&op->lock);

        gossip_debug(GOSSIP_WAIT_DEBUG,
                     "%s:returning ret(%d)\n",
                     __func__,
                     ret);

        return ret;
}