orangefs: Fix some more global namespace pollution.
fs/orangefs/devorangefs-req.c
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * Changes by Acxiom Corporation to add protocol version to kernel
 * communication, Copyright Acxiom Corporation, 2005.
 *
 * See COPYING in top-level directory.
 */

#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-dev-proto.h"
#include "orangefs-bufmap.h"

#include <linux/debugfs.h>
#include <linux/slab.h>

/* this file implements the /dev/pvfs2-req device node */

static int open_access_count;

#define DUMP_DEVICE_ERROR()                                                   \
do {                                                                          \
        gossip_err("*****************************************************\n");\
        gossip_err("ORANGEFS Device Error: You cannot open the device file ");\
        gossip_err("\n/dev/%s more than once. Please make sure that\nthere "  \
                   "are no ", ORANGEFS_REQDEVICE_NAME);                       \
        gossip_err("instances of a program using this device\ncurrently "     \
                   "running. (You must verify this!)\n");                     \
        gossip_err("For example, you can use the lsof program as follows:\n");\
        gossip_err("'lsof | grep %s' (run this as root)\n",                   \
                   ORANGEFS_REQDEVICE_NAME);                                  \
        gossip_err(" open_access_count = %d\n", open_access_count);           \
        gossip_err("*****************************************************\n");\
} while (0)

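/*
 * Map a tag to a bucket in htable_ops_in_progress. Note that do_div()
 * (from asm/div64.h) divides its first argument in place and returns
 * the remainder, so this computes tag % table_size on a local copy of
 * the tag (e.g. tag 17 with a table size of 8 lands in bucket 1).
 */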
static int hash_func(__u64 tag, int table_size)
{
        return do_div(tag, (unsigned int)table_size);
}

static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op)
{
        int index = hash_func(op->tag, hash_table_size);

        spin_lock(&htable_ops_in_progress_lock);
        list_add_tail(&op->list, &htable_ops_in_progress[index]);
        spin_unlock(&htable_ops_in_progress_lock);
}

static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag)
{
        struct orangefs_kernel_op_s *op, *next;
        int index;

        index = hash_func(tag, hash_table_size);

        spin_lock(&htable_ops_in_progress_lock);
        list_for_each_entry_safe(op,
                                 next,
                                 &htable_ops_in_progress[index],
                                 list) {
                if (op->tag == tag) {
                        list_del(&op->list);
                        spin_unlock(&htable_ops_in_progress_lock);
                        return op;
                }
        }

        spin_unlock(&htable_ops_in_progress_lock);
        return NULL;
}
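
/*
 * Request lifecycle, as implemented below: an op waits on
 * orangefs_request_list until the client-core read()s it, at which
 * point it moves into htable_ops_in_progress keyed by its tag; the
 * matching writev() of the downcall then looks it up by tag and
 * removes it again.
 */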

static int orangefs_devreq_open(struct inode *inode, struct file *file)
{
        int ret = -EINVAL;

        if (!(file->f_flags & O_NONBLOCK)) {
                gossip_err("%s: device cannot be opened in blocking mode\n",
                           __func__);
                goto out;
        }
        ret = -EACCES;
        gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n");
        mutex_lock(&devreq_mutex);

        if (open_access_count == 0) {
                ret = generic_file_open(inode, file);
                if (ret == 0)
                        open_access_count++;
        } else {
                DUMP_DEVICE_ERROR();
        }
        mutex_unlock(&devreq_mutex);

out:

        gossip_debug(GOSSIP_DEV_DEBUG,
                     "pvfs2-client-core: open device complete (ret = %d)\n",
                     ret);
        return ret;
}

/* Function for read() callers into the device */
static ssize_t orangefs_devreq_read(struct file *file,
                                    char __user *buf,
                                    size_t count, loff_t *offset)
{
        struct orangefs_kernel_op_s *op, *temp;
        __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
        static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
        struct orangefs_kernel_op_s *cur_op = NULL;
        unsigned long ret;

        /* We do not support blocking IO. */
        if (!(file->f_flags & O_NONBLOCK)) {
                gossip_err("%s: blocking read from client-core.\n",
                           __func__);
                return -EINVAL;
        }

        /*
         * The client will do an ioctl to find MAX_DEV_REQ_UPSIZE, then
         * always read with that size buffer.
         */
        if (count != MAX_DEV_REQ_UPSIZE) {
                gossip_err("orangefs: client-core tried to read wrong size\n");
                return -EINVAL;
        }
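
        /*
         * A sketch of the expected user-space pattern (hypothetical
         * client-core code, not part of this file):
         *
         *      __s32 size;
         *      if (ioctl(fd, ORANGEFS_DEV_GET_MAX_UPSIZE, &size) == 0)
         *              n = read(fd, buf, size);   // buf holds size bytes
         */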

        /* Get next op (if any) from top of list. */
        spin_lock(&orangefs_request_list_lock);
        list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
                __s32 fsid;
                /* This lock is held past the end of the loop when we break. */
                spin_lock(&op->lock);

                fsid = fsid_of_op(op);
                if (fsid != ORANGEFS_FS_ID_NULL) {
                        int ret;
                        /* Skip ops whose filesystem needs to be mounted. */
                        ret = fs_mount_pending(fsid);
                        if (ret == 1) {
                                gossip_debug(GOSSIP_DEV_DEBUG,
                                             "orangefs: skipping op tag %llu %s\n",
                                             llu(op->tag),
                                             get_opname_string(op));
                                spin_unlock(&op->lock);
                                continue;
                        /*
                         * Skip ops whose filesystem we don't know about
                         * unless it is being mounted.
                         */
                        /* XXX: is there a better way to detect this? */
                        } else if (ret == -1 &&
                                   !(op->upcall.type ==
                                        ORANGEFS_VFS_OP_FS_MOUNT ||
                                     op->upcall.type ==
                                        ORANGEFS_VFS_OP_GETATTR)) {
                                gossip_debug(GOSSIP_DEV_DEBUG,
                                             "orangefs: skipping op tag %llu %s\n",
                                             llu(op->tag),
                                             get_opname_string(op));
                                gossip_err("orangefs: ERROR: fs_mount_pending %d\n",
                                           fsid);
                                spin_unlock(&op->lock);
                                continue;
                        }
                }
                /*
                 * Either this op does not pertain to a filesystem, is mounting
                 * a filesystem, or pertains to a mounted filesystem. Let it
                 * through.
                 */
                cur_op = op;
                break;
        }

        /*
         * At this point we either have a valid op and can continue or have not
         * found an op and must ask the client to try again later.
         */
        if (!cur_op) {
                spin_unlock(&orangefs_request_list_lock);
                return -EAGAIN;
        }

        gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: reading op tag %llu %s\n",
                     llu(cur_op->tag), get_opname_string(cur_op));

        /*
         * Such an op should never be on the list in the first place. If so, we
         * will abort.
         */
        if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
                gossip_err("orangefs: ERROR: Current op already queued.\n");
                list_del(&cur_op->list);
                spin_unlock(&cur_op->lock);
                spin_unlock(&orangefs_request_list_lock);
                return -EAGAIN;
        }

        /*
         * Set the operation to be in progress and move it between lists since
         * it has been sent to the client.
         */
        set_op_state_inprogress(cur_op);

        list_del(&cur_op->list);
        spin_unlock(&orangefs_request_list_lock);
        orangefs_devreq_add_op(cur_op);
        spin_unlock(&cur_op->lock);

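        /*
         * Layout of the upcall pushed to user space, as produced by the
         * copies below (all header fields are fixed-size):
         *
         *      offset 0:  __s32 proto_ver
         *      offset 4:  __s32 magic
         *      offset 8:  __u64 tag
         *      offset 16: struct orangefs_upcall_s upcall
         */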
        /* Push the upcall out. */
        ret = copy_to_user(buf, &proto_ver, sizeof(__s32));
        if (ret != 0)
                goto error;
        ret = copy_to_user(buf + sizeof(__s32), &magic, sizeof(__s32));
        if (ret != 0)
                goto error;
        ret = copy_to_user(buf + 2 * sizeof(__s32), &cur_op->tag,
                           sizeof(__u64));
        if (ret != 0)
                goto error;
        ret = copy_to_user(buf + 2 * sizeof(__s32) + sizeof(__u64),
                           &cur_op->upcall,
                           sizeof(struct orangefs_upcall_s));
        if (ret != 0)
                goto error;

        /* The client only asks to read one size buffer. */
        return MAX_DEV_REQ_UPSIZE;
error:
        /*
         * We were unable to copy the op data to the client. Put the op back
         * in the list. If the client has crashed, the op will be purged later
         * when the device is released.
         */
        gossip_err("orangefs: Failed to copy data to user space\n");
        spin_lock(&orangefs_request_list_lock);
        spin_lock(&cur_op->lock);
        set_op_state_waiting(cur_op);
        orangefs_devreq_remove_op(cur_op->tag);
        list_add(&cur_op->list, &orangefs_request_list);
        spin_unlock(&cur_op->lock);
        spin_unlock(&orangefs_request_list_lock);
        return -EFAULT;
}

/*
 * Function for writev() callers into the device. Readdir related
 * operations have an extra iovec containing info about objects
 * contained in directories.
 */
static ssize_t orangefs_devreq_writev(struct file *file,
                                      const struct iovec *iov,
                                      size_t count,
                                      loff_t *offset)
{
        struct orangefs_kernel_op_s *op = NULL;
        void *buffer = NULL;
        void *ptr = NULL;
        unsigned long i = 0;
        int num_remaining = MAX_DEV_REQ_DOWNSIZE;
        int ret = 0;
        /* num elements in iovec without trailer */
        int notrailer_count = 4;
        /*
         * If there's a trailer, its iov index will be equal to
         * notrailer_count.
         */
        int trailer_index = notrailer_count;
        int payload_size = 0;
        int returned_downcall_size = 0;
        __s32 magic = 0;
        __s32 proto_ver = 0;
        __u64 tag = 0;
        ssize_t total_returned_size = 0;

        /*
         * There will always be at least notrailer_count iovecs, and
         * when there's a trailer, one more than notrailer_count. Check
         * count's sanity.
         */
        if (count != notrailer_count && count != (notrailer_count + 1)) {
                gossip_err("%s: count:%zu: notrailer_count :%d:\n",
                           __func__,
                           count,
                           notrailer_count);
                return -EPROTO;
        }

        /* Copy the non-trailer iovec data into a device request buffer. */
        buffer = dev_req_alloc();
        if (!buffer) {
                gossip_err("%s: dev_req_alloc failed.\n", __func__);
                return -ENOMEM;
        }
        ptr = buffer;
        for (i = 0; i < notrailer_count; i++) {
                if (iov[i].iov_len > num_remaining) {
                        gossip_err("writev error: Freeing buffer and returning\n");
                        dev_req_release(buffer);
                        return -EMSGSIZE;
                }
                ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len);
                if (ret) {
                        gossip_err("Failed to copy data from user space\n");
                        dev_req_release(buffer);
                        return -EIO;
                }
                num_remaining -= iov[i].iov_len;
                ptr += iov[i].iov_len;
                payload_size += iov[i].iov_len;
        }
        total_returned_size = payload_size;

        /*
         * These elements are currently 8 byte aligned (8 bytes for
         * (version + magic), 8 bytes for tag). If you add another element,
         * either make it 8 bytes big, or use get_unaligned when assigning.
         */
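        /*
         * The downcall header parsed below mirrors the upcall header that
         * orangefs_devreq_read() pushed out: proto_ver, then magic, then
         * the tag that ties this reply back to its in-progress op.
         */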
        ptr = buffer;
        proto_ver = *((__s32 *) ptr);   /* unused */
        ptr += sizeof(__s32);

        magic = *((__s32 *) ptr);
        ptr += sizeof(__s32);

        tag = *((__u64 *) ptr);
        ptr += sizeof(__u64);

        if (magic != ORANGEFS_DEVREQ_MAGIC) {
                gossip_err("Error: Device magic number does not match.\n");
                dev_req_release(buffer);
                return -EPROTO;
        }

        op = orangefs_devreq_remove_op(tag);
        if (op) {
                /* Increase ref count! */
                get_op(op);

                /* Calculate the size of the returned downcall. */
                returned_downcall_size =
                        payload_size - (2 * sizeof(__s32) + sizeof(__u64));

                /* Copy the passed-in downcall into the op. */
                if (returned_downcall_size ==
                    sizeof(struct orangefs_downcall_s)) {
                        memcpy(&op->downcall,
                               ptr,
                               sizeof(struct orangefs_downcall_s));
                } else {
                        gossip_err("%s: returned downcall size:%d:\n",
                                   __func__,
                                   returned_downcall_size);
                        dev_req_release(buffer);
                        put_op(op);
                        return -EMSGSIZE;
                }

                /* Don't tolerate an unexpected trailer iovec. */
                if ((op->downcall.trailer_size == 0) &&
                    (count != notrailer_count)) {
                        gossip_err("%s: unexpected trailer iovec.\n",
                                   __func__);
                        dev_req_release(buffer);
                        put_op(op);
                        return -EPROTO;
                }

                /* Don't consider the trailer if there's a bad status. */
                if (op->downcall.status != 0)
                        goto no_trailer;

                /* Get the trailer if there is one. */
                if (op->downcall.trailer_size == 0)
                        goto no_trailer;

                gossip_debug(GOSSIP_DEV_DEBUG,
                             "%s: op->downcall.trailer_size %lld\n",
                             __func__,
                             op->downcall.trailer_size);

                /*
                 * Bail if we think there should be a trailer, but
                 * there's no iovec for it.
                 */
                if (count != (notrailer_count + 1)) {
                        gossip_err("%s: trailer_size:%lld: count:%zu:\n",
                                   __func__,
                                   op->downcall.trailer_size,
                                   count);
                        dev_req_release(buffer);
                        put_op(op);
                        return -EPROTO;
                }

                /* Verify that trailer_size is accurate. */
                if (op->downcall.trailer_size != iov[trailer_index].iov_len) {
                        gossip_err("%s: trailer_size:%lld: != iov_len:%zd:\n",
                                   __func__,
                                   op->downcall.trailer_size,
                                   iov[trailer_index].iov_len);
                        dev_req_release(buffer);
                        put_op(op);
                        return -EMSGSIZE;
                }

                total_returned_size += iov[trailer_index].iov_len;

                /*
                 * Allocate a buffer, copy the trailer bytes into it and
                 * attach it to the downcall.
                 */
                op->downcall.trailer_buf = vmalloc(iov[trailer_index].iov_len);
                if (op->downcall.trailer_buf != NULL) {
                        gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n",
                                     op->downcall.trailer_buf);
                        ret = copy_from_user(op->downcall.trailer_buf,
                                             iov[trailer_index].iov_base,
                                             iov[trailer_index].iov_len);
                        if (ret) {
                                gossip_err("%s: Failed to copy trailer.\n",
                                           __func__);
                                dev_req_release(buffer);
                                gossip_debug(GOSSIP_DEV_DEBUG,
                                             "vfree: %p\n",
                                             op->downcall.trailer_buf);
                                vfree(op->downcall.trailer_buf);
                                op->downcall.trailer_buf = NULL;
                                put_op(op);
                                return -EIO;
                        }
                } else {
                        gossip_err("writev: could not vmalloc for trailer!\n");
                        dev_req_release(buffer);
                        put_op(op);
                        return -ENOMEM;
                }

no_trailer:

                /*
                 * If this operation is an I/O operation we need to wait
                 * for all data to be copied before we can return to avoid
                 * buffer corruption and races that can pull the buffers
                 * out from under us.
                 *
                 * Essentially we're synchronizing with other parts of the
                 * vfs implicitly by not allowing the user space
                 * application reading/writing this device to return until
                 * the buffers are done being used.
                 */
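                /*
                 * In other words: set_op_state_serviced() below lets the
                 * blocked vfs path copy its data out, and this writer then
                 * parks on io_completion_waitq until that path flips
                 * op->io_completed.
                 */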
                if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO) {
                        int timed_out = 0;
                        DEFINE_WAIT(wait_entry);

                        /*
                         * Tell the vfs op waiting on a waitqueue
                         * that this op is done.
                         */
                        spin_lock(&op->lock);
                        set_op_state_serviced(op);
                        spin_unlock(&op->lock);

                        wake_up_interruptible(&op->waitq);

                        while (1) {
                                spin_lock(&op->lock);
                                prepare_to_wait_exclusive(
                                        &op->io_completion_waitq,
                                        &wait_entry,
                                        TASK_INTERRUPTIBLE);
                                if (op->io_completed) {
                                        spin_unlock(&op->lock);
                                        break;
                                }
                                spin_unlock(&op->lock);

                                if (!signal_pending(current)) {
                                        int timeout =
                                            MSECS_TO_JIFFIES(1000 *
                                                             op_timeout_secs);
                                        if (!schedule_timeout(timeout)) {
                                                gossip_debug(GOSSIP_DEV_DEBUG,
                                                             "%s: timed out.\n",
                                                             __func__);
                                                timed_out = 1;
                                                break;
                                        }
                                        continue;
                                }

                                gossip_debug(GOSSIP_DEV_DEBUG,
                                             "%s: signal on I/O wait, aborting\n",
                                             __func__);
                                break;
                        }

                        spin_lock(&op->lock);
                        finish_wait(&op->io_completion_waitq, &wait_entry);
                        spin_unlock(&op->lock);

                        /*
                         * NOTE: for I/O operations we handle releasing the op
                         * object except in the case of timeout. The reason we
                         * can't free the op in timeout cases is that the op
                         * service logic in the vfs retries operations using
                         * the same op ptr, thus it can't be freed.
                         */
                        if (!timed_out)
                                op_release(op);
                } else {
                        /*
                         * Tell the vfs op waiting on a waitqueue that
                         * this op is done.
                         */
                        spin_lock(&op->lock);
                        set_op_state_serviced(op);
                        spin_unlock(&op->lock);
                        /*
                         * For every other operation (i.e. non-I/O), we need to
                         * wake up the callers for downcall completion
                         * notification.
                         */
                        wake_up_interruptible(&op->waitq);
                }
        } else {
                /* Ignore downcalls that we're not interested in. */
                gossip_debug(GOSSIP_DEV_DEBUG,
                             "WARNING: No one's waiting for tag %llu\n",
                             llu(tag));
        }
        /* put_op? */
        dev_req_release(buffer);

        return total_returned_size;
}

static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
                                          struct iov_iter *iter)
{
        return orangefs_devreq_writev(iocb->ki_filp,
                                      iter->iov,
                                      iter->nr_segs,
                                      &iocb->ki_pos);
}
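
/*
 * Note: reaching into iter->iov like this assumes an iovec-backed
 * iterator (which is what write() and writev() on the device produce);
 * it bypasses the usual iov_iter copy helpers.
 */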

/*
 * Returns 1 if no filesystems are mounted; otherwise marks each
 * mounted filesystem as needing a remount and returns 0.
 */
static int mark_all_pending_mounts(void)
{
        int unmounted = 1;
        struct orangefs_sb_info_s *orangefs_sb = NULL;

        spin_lock(&orangefs_superblocks_lock);
        list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
                /* All of these file systems require a remount. */
                orangefs_sb->mount_pending = 1;
                unmounted = 0;
        }
        spin_unlock(&orangefs_superblocks_lock);
        return unmounted;
}

/*
 * Determine whether a given filesystem needs to be remounted.
 * Returns: -1 if the fsid is not known to any superblock
 *           0 if already mounted
 *           1 if a remount is pending
 */
int fs_mount_pending(__s32 fsid)
{
        int mount_pending = -1;
        struct orangefs_sb_info_s *orangefs_sb = NULL;

        spin_lock(&orangefs_superblocks_lock);
        list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
                if (orangefs_sb->fs_id == fsid) {
                        mount_pending = orangefs_sb->mount_pending;
                        break;
                }
        }
        spin_unlock(&orangefs_superblocks_lock);
        return mount_pending;
}

/*
 * NOTE: gets called when the last reference to this device is dropped.
 * Using the open_access_count variable, we enforce a reference count
 * on this file so that it can be opened by only one process at a time.
 * The devreq_mutex is used to make sure all i/o has completed
 * before we call orangefs_bufmap_finalize, and similar such tricky
 * situations.
 */
static int orangefs_devreq_release(struct inode *inode, struct file *file)
{
        int unmounted = 0;

        gossip_debug(GOSSIP_DEV_DEBUG,
                     "%s:pvfs2-client-core: exiting, closing device\n",
                     __func__);

        mutex_lock(&devreq_mutex);
        if (orangefs_get_bufmap_init())
                orangefs_bufmap_finalize();

        open_access_count--;

        unmounted = mark_all_pending_mounts();
        gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n",
                     (unmounted ? "UNMOUNTED" : "MOUNTED"));
        mutex_unlock(&devreq_mutex);

        /*
         * Walk through the list of ops in the request list, mark them
         * as purged and wake them up.
         */
        purge_waiting_ops();
        /*
         * Walk through the hash table of in progress operations; mark
         * them as purged and wake them up.
         */
        purge_inprogress_ops();
        gossip_debug(GOSSIP_DEV_DEBUG,
                     "pvfs2-client-core: device close complete\n");
        return 0;
}

int is_daemon_in_service(void)
{
        int in_service;

        /*
         * Check whether the client-core is alive based on the access
         * count we maintain on the device.
         */
        mutex_lock(&devreq_mutex);
        in_service = open_access_count == 1 ? 0 : -EIO;
        mutex_unlock(&devreq_mutex);
        return in_service;
}

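/*
 * Background (general kernel convention, not specific to this file):
 * ioctl command numbers built with the _IO*() macros encode a type
 * ("magic") byte and a sequence number, which _IOC_TYPE() and
 * _IOC_NR() extract again. That lets the checks below reject commands
 * meant for some other driver before dispatching.
 */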
static inline long check_ioctl_command(unsigned int command)
{
        /* Check for valid ioctl codes... */
        if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) {
                gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
                           command,
                           _IOC_TYPE(command),
                           ORANGEFS_DEV_MAGIC);
                return -EINVAL;
        }
        /* ...and valid ioctl commands. */
        if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
                gossip_err("Invalid ioctl command number [%d >= %d]\n",
                           _IOC_NR(command), ORANGEFS_DEV_MAXNR);
                return -ENOIOCTLCMD;
        }
        return 0;
}

static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
{
        static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
        static __s32 max_up_size = MAX_DEV_REQ_UPSIZE;
        static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE;
        struct ORANGEFS_dev_map_desc user_desc;
        int ret = 0;
        struct dev_mask_info_s mask_info = { 0 };
        struct dev_mask2_info_s mask2_info = { 0, 0 };
        int upstream_kmod = 1;
        struct list_head *tmp = NULL;
        struct orangefs_sb_info_s *orangefs_sb = NULL;

        /* mtmoore: add locking here */

        switch (command) {
        case ORANGEFS_DEV_GET_MAGIC:
                return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ?
                        -EIO :
                        0);
        case ORANGEFS_DEV_GET_MAX_UPSIZE:
                return ((put_user(max_up_size,
                                  (__s32 __user *) arg) == -EFAULT) ?
                        -EIO :
                        0);
        case ORANGEFS_DEV_GET_MAX_DOWNSIZE:
                return ((put_user(max_down_size,
                                  (__s32 __user *) arg) == -EFAULT) ?
                        -EIO :
                        0);
        case ORANGEFS_DEV_MAP:
                ret = copy_from_user(&user_desc,
                                     (struct ORANGEFS_dev_map_desc __user *)
                                     arg,
                                     sizeof(struct ORANGEFS_dev_map_desc));
                if (orangefs_get_bufmap_init()) {
                        return -EINVAL;
                } else {
                        return ret ?
                               -EIO :
                               orangefs_bufmap_initialize(&user_desc);
                }
        case ORANGEFS_DEV_REMOUNT_ALL:
                gossip_debug(GOSSIP_DEV_DEBUG,
                             "%s: got ORANGEFS_DEV_REMOUNT_ALL\n",
                             __func__);

                /*
                 * Remount all mounted orangefs volumes to regain the lost
                 * dynamic mount tables (if any) -- NOTE: this is done
                 * without keeping the superblock list locked due to the
                 * upcall/downcall waiting. Also, the request mutex is
                 * used to ensure that no operations will be serviced until
                 * all of the remounts are serviced (to avoid ops between
                 * mounts failing).
                 */
                ret = mutex_lock_interruptible(&request_mutex);
                if (ret < 0)
                        return ret;
                gossip_debug(GOSSIP_DEV_DEBUG,
                             "%s: priority remount in progress\n",
                             __func__);
                list_for_each(tmp, &orangefs_superblocks) {
                        orangefs_sb =
                                list_entry(tmp,
                                           struct orangefs_sb_info_s,
                                           list);
                        if (orangefs_sb && (orangefs_sb->sb)) {
                                gossip_debug(GOSSIP_DEV_DEBUG,
                                             "%s: Remounting SB %p\n",
                                             __func__,
                                             orangefs_sb);

                                ret = orangefs_remount(orangefs_sb->sb);
                                if (ret) {
                                        gossip_debug(GOSSIP_DEV_DEBUG,
                                                     "SB %p remount failed\n",
                                                     orangefs_sb);
                                        break;
                                }
                        }
                }
                gossip_debug(GOSSIP_DEV_DEBUG,
                             "%s: priority remount complete\n",
                             __func__);
                mutex_unlock(&request_mutex);
                return ret;

        case ORANGEFS_DEV_UPSTREAM:
                ret = copy_to_user((void __user *)arg,
                                   &upstream_kmod,
                                   sizeof(upstream_kmod));

                if (ret != 0)
                        return -EIO;
                else
                        return ret;

        case ORANGEFS_DEV_CLIENT_MASK:
                ret = copy_from_user(&mask2_info,
                                     (void __user *)arg,
                                     sizeof(struct dev_mask2_info_s));

                if (ret != 0)
                        return -EIO;

                client_debug_mask.mask1 = mask2_info.mask1_value;
                client_debug_mask.mask2 = mask2_info.mask2_value;

                pr_info("%s: client debug mask has been received "
                        ":%llx: :%llx:\n",
                        __func__,
                        (unsigned long long)client_debug_mask.mask1,
                        (unsigned long long)client_debug_mask.mask2);

                return ret;

        case ORANGEFS_DEV_CLIENT_STRING:
                ret = copy_from_user(&client_debug_array_string,
                                     (void __user *)arg,
                                     ORANGEFS_MAX_DEBUG_STRING_LEN);
                if (ret != 0) {
                        pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
                                __func__);
                        return -EIO;
                }

                pr_info("%s: client debug array string has been received.\n",
                        __func__);

                if (!help_string_initialized) {

                        /* Free the "we don't know yet" default string... */
                        kfree(debug_help_string);

                        /* ...and build a proper debug help string. */
                        if (orangefs_prepare_debugfs_help_string(0)) {
                                gossip_err("%s: no debug help string\n",
                                           __func__);
                                return -EIO;
                        }

                        /* Replace the boilerplate boot-time debug-help file. */
                        debugfs_remove(help_file_dentry);

                        help_file_dentry =
                                debugfs_create_file(
                                        ORANGEFS_KMOD_DEBUG_HELP_FILE,
                                        0444,
                                        debug_dir,
                                        debug_help_string,
                                        &debug_help_fops);

                        if (!help_file_dentry) {
                                gossip_err("%s: debugfs_create_file failed for :%s:!\n",
                                           __func__,
                                           ORANGEFS_KMOD_DEBUG_HELP_FILE);
                                return -EIO;
                        }
                }

                debug_mask_to_string(&client_debug_mask, 1);

                debugfs_remove(client_debug_dentry);

                orangefs_client_debug_init();

                help_string_initialized++;

                return ret;

        case ORANGEFS_DEV_DEBUG:
                ret = copy_from_user(&mask_info,
                                     (void __user *)arg,
                                     sizeof(mask_info));

                if (ret != 0)
                        return -EIO;

                if (mask_info.mask_type == KERNEL_MASK) {
                        if ((mask_info.mask_value == 0)
                            && (kernel_mask_set_mod_init)) {
                                /*
                                 * The kernel debug mask was set when the
                                 * kernel module was loaded; don't override
                                 * it if the client-core was started without
                                 * a value for ORANGEFS_KMODMASK.
                                 */
                                return 0;
                        }
                        debug_mask_to_string(&mask_info.mask_value,
                                             mask_info.mask_type);
                        gossip_debug_mask = mask_info.mask_value;
                        pr_info("%s: kernel debug mask has been modified to "
                                ":%s: :%llx:\n",
                                __func__,
                                kernel_debug_string,
                                (unsigned long long)gossip_debug_mask);
                } else if (mask_info.mask_type == CLIENT_MASK) {
                        debug_mask_to_string(&mask_info.mask_value,
                                             mask_info.mask_type);
                        pr_info("%s: client debug mask has been modified to "
                                ":%s: :%llx:\n",
                                __func__,
                                client_debug_string,
                                llu(mask_info.mask_value));
                } else {
                        gossip_lerr("Invalid mask type....\n");
                        return -EINVAL;
                }

                return ret;

        default:
                return -ENOIOCTLCMD;
        }
        return -ENOIOCTLCMD;
}

static long orangefs_devreq_ioctl(struct file *file,
                                  unsigned int command, unsigned long arg)
{
        long ret;

        /* Check for properly constructed commands. */
        ret = check_ioctl_command(command);
        if (ret < 0)
                return (int)ret;

        return (int)dispatch_ioctl_command(command, arg);
}

#ifdef CONFIG_COMPAT            /* CONFIG_COMPAT is in .config */

/* Compat structure for the ORANGEFS_DEV_MAP ioctl */
struct ORANGEFS_dev_map_desc32 {
        compat_uptr_t ptr;
        __s32 total_size;
        __s32 size;
        __s32 count;
};

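/*
 * translate_dev_map26() widens a 32-bit ORANGEFS_dev_map_desc32 into
 * the native 64-bit descriptor. compat_alloc_user_space() reserves
 * room on the user stack for the widened copy, so the regular
 * copy_from_user() path in dispatch_ioctl_command() can consume it
 * unchanged.
 */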
static unsigned long translate_dev_map26(unsigned long args, long *error)
{
        struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args;
        /*
         * Depending on the architecture, allocate some space on the
         * user-call-stack based on our expected layout.
         */
        struct ORANGEFS_dev_map_desc __user *p =
                compat_alloc_user_space(sizeof(*p));
        compat_uptr_t addr;

        *error = 0;
        /* Get the ptr from the 32 bit user-space... */
        if (get_user(addr, &p32->ptr))
                goto err;
        /* ...and try to put that into a 64-bit layout. */
        if (put_user(compat_ptr(addr), &p->ptr))
                goto err;
        /* Copy the remaining fields. */
        if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32)))
                goto err;
        if (copy_in_user(&p->size, &p32->size, sizeof(__s32)))
                goto err;
        if (copy_in_user(&p->count, &p32->count, sizeof(__s32)))
                goto err;
        return (unsigned long)p;
err:
        *error = -EFAULT;
        return 0;
}

/*
 * 32-bit user-space apps' ioctl handler, used when the kernel module
 * is compiled as a 64-bit one.
 */
static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
                                         unsigned long args)
{
        long ret;
        unsigned long arg = args;

        /* Check for properly constructed commands. */
        ret = check_ioctl_command(cmd);
        if (ret < 0)
                return ret;
        if (cmd == ORANGEFS_DEV_MAP) {
                /*
                 * Convert the arguments to what we expect internally
                 * in kernel space.
                 */
                arg = translate_dev_map26(args, &ret);
                if (ret < 0) {
                        gossip_err("Could not translate dev map\n");
                        return ret;
                }
        }
        /* No other ioctl requires translation. */
        return dispatch_ioctl_command(cmd, arg);
}

#endif /* CONFIG_COMPAT is in .config */

/*
 * The following two ioctl32 functions had been refactored into the above
 * CONFIG_COMPAT ifdef, but that was an over-simplification that was
 * not noticed until we tried to compile on power pc...
 */
#if (defined(CONFIG_COMPAT) && !defined(HAVE_REGISTER_IOCTL32_CONVERSION)) || !defined(CONFIG_COMPAT)
static int orangefs_ioctl32_init(void)
{
        return 0;
}

static void orangefs_ioctl32_cleanup(void)
{
        return;
}
#endif

/* the assigned character device major number */
static int orangefs_dev_major;

/*
 * Initialize orangefs device specific state:
 * Must be called at module load time only.
 */
int orangefs_dev_init(void)
{
        int ret;

        /* register the ioctl32 sub-system */
        ret = orangefs_ioctl32_init();
        if (ret < 0)
                return ret;

        /*
         * Register the orangefs-req device; a first argument of 0 asks
         * register_chrdev() for a dynamically allocated major number.
         */
        orangefs_dev_major = register_chrdev(0,
                                             ORANGEFS_REQDEVICE_NAME,
                                             &orangefs_devreq_file_operations);
        if (orangefs_dev_major < 0) {
                gossip_debug(GOSSIP_DEV_DEBUG,
                             "Failed to register /dev/%s (error %d)\n",
                             ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
                orangefs_ioctl32_cleanup();
                return orangefs_dev_major;
        }

        gossip_debug(GOSSIP_DEV_DEBUG,
                     "*** /dev/%s character device registered ***\n",
                     ORANGEFS_REQDEVICE_NAME);
        gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n",
                     ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
        return 0;
}

void orangefs_dev_cleanup(void)
{
        unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME);
        gossip_debug(GOSSIP_DEV_DEBUG,
                     "*** /dev/%s character device unregistered ***\n",
                     ORANGEFS_REQDEVICE_NAME);
        /* unregister the ioctl32 sub-system */
        orangefs_ioctl32_cleanup();
}

static unsigned int orangefs_devreq_poll(struct file *file,
                                         struct poll_table_struct *poll_table)
{
        int poll_revent_mask = 0;

        if (open_access_count == 1) {
                poll_wait(file, &orangefs_request_list_waitq, poll_table);

                spin_lock(&orangefs_request_list_lock);
                if (!list_empty(&orangefs_request_list))
                        poll_revent_mask |= POLLIN;
                spin_unlock(&orangefs_request_list_lock);
        }
        return poll_revent_mask;
}
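
/*
 * Poll contract: the client-core can poll() the device and is woken
 * through orangefs_request_list_waitq once an op is queued (the
 * enqueue side lives elsewhere in the module); the non-blocking
 * read() above then returns either an op or -EAGAIN.
 */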

const struct file_operations orangefs_devreq_file_operations = {
        .owner = THIS_MODULE,
        .read = orangefs_devreq_read,
        .write_iter = orangefs_devreq_write_iter,
        .open = orangefs_devreq_open,
        .release = orangefs_devreq_release,
        .unlocked_ioctl = orangefs_devreq_ioctl,

#ifdef CONFIG_COMPAT            /* CONFIG_COMPAT is in .config */
        .compat_ioctl = orangefs_devreq_compat_ioctl,
#endif
        .poll = orangefs_devreq_poll
};