staging: delete non-required instances of include <linux/init.h>
[deliverable/linux.git] / drivers / staging / lustre / lustre / mdc / mdc_locks.c
1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 */
36
37 #define DEBUG_SUBSYSTEM S_MDC
38
39 # include <linux/module.h>
40 # include <linux/pagemap.h>
41 # include <linux/miscdevice.h>
42
43 #include <lustre_acl.h>
44 #include <obd_class.h>
45 #include <lustre_dlm.h>
46 /* fid_res_name_eq() */
47 #include <lustre_fid.h>
48 #include <lprocfs_status.h>
49 #include "mdc_internal.h"
50
/* Context bundle carried through an asynchronous getattr enqueue.
 * NOTE(review): the interpret callback that consumes (and presumably
 * frees) this is outside this chunk -- confirm ownership there. */
struct mdc_getattr_args {
	struct obd_export *ga_exp;	/* export the RPC was sent on */
	struct md_enqueue_info *ga_minfo;	/* caller's enqueue info */
	struct ldlm_enqueue_info *ga_einfo;	/* LDLM enqueue parameters */
};
56
57 int it_disposition(struct lookup_intent *it, int flag)
58 {
59 return it->d.lustre.it_disposition & flag;
60 }
61 EXPORT_SYMBOL(it_disposition);
62
63 void it_set_disposition(struct lookup_intent *it, int flag)
64 {
65 it->d.lustre.it_disposition |= flag;
66 }
67 EXPORT_SYMBOL(it_set_disposition);
68
69 void it_clear_disposition(struct lookup_intent *it, int flag)
70 {
71 it->d.lustre.it_disposition &= ~flag;
72 }
73 EXPORT_SYMBOL(it_clear_disposition);
74
75 int it_open_error(int phase, struct lookup_intent *it)
76 {
77 if (it_disposition(it, DISP_OPEN_LEASE)) {
78 if (phase >= DISP_OPEN_LEASE)
79 return it->d.lustre.it_status;
80 else
81 return 0;
82 }
83 if (it_disposition(it, DISP_OPEN_OPEN)) {
84 if (phase >= DISP_OPEN_OPEN)
85 return it->d.lustre.it_status;
86 else
87 return 0;
88 }
89
90 if (it_disposition(it, DISP_OPEN_CREATE)) {
91 if (phase >= DISP_OPEN_CREATE)
92 return it->d.lustre.it_status;
93 else
94 return 0;
95 }
96
97 if (it_disposition(it, DISP_LOOKUP_EXECD)) {
98 if (phase >= DISP_LOOKUP_EXECD)
99 return it->d.lustre.it_status;
100 else
101 return 0;
102 }
103
104 if (it_disposition(it, DISP_IT_EXECD)) {
105 if (phase >= DISP_IT_EXECD)
106 return it->d.lustre.it_status;
107 else
108 return 0;
109 }
110 CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition,
111 it->d.lustre.it_status);
112 LBUG();
113 return 0;
114 }
115 EXPORT_SYMBOL(it_open_error);
116
117 /* this must be called on a lockh that is known to have a referenced lock */
118 int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
119 __u64 *bits)
120 {
121 struct ldlm_lock *lock;
122 struct inode *new_inode = data;
123
124 if(bits)
125 *bits = 0;
126
127 if (!*lockh)
128 return 0;
129
130 lock = ldlm_handle2lock((struct lustre_handle *)lockh);
131
132 LASSERT(lock != NULL);
133 lock_res_and_lock(lock);
134 if (lock->l_resource->lr_lvb_inode &&
135 lock->l_resource->lr_lvb_inode != data) {
136 struct inode *old_inode = lock->l_resource->lr_lvb_inode;
137 LASSERTF(old_inode->i_state & I_FREEING,
138 "Found existing inode %p/%lu/%u state %lu in lock: "
139 "setting data to %p/%lu/%u\n", old_inode,
140 old_inode->i_ino, old_inode->i_generation,
141 old_inode->i_state,
142 new_inode, new_inode->i_ino, new_inode->i_generation);
143 }
144 lock->l_resource->lr_lvb_inode = new_inode;
145 if (bits)
146 *bits = lock->l_policy_data.l_inodebits.bits;
147
148 unlock_res_and_lock(lock);
149 LDLM_LOCK_PUT(lock);
150
151 return 0;
152 }
153
154 ldlm_mode_t mdc_lock_match(struct obd_export *exp, __u64 flags,
155 const struct lu_fid *fid, ldlm_type_t type,
156 ldlm_policy_data_t *policy, ldlm_mode_t mode,
157 struct lustre_handle *lockh)
158 {
159 struct ldlm_res_id res_id;
160 ldlm_mode_t rc;
161
162 fid_build_reg_res_name(fid, &res_id);
163 rc = ldlm_lock_match(class_exp2obd(exp)->obd_namespace, flags,
164 &res_id, type, policy, mode, lockh, 0);
165 return rc;
166 }
167
168 int mdc_cancel_unused(struct obd_export *exp,
169 const struct lu_fid *fid,
170 ldlm_policy_data_t *policy,
171 ldlm_mode_t mode,
172 ldlm_cancel_flags_t flags,
173 void *opaque)
174 {
175 struct ldlm_res_id res_id;
176 struct obd_device *obd = class_exp2obd(exp);
177 int rc;
178
179 fid_build_reg_res_name(fid, &res_id);
180 rc = ldlm_cli_cancel_unused_resource(obd->obd_namespace, &res_id,
181 policy, mode, flags, opaque);
182 return rc;
183 }
184
185 int mdc_null_inode(struct obd_export *exp,
186 const struct lu_fid *fid)
187 {
188 struct ldlm_res_id res_id;
189 struct ldlm_resource *res;
190 struct ldlm_namespace *ns = class_exp2obd(exp)->obd_namespace;
191
192 LASSERTF(ns != NULL, "no namespace passed\n");
193
194 fid_build_reg_res_name(fid, &res_id);
195
196 res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
197 if(res == NULL)
198 return 0;
199
200 lock_res(res);
201 res->lr_lvb_inode = NULL;
202 unlock_res(res);
203
204 ldlm_resource_putref(res);
205 return 0;
206 }
207
208 /* find any ldlm lock of the inode in mdc
209 * return 0 not find
210 * 1 find one
211 * < 0 error */
212 int mdc_find_cbdata(struct obd_export *exp,
213 const struct lu_fid *fid,
214 ldlm_iterator_t it, void *data)
215 {
216 struct ldlm_res_id res_id;
217 int rc = 0;
218
219 fid_build_reg_res_name((struct lu_fid*)fid, &res_id);
220 rc = ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id,
221 it, data);
222 if (rc == LDLM_ITER_STOP)
223 return 1;
224 else if (rc == LDLM_ITER_CONTINUE)
225 return 0;
226 return rc;
227 }
228
229 static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
230 {
231 /* Don't hold error requests for replay. */
232 if (req->rq_replay) {
233 spin_lock(&req->rq_lock);
234 req->rq_replay = 0;
235 spin_unlock(&req->rq_lock);
236 }
237 if (rc && req->rq_transno != 0) {
238 DEBUG_REQ(D_ERROR, req, "transno returned on error rc %d", rc);
239 LBUG();
240 }
241 }
242
243 /* Save a large LOV EA into the request buffer so that it is available
244 * for replay. We don't do this in the initial request because the
245 * original request doesn't need this buffer (at most it sends just the
246 * lov_mds_md) and it is a waste of RAM/bandwidth to send the empty
247 * buffer and may also be difficult to allocate and save a very large
248 * request buffer for each open. (bug 5707)
249 *
250 * OOM here may cause recovery failure if lmm is needed (only for the
251 * original open if the MDS crashed just when this client also OOM'd)
252 * but this is incredibly unlikely, and questionable whether the client
253 * could do MDS recovery under OOM anyways... */
254 static void mdc_realloc_openmsg(struct ptlrpc_request *req,
255 struct mdt_body *body)
256 {
257 int rc;
258
259 /* FIXME: remove this explicit offset. */
260 rc = sptlrpc_cli_enlarge_reqbuf(req, DLM_INTENT_REC_OFF + 4,
261 body->eadatasize);
262 if (rc) {
263 CERROR("Can't enlarge segment %d size to %d\n",
264 DLM_INTENT_REC_OFF + 4, body->eadatasize);
265 body->valid &= ~OBD_MD_FLEASIZE;
266 body->eadatasize = 0;
267 }
268 }
269
270 static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
271 struct lookup_intent *it,
272 struct md_op_data *op_data,
273 void *lmm, int lmmsize,
274 void *cb_data)
275 {
276 struct ptlrpc_request *req;
277 struct obd_device *obddev = class_exp2obd(exp);
278 struct ldlm_intent *lit;
279 LIST_HEAD(cancels);
280 int count = 0;
281 int mode;
282 int rc;
283
284 it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG;
285
286 /* XXX: openlock is not cancelled for cross-refs. */
287 /* If inode is known, cancel conflicting OPEN locks. */
288 if (fid_is_sane(&op_data->op_fid2)) {
289 if (it->it_flags & MDS_OPEN_LEASE) { /* try to get lease */
290 if (it->it_flags & FMODE_WRITE)
291 mode = LCK_EX;
292 else
293 mode = LCK_PR;
294 } else {
295 if (it->it_flags & (FMODE_WRITE|MDS_OPEN_TRUNC))
296 mode = LCK_CW;
297 #ifdef FMODE_EXEC
298 else if (it->it_flags & FMODE_EXEC)
299 mode = LCK_PR;
300 #endif
301 else
302 mode = LCK_CR;
303 }
304 count = mdc_resource_get_unused(exp, &op_data->op_fid2,
305 &cancels, mode,
306 MDS_INODELOCK_OPEN);
307 }
308
309 /* If CREATE, cancel parent's UPDATE lock. */
310 if (it->it_op & IT_CREAT)
311 mode = LCK_EX;
312 else
313 mode = LCK_CR;
314 count += mdc_resource_get_unused(exp, &op_data->op_fid1,
315 &cancels, mode,
316 MDS_INODELOCK_UPDATE);
317
318 req = ptlrpc_request_alloc(class_exp2cliimp(exp),
319 &RQF_LDLM_INTENT_OPEN);
320 if (req == NULL) {
321 ldlm_lock_list_put(&cancels, l_bl_ast, count);
322 return ERR_PTR(-ENOMEM);
323 }
324
325 /* parent capability */
326 mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
327 /* child capability, reserve the size according to parent capa, it will
328 * be filled after we get the reply */
329 mdc_set_capa_size(req, &RMF_CAPA2, op_data->op_capa1);
330
331 req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
332 op_data->op_namelen + 1);
333 req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
334 max(lmmsize, obddev->u.cli.cl_default_mds_easize));
335
336 rc = ldlm_prep_enqueue_req(exp, req, &cancels, count);
337 if (rc) {
338 ptlrpc_request_free(req);
339 return NULL;
340 }
341
342 spin_lock(&req->rq_lock);
343 req->rq_replay = req->rq_import->imp_replayable;
344 spin_unlock(&req->rq_lock);
345
346 /* pack the intent */
347 lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
348 lit->opc = (__u64)it->it_op;
349
350 /* pack the intended request */
351 mdc_open_pack(req, op_data, it->it_create_mode, 0, it->it_flags, lmm,
352 lmmsize);
353
354 /* for remote client, fetch remote perm for current user */
355 if (client_is_remote(exp))
356 req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
357 sizeof(struct mdt_remote_perm));
358 ptlrpc_request_set_replen(req);
359 return req;
360 }
361
362 static struct ptlrpc_request *
363 mdc_intent_getxattr_pack(struct obd_export *exp,
364 struct lookup_intent *it,
365 struct md_op_data *op_data)
366 {
367 struct ptlrpc_request *req;
368 struct ldlm_intent *lit;
369 int rc, count = 0, maxdata;
370 LIST_HEAD(cancels);
371
372
373
374 req = ptlrpc_request_alloc(class_exp2cliimp(exp),
375 &RQF_LDLM_INTENT_GETXATTR);
376 if (req == NULL)
377 return ERR_PTR(-ENOMEM);
378
379 mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
380
381 if (it->it_op == IT_SETXATTR)
382 /* If we want to upgrade to LCK_PW, let's cancel LCK_PR
383 * locks now. This avoids unnecessary ASTs. */
384 count = mdc_resource_get_unused(exp, &op_data->op_fid1,
385 &cancels, LCK_PW,
386 MDS_INODELOCK_XATTR);
387
388 rc = ldlm_prep_enqueue_req(exp, req, &cancels, count);
389 if (rc) {
390 ptlrpc_request_free(req);
391 return ERR_PTR(rc);
392 }
393
394 /* pack the intent */
395 lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
396 lit->opc = IT_GETXATTR;
397
398 maxdata = class_exp2cliimp(exp)->imp_connect_data.ocd_max_easize;
399
400 /* pack the intended request */
401 mdc_pack_body(req, &op_data->op_fid1, op_data->op_capa1,
402 op_data->op_valid, maxdata, -1, 0);
403
404 req_capsule_set_size(&req->rq_pill, &RMF_EADATA,
405 RCL_SERVER, maxdata);
406
407 req_capsule_set_size(&req->rq_pill, &RMF_EAVALS,
408 RCL_SERVER, maxdata);
409
410 req_capsule_set_size(&req->rq_pill, &RMF_EAVALS_LENS,
411 RCL_SERVER, maxdata);
412
413 ptlrpc_request_set_replen(req);
414
415 return req;
416 }
417
418 static struct ptlrpc_request *mdc_intent_unlink_pack(struct obd_export *exp,
419 struct lookup_intent *it,
420 struct md_op_data *op_data)
421 {
422 struct ptlrpc_request *req;
423 struct obd_device *obddev = class_exp2obd(exp);
424 struct ldlm_intent *lit;
425 int rc;
426
427 req = ptlrpc_request_alloc(class_exp2cliimp(exp),
428 &RQF_LDLM_INTENT_UNLINK);
429 if (req == NULL)
430 return ERR_PTR(-ENOMEM);
431
432 mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
433 req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
434 op_data->op_namelen + 1);
435
436 rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
437 if (rc) {
438 ptlrpc_request_free(req);
439 return ERR_PTR(rc);
440 }
441
442 /* pack the intent */
443 lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
444 lit->opc = (__u64)it->it_op;
445
446 /* pack the intended request */
447 mdc_unlink_pack(req, op_data);
448
449 req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
450 obddev->u.cli.cl_max_mds_easize);
451 req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
452 obddev->u.cli.cl_max_mds_cookiesize);
453 ptlrpc_request_set_replen(req);
454 return req;
455 }
456
457 static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp,
458 struct lookup_intent *it,
459 struct md_op_data *op_data)
460 {
461 struct ptlrpc_request *req;
462 struct obd_device *obddev = class_exp2obd(exp);
463 obd_valid valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE |
464 OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA |
465 OBD_MD_FLMDSCAPA | OBD_MD_MEA |
466 (client_is_remote(exp) ?
467 OBD_MD_FLRMTPERM : OBD_MD_FLACL);
468 struct ldlm_intent *lit;
469 int rc;
470
471 req = ptlrpc_request_alloc(class_exp2cliimp(exp),
472 &RQF_LDLM_INTENT_GETATTR);
473 if (req == NULL)
474 return ERR_PTR(-ENOMEM);
475
476 mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
477 req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
478 op_data->op_namelen + 1);
479
480 rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
481 if (rc) {
482 ptlrpc_request_free(req);
483 return ERR_PTR(rc);
484 }
485
486 /* pack the intent */
487 lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
488 lit->opc = (__u64)it->it_op;
489
490 /* pack the intended request */
491 mdc_getattr_pack(req, valid, it->it_flags, op_data,
492 obddev->u.cli.cl_max_mds_easize);
493
494 req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
495 obddev->u.cli.cl_max_mds_easize);
496 if (client_is_remote(exp))
497 req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
498 sizeof(struct mdt_remote_perm));
499 ptlrpc_request_set_replen(req);
500 return req;
501 }
502
503 static struct ptlrpc_request *mdc_intent_layout_pack(struct obd_export *exp,
504 struct lookup_intent *it,
505 struct md_op_data *unused)
506 {
507 struct obd_device *obd = class_exp2obd(exp);
508 struct ptlrpc_request *req;
509 struct ldlm_intent *lit;
510 struct layout_intent *layout;
511 int rc;
512
513 req = ptlrpc_request_alloc(class_exp2cliimp(exp),
514 &RQF_LDLM_INTENT_LAYOUT);
515 if (req == NULL)
516 return ERR_PTR(-ENOMEM);
517
518 req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, 0);
519 rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
520 if (rc) {
521 ptlrpc_request_free(req);
522 return ERR_PTR(rc);
523 }
524
525 /* pack the intent */
526 lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
527 lit->opc = (__u64)it->it_op;
528
529 /* pack the layout intent request */
530 layout = req_capsule_client_get(&req->rq_pill, &RMF_LAYOUT_INTENT);
531 /* LAYOUT_INTENT_ACCESS is generic, specific operation will be
532 * set for replication */
533 layout->li_opc = LAYOUT_INTENT_ACCESS;
534
535 req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
536 obd->u.cli.cl_max_mds_easize);
537 ptlrpc_request_set_replen(req);
538 return req;
539 }
540
541 static struct ptlrpc_request *
542 mdc_enqueue_pack(struct obd_export *exp, int lvb_len)
543 {
544 struct ptlrpc_request *req;
545 int rc;
546
547 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE);
548 if (req == NULL)
549 return ERR_PTR(-ENOMEM);
550
551 rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
552 if (rc) {
553 ptlrpc_request_free(req);
554 return ERR_PTR(rc);
555 }
556
557 req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len);
558 ptlrpc_request_set_replen(req);
559 return req;
560 }
561
/*
 * Post-process the reply of an intent enqueue RPC.
 *
 * Fixes up the client lock state (handles server-aborted locks and mode
 * changes), copies the intent disposition/status out of the DLM reply into
 * it->d.lustre, clears the replay flag for failed requests, validates the
 * reply buffers per intent type, and installs layout LVB data on the lock.
 *
 * @exp:   export the request was sent on
 * @req:   completed enqueue request
 * @einfo: ei_mode is updated to the granted mode (0 if the lock aborted)
 * @it:    intent; its d.lustre fields are filled from the reply
 * @lockh: lock handle from ldlm_cli_enqueue(); zeroed on abort
 * @rc:    non-negative result from ldlm_cli_enqueue()
 *
 * Returns @rc (possibly reset to 0 on ELDLM_LOCK_ABORTED) or a negative
 * errno if the reply is malformed / allocation fails.
 */
static int mdc_finish_enqueue(struct obd_export *exp,
			      struct ptlrpc_request *req,
			      struct ldlm_enqueue_info *einfo,
			      struct lookup_intent *it,
			      struct lustre_handle *lockh,
			      int rc)
{
	struct req_capsule *pill = &req->rq_pill;
	struct ldlm_request *lockreq;
	struct ldlm_reply *lockrep;
	struct lustre_intent_data *intent = &it->d.lustre;
	struct ldlm_lock *lock;
	void *lvb_data = NULL;
	int lvb_len = 0;

	LASSERT(rc >= 0);
	/* Similarly, if we're going to replay this request, we don't want to
	 * actually get a lock, just perform the intent. */
	if (req->rq_transno || req->rq_replay) {
		lockreq = req_capsule_client_get(pill, &RMF_DLM_REQ);
		lockreq->lock_flags |= ldlm_flags_to_wire(LDLM_FL_INTENT_ONLY);
	}

	if (rc == ELDLM_LOCK_ABORTED) {
		/* Server executed the intent but granted no lock. */
		einfo->ei_mode = 0;
		memset(lockh, 0, sizeof(*lockh));
		rc = 0;
	} else { /* rc = 0 */
		lock = ldlm_handle2lock(lockh);
		LASSERT(lock != NULL);

		/* If the server gave us back a different lock mode, we should
		 * fix up our variables. */
		if (lock->l_req_mode != einfo->ei_mode) {
			/* addref before decref so the refcount never dips */
			ldlm_lock_addref(lockh, lock->l_req_mode);
			ldlm_lock_decref(lockh, einfo->ei_mode);
			einfo->ei_mode = lock->l_req_mode;
		}
		LDLM_LOCK_PUT(lock);
	}

	lockrep = req_capsule_server_get(pill, &RMF_DLM_REP);
	LASSERT(lockrep != NULL); /* checked by ldlm_cli_enqueue() */

	/* Copy the server's intent result into the caller-visible intent. */
	intent->it_disposition = (int)lockrep->lock_policy_res1;
	intent->it_status = (int)lockrep->lock_policy_res2;
	intent->it_lock_mode = einfo->ei_mode;
	intent->it_lock_handle = lockh->cookie;
	intent->it_data = req;

	/* Technically speaking rq_transno must already be zero if
	 * it_status is in error, so the check is a bit redundant */
	if ((!req->rq_transno || intent->it_status < 0) && req->rq_replay)
		mdc_clear_replay_flag(req, intent->it_status);

	/* If we're doing an IT_OPEN which did not result in an actual
	 * successful open, then we need to remove the bit which saves
	 * this request for unconditional replay.
	 *
	 * It's important that we do this first!  Otherwise we might exit the
	 * function without doing so, and try to replay a failed create
	 * (bug 3440) */
	if (it->it_op & IT_OPEN && req->rq_replay &&
	    (!it_disposition(it, DISP_OPEN_OPEN) || intent->it_status != 0))
		mdc_clear_replay_flag(req, intent->it_status);

	DEBUG_REQ(D_RPCTRACE, req, "op: %d disposition: %x, status: %d",
		  it->it_op, intent->it_disposition, intent->it_status);

	/* We know what to expect, so we do any byte flipping required here */
	if (it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR)) {
		struct mdt_body *body;

		body = req_capsule_server_get(pill, &RMF_MDT_BODY);
		if (body == NULL) {
			CERROR ("Can't swab mdt_body\n");
			return -EPROTO;
		}

		if (it_disposition(it, DISP_OPEN_OPEN) &&
		    !it_open_error(DISP_OPEN_OPEN, it)) {
			/*
			 * If this is a successful OPEN request, we need to set
			 * replay handler and data early, so that if replay
			 * happens immediately after swabbing below, new reply
			 * is swabbed by that handler correctly.
			 */
			mdc_set_open_replay_data(NULL, NULL, req);
		}

		if ((body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) {
			void *eadata;

			mdc_update_max_ea_from_body(exp, body);

			/*
			 * The eadata is opaque; just check that it is there.
			 * Eventually, obd_unpackmd() will check the contents.
			 */
			eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
							      body->eadatasize);
			if (eadata == NULL)
				return -EPROTO;

			/* save lvb data and length in case this is for layout
			 * lock */
			lvb_data = eadata;
			lvb_len = body->eadatasize;

			/*
			 * We save the reply LOV EA in case we have to replay a
			 * create for recovery.  If we didn't allocate a large
			 * enough request buffer above we need to reallocate it
			 * here to hold the actual LOV EA.
			 *
			 * To not save LOV EA if request is not going to replay
			 * (for example error one).
			 */
			if ((it->it_op & IT_OPEN) && req->rq_replay) {
				void *lmm;

				if (req_capsule_get_size(pill, &RMF_EADATA,
							 RCL_CLIENT) <
				    body->eadatasize)
					mdc_realloc_openmsg(req, body);
				else
					req_capsule_shrink(pill, &RMF_EADATA,
							   body->eadatasize,
							   RCL_CLIENT);

				req_capsule_set_size(pill, &RMF_EADATA,
						     RCL_CLIENT,
						     body->eadatasize);

				lmm = req_capsule_client_get(pill, &RMF_EADATA);
				if (lmm)
					memcpy(lmm, eadata, body->eadatasize);
			}
		}

		if (body->valid & OBD_MD_FLRMTPERM) {
			struct mdt_remote_perm *perm;

			LASSERT(client_is_remote(exp));
			perm = req_capsule_server_swab_get(pill, &RMF_ACL,
						lustre_swab_mdt_remote_perm);
			if (perm == NULL)
				return -EPROTO;
		}
		if (body->valid & OBD_MD_FLMDSCAPA) {
			struct lustre_capa *capa, *p;

			capa = req_capsule_server_get(pill, &RMF_CAPA1);
			if (capa == NULL)
				return -EPROTO;

			if (it->it_op & IT_OPEN) {
				/* client fid capa will be checked in replay */
				p = req_capsule_client_get(pill, &RMF_CAPA2);
				LASSERT(p);
				*p = *capa;
			}
		}
		if (body->valid & OBD_MD_FLOSSCAPA) {
			struct lustre_capa *capa;

			capa = req_capsule_server_get(pill, &RMF_CAPA2);
			if (capa == NULL)
				return -EPROTO;
		}
	} else if (it->it_op & IT_LAYOUT) {
		/* maybe the lock was granted right away and layout
		 * is packed into RMF_DLM_LVB of req */
		lvb_len = req_capsule_get_size(pill, &RMF_DLM_LVB, RCL_SERVER);
		if (lvb_len > 0) {
			lvb_data = req_capsule_server_sized_get(pill,
							&RMF_DLM_LVB, lvb_len);
			if (lvb_data == NULL)
				return -EPROTO;
		}
	}

	/* fill in stripe data for layout lock */
	lock = ldlm_handle2lock(lockh);
	if (lock != NULL && ldlm_has_layout(lock) && lvb_data != NULL) {
		void *lmm;

		LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d\n",
			ldlm_it2str(it->it_op), lvb_len);

		OBD_ALLOC_LARGE(lmm, lvb_len);
		if (lmm == NULL) {
			LDLM_LOCK_PUT(lock);
			return -ENOMEM;
		}
		memcpy(lmm, lvb_data, lvb_len);

		/* install lvb_data; only the first installer wins, a
		 * concurrent installer's copy is freed below */
		lock_res_and_lock(lock);
		if (lock->l_lvb_data == NULL) {
			lock->l_lvb_data = lmm;
			lock->l_lvb_len = lvb_len;
			lmm = NULL;
		}
		unlock_res_and_lock(lock);
		if (lmm != NULL)
			OBD_FREE_LARGE(lmm, lvb_len);
	}
	if (lock != NULL)
		LDLM_LOCK_PUT(lock);

	return rc;
}
774
/* We always reserve enough space in the reply packet for a stripe MD, because
 * we don't know in advance the file type. */
/*
 * Enqueue a metadata lock, optionally carrying an intent operation.
 *
 * With an intent (@it != NULL) the appropriate intent request is packed
 * per it_op; without one this is a flock enqueue and @lmm smuggles the
 * flock policy (lmmsize == 0).  IT_CREAT requests are retried here (not
 * by ptlrpc) while the server returns -EINPROGRESS, as long as the import
 * generation is unchanged.
 *
 * @reqp must be NULL (asserted); the resulting request is handed to the
 * caller via it->d.lustre.it_data by mdc_finish_enqueue().
 *
 * Returns 0 or the enqueue result on success, negative errno on failure.
 */
int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
		struct lookup_intent *it, struct md_op_data *op_data,
		struct lustre_handle *lockh, void *lmm, int lmmsize,
		struct ptlrpc_request **reqp, __u64 extra_lock_flags)
{
	struct obd_device *obddev = class_exp2obd(exp);
	struct ptlrpc_request *req = NULL;
	__u64 flags, saved_flags = extra_lock_flags;
	int rc;
	struct ldlm_res_id res_id;
	static const ldlm_policy_data_t lookup_policy =
			    { .l_inodebits = { MDS_INODELOCK_LOOKUP } };
	static const ldlm_policy_data_t update_policy =
			    { .l_inodebits = { MDS_INODELOCK_UPDATE } };
	static const ldlm_policy_data_t layout_policy =
			    { .l_inodebits = { MDS_INODELOCK_LAYOUT } };
	static const ldlm_policy_data_t getxattr_policy = {
			    .l_inodebits = { MDS_INODELOCK_XATTR } };
	ldlm_policy_data_t const *policy = &lookup_policy;
	int generation, resends = 0;
	struct ldlm_reply *lockrep;
	enum lvb_type lvb_type = 0;

	LASSERTF(!it || einfo->ei_type == LDLM_IBITS, "lock type %d\n",
		 einfo->ei_type);

	fid_build_reg_res_name(&op_data->op_fid1, &res_id);

	/* Select the inodebits policy matching the intent operation. */
	if (it) {
		saved_flags |= LDLM_FL_HAS_INTENT;
		if (it->it_op & (IT_UNLINK | IT_GETATTR | IT_READDIR))
			policy = &update_policy;
		else if (it->it_op & IT_LAYOUT)
			policy = &layout_policy;
		else if (it->it_op & (IT_GETXATTR | IT_SETXATTR))
			policy = &getxattr_policy;
	}

	LASSERT(reqp == NULL);

	/* Remember the import generation so resends can detect eviction. */
	generation = obddev->u.cli.cl_import->imp_generation;
resend:
	flags = saved_flags;
	if (!it) {
		/* The only way right now is FLOCK, in this case we hide flock
		   policy as lmm, but lmmsize is 0 */
		LASSERT(lmm && lmmsize == 0);
		LASSERTF(einfo->ei_type == LDLM_FLOCK, "lock type %d\n",
			 einfo->ei_type);
		policy = (ldlm_policy_data_t *)lmm;
		res_id.name[3] = LDLM_FLOCK;
	} else if (it->it_op & IT_OPEN) {
		req = mdc_intent_open_pack(exp, it, op_data, lmm, lmmsize,
					   einfo->ei_cbdata);
		policy = &update_policy;
		einfo->ei_cbdata = NULL;
		lmm = NULL;
	} else if (it->it_op & IT_UNLINK) {
		req = mdc_intent_unlink_pack(exp, it, op_data);
	} else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
		req = mdc_intent_getattr_pack(exp, it, op_data);
	} else if (it->it_op & IT_READDIR) {
		req = mdc_enqueue_pack(exp, 0);
	} else if (it->it_op & IT_LAYOUT) {
		if (!imp_connect_lvb_type(class_exp2cliimp(exp)))
			return -EOPNOTSUPP;
		req = mdc_intent_layout_pack(exp, it, op_data);
		lvb_type = LVB_T_LAYOUT;
	} else if (it->it_op & (IT_GETXATTR | IT_SETXATTR)) {
		req = mdc_intent_getxattr_pack(exp, it, op_data);
	} else {
		LBUG();
		return -EINVAL;
	}

	/* NOTE(review): mdc_intent_open_pack() can return NULL on
	 * ldlm_prep_enqueue_req() failure, which this check misses. */
	if (IS_ERR(req))
		return PTR_ERR(req);

	if (req != NULL && it && it->it_op & IT_CREAT)
		/* ask ptlrpc not to resend on EINPROGRESS since we have our own
		 * retry logic */
		req->rq_no_retry_einprogress = 1;

	if (resends) {
		/* Delay the resend proportionally to the retry count. */
		req->rq_generation_set = 1;
		req->rq_import_generation = generation;
		req->rq_sent = cfs_time_current_sec() + resends;
	}

	/* It is important to obtain rpc_lock first (if applicable), so that
	 * threads that are serialised with rpc_lock are not polluting our
	 * rpcs in flight counter. We do not do flock request limiting, though*/
	if (it) {
		mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
		rc = mdc_enter_request(&obddev->u.cli);
		if (rc != 0) {
			mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
			mdc_clear_replay_flag(req, 0);
			ptlrpc_req_finished(req);
			return rc;
		}
	}

	rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, policy, &flags, NULL,
			      0, lvb_type, lockh, 0);
	if (!it) {
		/* For flock requests we immediatelly return without further
		   delay and let caller deal with the rest, since rest of
		   this function metadata processing makes no sense for flock
		   requests anyway. But in case of problem during comms with
		   Server (ETIMEDOUT) or any signal/kill attempt (EINTR), we
		   can not rely on caller and this mainly for F_UNLCKs
		   (explicits or automatically generated by Kernel to clean
		   current FLocks upon exit) that can't be trashed */
		if ((rc == -EINTR) || (rc == -ETIMEDOUT))
			goto resend;
		return rc;
	}

	mdc_exit_request(&obddev->u.cli);
	mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);

	if (rc < 0) {
		CERROR("ldlm_cli_enqueue: %d\n", rc);
		mdc_clear_replay_flag(req, rc);
		ptlrpc_req_finished(req);
		return rc;
	}

	lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
	LASSERT(lockrep != NULL);

	lockrep->lock_policy_res2 =
		ptlrpc_status_ntoh(lockrep->lock_policy_res2);

	/* Retry the create infinitely when we get -EINPROGRESS from
	 * server. This is required by the new quota design. */
	if (it && it->it_op & IT_CREAT &&
	    (int)lockrep->lock_policy_res2 == -EINPROGRESS) {
		mdc_clear_replay_flag(req, rc);
		ptlrpc_req_finished(req);
		resends++;

		CDEBUG(D_HA, "%s: resend:%d op:%d "DFID"/"DFID"\n",
		       obddev->obd_name, resends, it->it_op,
		       PFID(&op_data->op_fid1), PFID(&op_data->op_fid2));

		if (generation == obddev->u.cli.cl_import->imp_generation) {
			goto resend;
		} else {
			/* import was evicted and reconnected; give up */
			CDEBUG(D_HA, "resend cross eviction\n");
			return -EIO;
		}
	}

	rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc);
	if (rc < 0) {
		/* drop the (possibly granted) lock and the request */
		if (lustre_handle_is_used(lockh)) {
			ldlm_lock_decref(lockh, einfo->ei_mode);
			memset(lockh, 0, sizeof(*lockh));
		}
		ptlrpc_req_finished(req);
	}
	return rc;
}
942
/*
 * Finish an intent lock operation after the enqueue reply was processed.
 *
 * Checks the intent's execution/disposition state, detects stale
 * revalidation (fid mismatch), pins the request for the later open/create
 * phases via DISP_ENQ_*_REF, and collapses a newly granted lock onto an
 * already held matching one.
 *
 * @exp:     export used for the RPC
 * @request: the intent request (must be a valid, non-poisoned pointer)
 * @op_data: operation data with the fids being looked up
 * @it:      intent with server-filled disposition/status
 * @lockh:   handle of the lock granted by the enqueue
 *
 * Returns 0 on success, the server's intent error, or -ESTALE on a
 * revalidation mismatch.
 */
static int mdc_finish_intent_lock(struct obd_export *exp,
				  struct ptlrpc_request *request,
				  struct md_op_data *op_data,
				  struct lookup_intent *it,
				  struct lustre_handle *lockh)
{
	struct lustre_handle old_lock;
	struct mdt_body *mdt_body;
	struct ldlm_lock *lock;
	int rc;

	LASSERT(request != NULL);
	LASSERT(request != LP_POISON);
	LASSERT(request->rq_repmsg != LP_POISON);

	if (!it_disposition(it, DISP_IT_EXECD)) {
		/* The server failed before it even started executing the
		 * intent, i.e. because it couldn't unpack the request. */
		LASSERT(it->d.lustre.it_status != 0);
		return it->d.lustre.it_status;
	}
	rc = it_open_error(DISP_IT_EXECD, it);
	if (rc)
		return rc;

	mdt_body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
	LASSERT(mdt_body != NULL);      /* mdc_enqueue checked */

	/* If we were revalidating a fid/name pair, mark the intent in
	 * case we fail and get called again from lookup */
	if (fid_is_sane(&op_data->op_fid2) &&
	    it->it_create_mode & M_CHECK_STALE &&
	    it->it_op != IT_GETATTR) {
		it_set_disposition(it, DISP_ENQ_COMPLETE);

		/* Also: did we find the same inode? */
		/* sever can return one of two fids:
		 * op_fid2 - new allocated fid - if file is created.
		 * op_fid3 - existent fid - if file only open.
		 * op_fid3 is saved in lmv_intent_open */
		if ((!lu_fid_eq(&op_data->op_fid2, &mdt_body->fid1)) &&
		    (!lu_fid_eq(&op_data->op_fid3, &mdt_body->fid1))) {
			/* NOTE(review): the second FID printed below looks
			 * like it should be op_fid3 -- confirm upstream. */
			CDEBUG(D_DENTRY, "Found stale data "DFID"("DFID")/"DFID
			       "\n", PFID(&op_data->op_fid2),
			       PFID(&op_data->op_fid2), PFID(&mdt_body->fid1));
			return -ESTALE;
		}
	}

	rc = it_open_error(DISP_LOOKUP_EXECD, it);
	if (rc)
		return rc;

	/* keep requests around for the multiple phases of the call
	 * this shows the DISP_XX must guarantee we make it into the call
	 */
	if (!it_disposition(it, DISP_ENQ_CREATE_REF) &&
	    it_disposition(it, DISP_OPEN_CREATE) &&
	    !it_open_error(DISP_OPEN_CREATE, it)) {
		it_set_disposition(it, DISP_ENQ_CREATE_REF);
		ptlrpc_request_addref(request); /* balanced in ll_create_node */
	}
	if (!it_disposition(it, DISP_ENQ_OPEN_REF) &&
	    it_disposition(it, DISP_OPEN_OPEN) &&
	    !it_open_error(DISP_OPEN_OPEN, it)) {
		it_set_disposition(it, DISP_ENQ_OPEN_REF);
		ptlrpc_request_addref(request); /* balanced in ll_file_open */
		/* BUG 11546 - eviction in the middle of open rpc processing */
		OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_ENQUEUE_PAUSE, obd_timeout);
	}

	if (it->it_op & IT_CREAT) {
		/* XXX this belongs in ll_create_it */
	} else if (it->it_op == IT_OPEN) {
		LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
	} else {
		LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP | IT_LAYOUT));
	}

	/* If we already have a matching lock, then cancel the new
	 * one.  We have to set the data here instead of in
	 * mdc_enqueue, because we need to use the child's inode as
	 * the l_ast_data to match, and that's not available until
	 * intent_finish has performed the iget().) */
	lock = ldlm_handle2lock(lockh);
	if (lock) {
		ldlm_policy_data_t policy = lock->l_policy_data;

		LDLM_DEBUG(lock, "matching against this");

		LASSERTF(fid_res_name_eq(&mdt_body->fid1,
					 &lock->l_resource->lr_name),
			 "Lock res_id: "DLDLMRES", fid: "DFID"\n",
			 PLDLMRES(lock->l_resource), PFID(&mdt_body->fid1));
		LDLM_LOCK_PUT(lock);

		memcpy(&old_lock, lockh, sizeof(*lockh));
		/* NULL namespace + LCK_NL: match any granted lock on the
		 * same resource/bits already referenced by this handle */
		if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
				    LDLM_IBITS, &policy, LCK_NL,
				    &old_lock, 0)) {
			ldlm_lock_decref_and_cancel(lockh,
						    it->d.lustre.it_lock_mode);
			memcpy(lockh, &old_lock, sizeof(old_lock));
			it->d.lustre.it_lock_handle = lockh->cookie;
		}
	}
	CDEBUG(D_DENTRY,"D_IT dentry %.*s intent: %s status %d disp %x rc %d\n",
	       op_data->op_namelen, op_data->op_name, ldlm_it2str(it->it_op),
	       it->d.lustre.it_status, it->d.lustre.it_disposition, rc);
	return rc;
}
1052
1053 int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
1054 struct lu_fid *fid, __u64 *bits)
1055 {
1056 /* We could just return 1 immediately, but since we should only
1057 * be called in revalidate_it if we already have a lock, let's
1058 * verify that. */
1059 struct ldlm_res_id res_id;
1060 struct lustre_handle lockh;
1061 ldlm_policy_data_t policy;
1062 ldlm_mode_t mode;
1063
1064 if (it->d.lustre.it_lock_handle) {
1065 lockh.cookie = it->d.lustre.it_lock_handle;
1066 mode = ldlm_revalidate_lock_handle(&lockh, bits);
1067 } else {
1068 fid_build_reg_res_name(fid, &res_id);
1069 switch (it->it_op) {
1070 case IT_GETATTR:
1071 policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
1072 break;
1073 case IT_LAYOUT:
1074 policy.l_inodebits.bits = MDS_INODELOCK_LAYOUT;
1075 break;
1076 default:
1077 policy.l_inodebits.bits = MDS_INODELOCK_LOOKUP;
1078 break;
1079 }
1080 mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
1081 LDLM_FL_BLOCK_GRANTED, &res_id,
1082 LDLM_IBITS, &policy,
1083 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh, 0);
1084 }
1085
1086 if (mode) {
1087 it->d.lustre.it_lock_handle = lockh.cookie;
1088 it->d.lustre.it_lock_mode = mode;
1089 } else {
1090 it->d.lustre.it_lock_handle = 0;
1091 it->d.lustre.it_lock_mode = 0;
1092 }
1093
1094 return !!mode;
1095 }
1096
1097 /*
1098 * This long block is all about fixing up the lock and request state
1099 * so that it is correct as of the moment _before_ the operation was
1100 * applied; that way, the VFS will think that everything is normal and
1101 * call Lustre's regular VFS methods.
1102 *
1103 * If we're performing a creation, that means that unless the creation
1104 * failed with EEXIST, we should fake up a negative dentry.
1105 *
1106 * For everything else, we want to lookup to succeed.
1107 *
1108 * One additional note: if CREATE or OPEN succeeded, we add an extra
1109 * reference to the request because we need to keep it around until
1110 * ll_create/ll_open gets called.
1111 *
1112 * The server will return to us, in it_disposition, an indication of
1113 * exactly what d.lustre.it_status refers to.
1114 *
1115 * If DISP_OPEN_OPEN is set, then d.lustre.it_status refers to the open() call,
1116 * otherwise if DISP_OPEN_CREATE is set, then it status is the
1117 * creation failure mode. In either case, one of DISP_LOOKUP_NEG or
1118 * DISP_LOOKUP_POS will be set, indicating whether the child lookup
1119 * was successful.
1120 *
1121 * Else, if DISP_LOOKUP_EXECD then d.lustre.it_status is the rc of the
1122 * child lookup.
1123 */
int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
		    void *lmm, int lmmsize, struct lookup_intent *it,
		    int lookup_flags, struct ptlrpc_request **reqp,
		    ldlm_blocking_callback cb_blocking,
		    __u64 extra_lock_flags)
{
	struct lustre_handle lockh;
	int rc = 0;

	LASSERT(it);

	CDEBUG(D_DLMTRACE, "(name: %.*s,"DFID") in obj "DFID
		", intent: %s flags %#Lo\n", op_data->op_namelen,
		op_data->op_name, PFID(&op_data->op_fid2),
		PFID(&op_data->op_fid1), ldlm_it2str(it->it_op),
		it->it_flags);

	lockh.cookie = 0;
	/* For LOOKUP/GETATTR with a known child fid, first try to reuse an
	 * already-granted lock instead of issuing a fresh enqueue RPC. */
	if (fid_is_sane(&op_data->op_fid2) &&
	    (it->it_op & (IT_LOOKUP | IT_GETATTR))) {
		/* We could just return 1 immediately, but since we should only
		 * be called in revalidate_it if we already have a lock, let's
		 * verify that. */
		it->d.lustre.it_lock_handle = 0;
		rc = mdc_revalidate_lock(exp, it, &op_data->op_fid2, NULL);
		/* Only return failure if it was not GETATTR by cfid
		   (from inode_revalidate) */
		if (rc || op_data->op_namelen != 0)
			return rc;
	}

	/* lookup_it may be called only after revalidate_it has run, because
	 * revalidate_it cannot return errors, only zero.  Returning zero causes
	 * this call to lookup, which *can* return an error.
	 *
	 * We only want to execute the request associated with the intent one
	 * time, however, so don't send the request again.  Instead, skip past
	 * this and use the request from revalidate.  In this case, revalidate
	 * never dropped its reference, so the refcounts are all OK */
	if (!it_disposition(it, DISP_ENQ_COMPLETE)) {
		struct ldlm_enqueue_info einfo = {
			.ei_type	= LDLM_IBITS,
			.ei_mode	= it_to_lock_mode(it),
			.ei_cb_bl	= cb_blocking,
			.ei_cb_cp	= ldlm_completion_ast,
		};

		/* For case if upper layer did not alloc fid, do it now. */
		if (!fid_is_sane(&op_data->op_fid2) && it->it_op & IT_CREAT) {
			rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
			if (rc < 0) {
				CERROR("Can't alloc new fid, rc %d\n", rc);
				return rc;
			}
		}
		rc = mdc_enqueue(exp, &einfo, it, op_data, &lockh,
				 lmm, lmmsize, NULL, extra_lock_flags);
		if (rc < 0)
			return rc;
	} else if (!fid_is_sane(&op_data->op_fid2) ||
		   !(it->it_create_mode & M_CHECK_STALE)) {
		/* DISP_ENQ_COMPLETE set means there is extra reference on
		 * request referenced from this intent, saved for subsequent
		 * lookup. This path is executed when we proceed to this
		 * lookup, so we clear DISP_ENQ_COMPLETE */
		it_clear_disposition(it, DISP_ENQ_COMPLETE);
	}
	/* Hand the intent's request back to the caller; finish_intent_lock
	 * fixes up lock/request state for the VFS (see block comment above). */
	*reqp = it->d.lustre.it_data;
	rc = mdc_finish_intent_lock(exp, *reqp, op_data, it, &lockh);
	return rc;
}
1195
/*
 * Completion callback for an asynchronous getattr intent enqueue.
 *
 * Runs in ptlrpcd context once the enqueue RPC started by
 * mdc_intent_getattr_async() completes.  Finishes the DLM enqueue,
 * fixes up intent/lock state, then invokes the caller's mi_cb with the
 * final status.  Always returns 0; errors are delivered via mi_cb.
 */
static int mdc_intent_getattr_async_interpret(const struct lu_env *env,
					      struct ptlrpc_request *req,
					      void *args, int rc)
{
	struct mdc_getattr_args *ga = args;
	struct obd_export *exp = ga->ga_exp;
	struct md_enqueue_info *minfo = ga->ga_minfo;
	struct ldlm_enqueue_info *einfo = ga->ga_einfo;
	struct lookup_intent *it;
	struct lustre_handle *lockh;
	struct obd_device *obddev;
	struct ldlm_reply *lockrep;
	__u64 flags = LDLM_FL_HAS_INTENT;

	it = &minfo->mi_it;
	lockh = &minfo->mi_lockh;

	obddev = class_exp2obd(exp);

	/* Release the in-flight slot taken in mdc_intent_getattr_async(). */
	mdc_exit_request(&obddev->u.cli);
	/* Fault injection: force a timeout result for testing. */
	if (OBD_FAIL_CHECK(OBD_FAIL_MDC_GETATTR_ENQUEUE))
		rc = -ETIMEDOUT;

	rc = ldlm_cli_enqueue_fini(exp, req, einfo->ei_type, 1, einfo->ei_mode,
				   &flags, NULL, 0, lockh, rc);
	if (rc < 0) {
		CERROR("ldlm_cli_enqueue_fini: %d\n", rc);
		mdc_clear_replay_flag(req, rc);
		GOTO(out, rc);
	}

	lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
	LASSERT(lockrep != NULL);

	/* lock_policy_res2 carries the intent status in network byte order. */
	lockrep->lock_policy_res2 =
		ptlrpc_status_ntoh(lockrep->lock_policy_res2);

	rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc);
	if (rc)
		GOTO(out, rc);

	rc = mdc_finish_intent_lock(exp, req, &minfo->mi_data, it, lockh);

out:
	/* einfo was allocated by the submitter solely for this callback. */
	OBD_FREE_PTR(einfo);
	minfo->mi_cb(req, minfo, rc);
	return 0;
}
1244
1245 int mdc_intent_getattr_async(struct obd_export *exp,
1246 struct md_enqueue_info *minfo,
1247 struct ldlm_enqueue_info *einfo)
1248 {
1249 struct md_op_data *op_data = &minfo->mi_data;
1250 struct lookup_intent *it = &minfo->mi_it;
1251 struct ptlrpc_request *req;
1252 struct mdc_getattr_args *ga;
1253 struct obd_device *obddev = class_exp2obd(exp);
1254 struct ldlm_res_id res_id;
1255 /*XXX: Both MDS_INODELOCK_LOOKUP and MDS_INODELOCK_UPDATE are needed
1256 * for statahead currently. Consider CMD in future, such two bits
1257 * maybe managed by different MDS, should be adjusted then. */
1258 ldlm_policy_data_t policy = {
1259 .l_inodebits = { MDS_INODELOCK_LOOKUP |
1260 MDS_INODELOCK_UPDATE }
1261 };
1262 int rc = 0;
1263 __u64 flags = LDLM_FL_HAS_INTENT;
1264
1265 CDEBUG(D_DLMTRACE,
1266 "name: %.*s in inode "DFID", intent: %s flags %#Lo\n",
1267 op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
1268 ldlm_it2str(it->it_op), it->it_flags);
1269
1270 fid_build_reg_res_name(&op_data->op_fid1, &res_id);
1271 req = mdc_intent_getattr_pack(exp, it, op_data);
1272 if (!req)
1273 return -ENOMEM;
1274
1275 rc = mdc_enter_request(&obddev->u.cli);
1276 if (rc != 0) {
1277 ptlrpc_req_finished(req);
1278 return rc;
1279 }
1280
1281 rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, &policy, &flags, NULL,
1282 0, LVB_T_NONE, &minfo->mi_lockh, 1);
1283 if (rc < 0) {
1284 mdc_exit_request(&obddev->u.cli);
1285 ptlrpc_req_finished(req);
1286 return rc;
1287 }
1288
1289 CLASSERT(sizeof(*ga) <= sizeof(req->rq_async_args));
1290 ga = ptlrpc_req_async_args(req);
1291 ga->ga_exp = exp;
1292 ga->ga_minfo = minfo;
1293 ga->ga_einfo = einfo;
1294
1295 req->rq_interpret_reply = mdc_intent_getattr_async_interpret;
1296 ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
1297
1298 return 0;
1299 }
This page took 0.101846 seconds and 5 git commands to generate.