4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/* Forward declaration: releases a group lock held on @inode via @file. */
static int ll_put_grouplock(struct inode *inode, struct file *file,
			    unsigned long arg);
56 static int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
/* Forward declaration: dispatches a dynamic ioctl to registered handlers;
 * the handler's return code is stored through @rcp. */
static enum llioc_iter
ll_iocontrol_call(struct inode *inode, struct file *file,
		  unsigned int cmd, unsigned long arg, int *rcp);
63 static struct ll_file_data
*ll_file_data_get(void)
65 struct ll_file_data
*fd
;
67 OBD_SLAB_ALLOC_PTR_GFP(fd
, ll_file_data_slab
, GFP_NOFS
);
70 fd
->fd_write_failed
= false;
74 static void ll_file_data_put(struct ll_file_data
*fd
)
77 OBD_SLAB_FREE_PTR(fd
, ll_file_data_slab
);
80 void ll_pack_inode2opdata(struct inode
*inode
, struct md_op_data
*op_data
,
81 struct lustre_handle
*fh
)
83 op_data
->op_fid1
= ll_i2info(inode
)->lli_fid
;
84 op_data
->op_attr
.ia_mode
= inode
->i_mode
;
85 op_data
->op_attr
.ia_atime
= inode
->i_atime
;
86 op_data
->op_attr
.ia_mtime
= inode
->i_mtime
;
87 op_data
->op_attr
.ia_ctime
= inode
->i_ctime
;
88 op_data
->op_attr
.ia_size
= i_size_read(inode
);
89 op_data
->op_attr_blocks
= inode
->i_blocks
;
90 ((struct ll_iattr
*)&op_data
->op_attr
)->ia_attr_flags
=
91 ll_inode_to_ext_flags(inode
->i_flags
);
92 op_data
->op_ioepoch
= ll_i2info(inode
)->lli_ioepoch
;
94 op_data
->op_handle
= *fh
;
95 op_data
->op_capa1
= ll_mdscapa_get(inode
);
97 if (LLIF_DATA_MODIFIED
& ll_i2info(inode
)->lli_flags
)
98 op_data
->op_bias
|= MDS_DATA_MODIFIED
;
102 * Closes the IO epoch and packs all the attributes into @op_data for
105 static void ll_prepare_close(struct inode
*inode
, struct md_op_data
*op_data
,
106 struct obd_client_handle
*och
)
108 op_data
->op_attr
.ia_valid
= ATTR_MODE
| ATTR_ATIME
| ATTR_ATIME_SET
|
109 ATTR_MTIME
| ATTR_MTIME_SET
|
110 ATTR_CTIME
| ATTR_CTIME_SET
;
112 if (!(och
->och_flags
& FMODE_WRITE
))
115 if (!exp_connect_som(ll_i2mdexp(inode
)) || !S_ISREG(inode
->i_mode
))
116 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
118 ll_ioepoch_close(inode
, op_data
, &och
, 0);
121 ll_pack_inode2opdata(inode
, op_data
, &och
->och_fh
);
122 ll_prep_md_op_data(op_data
, inode
, NULL
, NULL
,
123 0, 0, LUSTRE_OPC_ANY
, NULL
);
126 static int ll_close_inode_openhandle(struct obd_export
*md_exp
,
128 struct obd_client_handle
*och
,
129 const __u64
*data_version
)
131 struct obd_export
*exp
= ll_i2mdexp(inode
);
132 struct md_op_data
*op_data
;
133 struct ptlrpc_request
*req
= NULL
;
134 struct obd_device
*obd
= class_exp2obd(exp
);
140 * XXX: in case of LMV, is this correct to access
143 CERROR("Invalid MDC connection handle "LPX64
"\n",
144 ll_i2mdexp(inode
)->exp_handle
.h_cookie
);
148 OBD_ALLOC_PTR(op_data
);
150 GOTO(out
, rc
= -ENOMEM
); // XXX We leak openhandle and request here.
152 ll_prepare_close(inode
, op_data
, och
);
153 if (data_version
!= NULL
) {
154 /* Pass in data_version implies release. */
155 op_data
->op_bias
|= MDS_HSM_RELEASE
;
156 op_data
->op_data_version
= *data_version
;
157 op_data
->op_lease_handle
= och
->och_lease_handle
;
158 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
160 epoch_close
= (op_data
->op_flags
& MF_EPOCH_CLOSE
);
161 rc
= md_close(md_exp
, op_data
, och
->och_mod
, &req
);
163 /* This close must have the epoch closed. */
164 LASSERT(epoch_close
);
165 /* MDS has instructed us to obtain Size-on-MDS attribute from
166 * OSTs and send setattr to back to MDS. */
167 rc
= ll_som_update(inode
, op_data
);
169 CERROR("inode %lu mdc Size-on-MDS update failed: "
170 "rc = %d\n", inode
->i_ino
, rc
);
174 CERROR("inode %lu mdc close failed: rc = %d\n",
178 /* DATA_MODIFIED flag was successfully sent on close, cancel data
179 * modification flag. */
180 if (rc
== 0 && (op_data
->op_bias
& MDS_DATA_MODIFIED
)) {
181 struct ll_inode_info
*lli
= ll_i2info(inode
);
183 spin_lock(&lli
->lli_lock
);
184 lli
->lli_flags
&= ~LLIF_DATA_MODIFIED
;
185 spin_unlock(&lli
->lli_lock
);
189 rc
= ll_objects_destroy(req
, inode
);
191 CERROR("inode %lu ll_objects destroy: rc = %d\n",
194 if (rc
== 0 && op_data
->op_bias
& MDS_HSM_RELEASE
) {
195 struct mdt_body
*body
;
196 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
197 if (!(body
->valid
& OBD_MD_FLRELEASED
))
201 ll_finish_md_op_data(op_data
);
204 if (exp_connect_som(exp
) && !epoch_close
&&
205 S_ISREG(inode
->i_mode
) && (och
->och_flags
& FMODE_WRITE
)) {
206 ll_queue_done_writing(inode
, LLIF_DONE_WRITING
);
208 md_clear_open_replay_data(md_exp
, och
);
209 /* Free @och if it is not waiting for DONE_WRITING. */
210 och
->och_fh
.cookie
= DEAD_HANDLE_MAGIC
;
213 if (req
) /* This is close request */
214 ptlrpc_req_finished(req
);
218 int ll_md_real_close(struct inode
*inode
, fmode_t fmode
)
220 struct ll_inode_info
*lli
= ll_i2info(inode
);
221 struct obd_client_handle
**och_p
;
222 struct obd_client_handle
*och
;
226 if (fmode
& FMODE_WRITE
) {
227 och_p
= &lli
->lli_mds_write_och
;
228 och_usecount
= &lli
->lli_open_fd_write_count
;
229 } else if (fmode
& FMODE_EXEC
) {
230 och_p
= &lli
->lli_mds_exec_och
;
231 och_usecount
= &lli
->lli_open_fd_exec_count
;
233 LASSERT(fmode
& FMODE_READ
);
234 och_p
= &lli
->lli_mds_read_och
;
235 och_usecount
= &lli
->lli_open_fd_read_count
;
238 mutex_lock(&lli
->lli_och_mutex
);
239 if (*och_usecount
> 0) {
240 /* There are still users of this handle, so skip
242 mutex_unlock(&lli
->lli_och_mutex
);
248 mutex_unlock(&lli
->lli_och_mutex
);
251 /* There might be a race and this handle may already
253 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
260 static int ll_md_close(struct obd_export
*md_exp
, struct inode
*inode
,
263 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
264 struct ll_inode_info
*lli
= ll_i2info(inode
);
267 /* clear group lock, if present */
268 if (unlikely(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
))
269 ll_put_grouplock(inode
, file
, fd
->fd_grouplock
.cg_gid
);
271 if (fd
->fd_lease_och
!= NULL
) {
274 /* Usually the lease is not released when the
275 * application crashed, we need to release here. */
276 rc
= ll_lease_close(fd
->fd_lease_och
, inode
, &lease_broken
);
277 CDEBUG(rc
? D_ERROR
: D_INODE
, "Clean up lease "DFID
" %d/%d\n",
278 PFID(&lli
->lli_fid
), rc
, lease_broken
);
280 fd
->fd_lease_och
= NULL
;
283 if (fd
->fd_och
!= NULL
) {
284 rc
= ll_close_inode_openhandle(md_exp
, inode
, fd
->fd_och
, NULL
);
289 /* Let's see if we have good enough OPEN lock on the file and if
290 we can skip talking to MDS */
291 if (file
->f_dentry
->d_inode
) { /* Can this ever be false? */
293 int flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_TEST_LOCK
;
294 struct lustre_handle lockh
;
295 struct inode
*inode
= file
->f_dentry
->d_inode
;
296 ldlm_policy_data_t policy
= {.l_inodebits
={MDS_INODELOCK_OPEN
}};
298 mutex_lock(&lli
->lli_och_mutex
);
299 if (fd
->fd_omode
& FMODE_WRITE
) {
301 LASSERT(lli
->lli_open_fd_write_count
);
302 lli
->lli_open_fd_write_count
--;
303 } else if (fd
->fd_omode
& FMODE_EXEC
) {
305 LASSERT(lli
->lli_open_fd_exec_count
);
306 lli
->lli_open_fd_exec_count
--;
309 LASSERT(lli
->lli_open_fd_read_count
);
310 lli
->lli_open_fd_read_count
--;
312 mutex_unlock(&lli
->lli_och_mutex
);
314 if (!md_lock_match(md_exp
, flags
, ll_inode2fid(inode
),
315 LDLM_IBITS
, &policy
, lockmode
,
317 rc
= ll_md_real_close(file
->f_dentry
->d_inode
,
321 CERROR("Releasing a file %p with negative dentry %p. Name %s",
322 file
, file
->f_dentry
, file
->f_dentry
->d_name
.name
);
326 LUSTRE_FPRIVATE(file
) = NULL
;
327 ll_file_data_put(fd
);
328 ll_capa_close(inode
);
333 /* While this returns an error code, fput() the caller does not, so we need
334 * to make every effort to clean up all of our state here. Also, applications
335 * rarely check close errors and even if an error is returned they will not
336 * re-try the close call.
338 int ll_file_release(struct inode
*inode
, struct file
*file
)
340 struct ll_file_data
*fd
;
341 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
342 struct ll_inode_info
*lli
= ll_i2info(inode
);
345 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p)\n", inode
->i_ino
,
346 inode
->i_generation
, inode
);
348 #ifdef CONFIG_FS_POSIX_ACL
349 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
&&
350 inode
== inode
->i_sb
->s_root
->d_inode
) {
351 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
354 if (unlikely(fd
->fd_flags
& LL_FILE_RMTACL
)) {
355 fd
->fd_flags
&= ~LL_FILE_RMTACL
;
356 rct_del(&sbi
->ll_rct
, current_pid());
357 et_search_free(&sbi
->ll_et
, current_pid());
362 if (inode
->i_sb
->s_root
!= file
->f_dentry
)
363 ll_stats_ops_tally(sbi
, LPROC_LL_RELEASE
, 1);
364 fd
= LUSTRE_FPRIVATE(file
);
367 /* The last ref on @file, maybe not the the owner pid of statahead.
368 * Different processes can open the same dir, "ll_opendir_key" means:
369 * it is me that should stop the statahead thread. */
370 if (S_ISDIR(inode
->i_mode
) && lli
->lli_opendir_key
== fd
&&
371 lli
->lli_opendir_pid
!= 0)
372 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
374 if (inode
->i_sb
->s_root
== file
->f_dentry
) {
375 LUSTRE_FPRIVATE(file
) = NULL
;
376 ll_file_data_put(fd
);
380 if (!S_ISDIR(inode
->i_mode
)) {
381 lov_read_and_clear_async_rc(lli
->lli_clob
);
382 lli
->lli_async_rc
= 0;
385 rc
= ll_md_close(sbi
->ll_md_exp
, inode
, file
);
387 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG
, cfs_fail_val
))
388 libcfs_debug_dumplog();
393 static int ll_intent_file_open(struct file
*file
, void *lmm
,
394 int lmmsize
, struct lookup_intent
*itp
)
396 struct ll_sb_info
*sbi
= ll_i2sbi(file
->f_dentry
->d_inode
);
397 struct dentry
*parent
= file
->f_dentry
->d_parent
;
398 const char *name
= file
->f_dentry
->d_name
.name
;
399 const int len
= file
->f_dentry
->d_name
.len
;
400 struct md_op_data
*op_data
;
401 struct ptlrpc_request
*req
;
402 __u32 opc
= LUSTRE_OPC_ANY
;
408 /* Usually we come here only for NFSD, and we want open lock.
409 But we can also get here with pre 2.6.15 patchless kernels, and in
410 that case that lock is also ok */
411 /* We can also get here if there was cached open handle in revalidate_it
412 * but it disappeared while we were getting from there to ll_file_open.
413 * But this means this file was closed and immediately opened which
414 * makes a good candidate for using OPEN lock */
415 /* If lmmsize & lmm are not 0, we are just setting stripe info
416 * parameters. No need for the open lock */
417 if (lmm
== NULL
&& lmmsize
== 0) {
418 itp
->it_flags
|= MDS_OPEN_LOCK
;
419 if (itp
->it_flags
& FMODE_WRITE
)
420 opc
= LUSTRE_OPC_CREATE
;
423 op_data
= ll_prep_md_op_data(NULL
, parent
->d_inode
,
424 file
->f_dentry
->d_inode
, name
, len
,
427 return PTR_ERR(op_data
);
429 itp
->it_flags
|= MDS_OPEN_BY_FID
;
430 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, lmm
, lmmsize
, itp
,
431 0 /*unused */, &req
, ll_md_blocking_ast
, 0);
432 ll_finish_md_op_data(op_data
);
434 /* reason for keep own exit path - don`t flood log
435 * with messages with -ESTALE errors.
437 if (!it_disposition(itp
, DISP_OPEN_OPEN
) ||
438 it_open_error(DISP_OPEN_OPEN
, itp
))
440 ll_release_openhandle(file
->f_dentry
, itp
);
444 if (it_disposition(itp
, DISP_LOOKUP_NEG
))
445 GOTO(out
, rc
= -ENOENT
);
447 if (rc
!= 0 || it_open_error(DISP_OPEN_OPEN
, itp
)) {
448 rc
= rc
? rc
: it_open_error(DISP_OPEN_OPEN
, itp
);
449 CDEBUG(D_VFSTRACE
, "lock enqueue: err: %d\n", rc
);
453 rc
= ll_prep_inode(&file
->f_dentry
->d_inode
, req
, NULL
, itp
);
454 if (!rc
&& itp
->d
.lustre
.it_lock_mode
)
455 ll_set_lock_data(sbi
->ll_md_exp
, file
->f_dentry
->d_inode
,
459 ptlrpc_req_finished(req
);
460 ll_intent_drop_lock(itp
);
466 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
467 * not believe attributes if a few ioepoch holders exist. Attributes for
468 * previous ioepoch if new one is opened are also skipped by MDS.
470 void ll_ioepoch_open(struct ll_inode_info
*lli
, __u64 ioepoch
)
472 if (ioepoch
&& lli
->lli_ioepoch
!= ioepoch
) {
473 lli
->lli_ioepoch
= ioepoch
;
474 CDEBUG(D_INODE
, "Epoch "LPU64
" opened on "DFID
"\n",
475 ioepoch
, PFID(&lli
->lli_fid
));
479 static int ll_och_fill(struct obd_export
*md_exp
, struct lookup_intent
*it
,
480 struct obd_client_handle
*och
)
482 struct ptlrpc_request
*req
= it
->d
.lustre
.it_data
;
483 struct mdt_body
*body
;
485 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
486 och
->och_fh
= body
->handle
;
487 och
->och_fid
= body
->fid1
;
488 och
->och_lease_handle
.cookie
= it
->d
.lustre
.it_lock_handle
;
489 och
->och_magic
= OBD_CLIENT_HANDLE_MAGIC
;
490 och
->och_flags
= it
->it_flags
;
492 return md_set_open_replay_data(md_exp
, och
, it
);
495 static int ll_local_open(struct file
*file
, struct lookup_intent
*it
,
496 struct ll_file_data
*fd
, struct obd_client_handle
*och
)
498 struct inode
*inode
= file
->f_dentry
->d_inode
;
499 struct ll_inode_info
*lli
= ll_i2info(inode
);
501 LASSERT(!LUSTRE_FPRIVATE(file
));
506 struct ptlrpc_request
*req
= it
->d
.lustre
.it_data
;
507 struct mdt_body
*body
;
510 rc
= ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
514 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
515 ll_ioepoch_open(lli
, body
->ioepoch
);
518 LUSTRE_FPRIVATE(file
) = fd
;
519 ll_readahead_init(inode
, &fd
->fd_ras
);
520 fd
->fd_omode
= it
->it_flags
& (FMODE_READ
| FMODE_WRITE
| FMODE_EXEC
);
524 /* Open a file, and (for the very first open) create objects on the OSTs at
525 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
526 * creation or open until ll_lov_setstripe() ioctl is called.
528 * If we already have the stripe MD locally then we don't request it in
529 * md_open(), by passing a lmm_size = 0.
531 * It is up to the application to ensure no other processes open this file
532 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
533 * used. We might be able to avoid races of that sort by getting lli_open_sem
534 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
535 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
537 int ll_file_open(struct inode
*inode
, struct file
*file
)
539 struct ll_inode_info
*lli
= ll_i2info(inode
);
540 struct lookup_intent
*it
, oit
= { .it_op
= IT_OPEN
,
541 .it_flags
= file
->f_flags
};
542 struct obd_client_handle
**och_p
= NULL
;
543 __u64
*och_usecount
= NULL
;
544 struct ll_file_data
*fd
;
545 int rc
= 0, opendir_set
= 0;
547 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode
->i_ino
,
548 inode
->i_generation
, inode
, file
->f_flags
);
550 it
= file
->private_data
; /* XXX: compat macro */
551 file
->private_data
= NULL
; /* prevent ll_local_open assertion */
553 fd
= ll_file_data_get();
555 GOTO(out_openerr
, rc
= -ENOMEM
);
558 if (S_ISDIR(inode
->i_mode
)) {
559 spin_lock(&lli
->lli_sa_lock
);
560 if (lli
->lli_opendir_key
== NULL
&& lli
->lli_sai
== NULL
&&
561 lli
->lli_opendir_pid
== 0) {
562 lli
->lli_opendir_key
= fd
;
563 lli
->lli_opendir_pid
= current_pid();
566 spin_unlock(&lli
->lli_sa_lock
);
569 if (inode
->i_sb
->s_root
== file
->f_dentry
) {
570 LUSTRE_FPRIVATE(file
) = fd
;
574 if (!it
|| !it
->d
.lustre
.it_disposition
) {
575 /* Convert f_flags into access mode. We cannot use file->f_mode,
576 * because everything but O_ACCMODE mask was stripped from
578 if ((oit
.it_flags
+ 1) & O_ACCMODE
)
580 if (file
->f_flags
& O_TRUNC
)
581 oit
.it_flags
|= FMODE_WRITE
;
583 /* kernel only call f_op->open in dentry_open. filp_open calls
584 * dentry_open after call to open_namei that checks permissions.
585 * Only nfsd_open call dentry_open directly without checking
586 * permissions and because of that this code below is safe. */
587 if (oit
.it_flags
& (FMODE_WRITE
| FMODE_READ
))
588 oit
.it_flags
|= MDS_OPEN_OWNEROVERRIDE
;
590 /* We do not want O_EXCL here, presumably we opened the file
591 * already? XXX - NFS implications? */
592 oit
.it_flags
&= ~O_EXCL
;
594 /* bug20584, if "it_flags" contains O_CREAT, the file will be
595 * created if necessary, then "IT_CREAT" should be set to keep
596 * consistent with it */
597 if (oit
.it_flags
& O_CREAT
)
598 oit
.it_op
|= IT_CREAT
;
604 /* Let's see if we have file open on MDS already. */
605 if (it
->it_flags
& FMODE_WRITE
) {
606 och_p
= &lli
->lli_mds_write_och
;
607 och_usecount
= &lli
->lli_open_fd_write_count
;
608 } else if (it
->it_flags
& FMODE_EXEC
) {
609 och_p
= &lli
->lli_mds_exec_och
;
610 och_usecount
= &lli
->lli_open_fd_exec_count
;
612 och_p
= &lli
->lli_mds_read_och
;
613 och_usecount
= &lli
->lli_open_fd_read_count
;
616 mutex_lock(&lli
->lli_och_mutex
);
617 if (*och_p
) { /* Open handle is present */
618 if (it_disposition(it
, DISP_OPEN_OPEN
)) {
619 /* Well, there's extra open request that we do not need,
620 let's close it somehow. This will decref request. */
621 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
623 mutex_unlock(&lli
->lli_och_mutex
);
624 GOTO(out_openerr
, rc
);
627 ll_release_openhandle(file
->f_dentry
, it
);
631 rc
= ll_local_open(file
, it
, fd
, NULL
);
634 mutex_unlock(&lli
->lli_och_mutex
);
635 GOTO(out_openerr
, rc
);
638 LASSERT(*och_usecount
== 0);
639 if (!it
->d
.lustre
.it_disposition
) {
640 /* We cannot just request lock handle now, new ELC code
641 means that one of other OPEN locks for this file
642 could be cancelled, and since blocking ast handler
643 would attempt to grab och_mutex as well, that would
644 result in a deadlock */
645 mutex_unlock(&lli
->lli_och_mutex
);
646 it
->it_create_mode
|= M_CHECK_STALE
;
647 rc
= ll_intent_file_open(file
, NULL
, 0, it
);
648 it
->it_create_mode
&= ~M_CHECK_STALE
;
650 GOTO(out_openerr
, rc
);
654 OBD_ALLOC(*och_p
, sizeof (struct obd_client_handle
));
656 GOTO(out_och_free
, rc
= -ENOMEM
);
660 /* md_intent_lock() didn't get a request ref if there was an
661 * open error, so don't do cleanup on the request here
663 /* XXX (green): Should not we bail out on any error here, not
664 * just open error? */
665 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
667 GOTO(out_och_free
, rc
);
669 LASSERT(it_disposition(it
, DISP_ENQ_OPEN_REF
));
671 rc
= ll_local_open(file
, it
, fd
, *och_p
);
673 GOTO(out_och_free
, rc
);
675 mutex_unlock(&lli
->lli_och_mutex
);
678 /* Must do this outside lli_och_mutex lock to prevent deadlock where
679 different kind of OPEN lock for this same inode gets cancelled
680 by ldlm_cancel_lru */
681 if (!S_ISREG(inode
->i_mode
))
682 GOTO(out_och_free
, rc
);
686 if (!lli
->lli_has_smd
&&
687 (cl_is_lov_delay_create(file
->f_flags
) ||
688 (file
->f_mode
& FMODE_WRITE
) == 0)) {
689 CDEBUG(D_INODE
, "object creation was delayed\n");
690 GOTO(out_och_free
, rc
);
692 cl_lov_delay_create_clear(&file
->f_flags
);
693 GOTO(out_och_free
, rc
);
697 if (och_p
&& *och_p
) {
698 OBD_FREE(*och_p
, sizeof (struct obd_client_handle
));
699 *och_p
= NULL
; /* OBD_FREE writes some magic there */
702 mutex_unlock(&lli
->lli_och_mutex
);
705 if (opendir_set
!= 0)
706 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
708 ll_file_data_put(fd
);
710 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_OPEN
, 1);
713 if (it
&& it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
714 ptlrpc_req_finished(it
->d
.lustre
.it_data
);
715 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
721 static int ll_md_blocking_lease_ast(struct ldlm_lock
*lock
,
722 struct ldlm_lock_desc
*desc
, void *data
, int flag
)
725 struct lustre_handle lockh
;
728 case LDLM_CB_BLOCKING
:
729 ldlm_lock2handle(lock
, &lockh
);
730 rc
= ldlm_cli_cancel(&lockh
, LCF_ASYNC
);
732 CDEBUG(D_INODE
, "ldlm_cli_cancel: %d\n", rc
);
736 case LDLM_CB_CANCELING
:
744 * Acquire a lease and open the file.
746 static struct obd_client_handle
*
747 ll_lease_open(struct inode
*inode
, struct file
*file
, fmode_t fmode
,
750 struct lookup_intent it
= { .it_op
= IT_OPEN
};
751 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
752 struct md_op_data
*op_data
;
753 struct ptlrpc_request
*req
;
754 struct lustre_handle old_handle
= { 0 };
755 struct obd_client_handle
*och
= NULL
;
759 if (fmode
!= FMODE_WRITE
&& fmode
!= FMODE_READ
)
760 return ERR_PTR(-EINVAL
);
763 struct ll_inode_info
*lli
= ll_i2info(inode
);
764 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
765 struct obd_client_handle
**och_p
;
768 if (!(fmode
& file
->f_mode
) || (file
->f_mode
& FMODE_EXEC
))
769 return ERR_PTR(-EPERM
);
771 /* Get the openhandle of the file */
773 mutex_lock(&lli
->lli_och_mutex
);
774 if (fd
->fd_lease_och
!= NULL
) {
775 mutex_unlock(&lli
->lli_och_mutex
);
779 if (fd
->fd_och
== NULL
) {
780 if (file
->f_mode
& FMODE_WRITE
) {
781 LASSERT(lli
->lli_mds_write_och
!= NULL
);
782 och_p
= &lli
->lli_mds_write_och
;
783 och_usecount
= &lli
->lli_open_fd_write_count
;
785 LASSERT(lli
->lli_mds_read_och
!= NULL
);
786 och_p
= &lli
->lli_mds_read_och
;
787 och_usecount
= &lli
->lli_open_fd_read_count
;
789 if (*och_usecount
== 1) {
796 mutex_unlock(&lli
->lli_och_mutex
);
797 if (rc
< 0) /* more than 1 opener */
800 LASSERT(fd
->fd_och
!= NULL
);
801 old_handle
= fd
->fd_och
->och_fh
;
806 return ERR_PTR(-ENOMEM
);
808 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
, 0, 0,
809 LUSTRE_OPC_ANY
, NULL
);
811 GOTO(out
, rc
= PTR_ERR(op_data
));
813 /* To tell the MDT this openhandle is from the same owner */
814 op_data
->op_handle
= old_handle
;
816 it
.it_flags
= fmode
| open_flags
;
817 it
.it_flags
|= MDS_OPEN_LOCK
| MDS_OPEN_BY_FID
| MDS_OPEN_LEASE
;
818 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, NULL
, 0, &it
, 0, &req
,
819 ll_md_blocking_lease_ast
,
820 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
821 * it can be cancelled which may mislead applications that the lease is
823 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
824 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
825 * doesn't deal with openhandle, so normal openhandle will be leaked. */
826 LDLM_FL_NO_LRU
| LDLM_FL_EXCL
);
827 ll_finish_md_op_data(op_data
);
828 ptlrpc_req_finished(req
);
830 GOTO(out_release_it
, rc
);
832 if (it_disposition(&it
, DISP_LOOKUP_NEG
))
833 GOTO(out_release_it
, rc
= -ENOENT
);
835 rc
= it_open_error(DISP_OPEN_OPEN
, &it
);
837 GOTO(out_release_it
, rc
);
839 LASSERT(it_disposition(&it
, DISP_ENQ_OPEN_REF
));
840 ll_och_fill(sbi
->ll_md_exp
, &it
, och
);
842 if (!it_disposition(&it
, DISP_OPEN_LEASE
)) /* old server? */
843 GOTO(out_close
, rc
= -EOPNOTSUPP
);
845 /* already get lease, handle lease lock */
846 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
847 if (it
.d
.lustre
.it_lock_mode
== 0 ||
848 it
.d
.lustre
.it_lock_bits
!= MDS_INODELOCK_OPEN
) {
849 /* open lock must return for lease */
850 CERROR(DFID
"lease granted but no open lock, %d/%llu.\n",
851 PFID(ll_inode2fid(inode
)), it
.d
.lustre
.it_lock_mode
,
852 it
.d
.lustre
.it_lock_bits
);
853 GOTO(out_close
, rc
= -EPROTO
);
856 ll_intent_release(&it
);
860 rc2
= ll_close_inode_openhandle(sbi
->ll_md_exp
, inode
, och
, NULL
);
862 CERROR("Close openhandle returned %d\n", rc2
);
864 /* cancel open lock */
865 if (it
.d
.lustre
.it_lock_mode
!= 0) {
866 ldlm_lock_decref_and_cancel(&och
->och_lease_handle
,
867 it
.d
.lustre
.it_lock_mode
);
868 it
.d
.lustre
.it_lock_mode
= 0;
871 ll_intent_release(&it
);
878 * Release lease and close the file.
879 * It will check if the lease has ever broken.
881 static int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
884 struct ldlm_lock
*lock
;
885 bool cancelled
= true;
888 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
890 lock_res_and_lock(lock
);
891 cancelled
= ldlm_is_cancel(lock
);
892 unlock_res_and_lock(lock
);
896 CDEBUG(D_INODE
, "lease for "DFID
" broken? %d\n",
897 PFID(&ll_i2info(inode
)->lli_fid
), cancelled
);
900 ldlm_cli_cancel(&och
->och_lease_handle
, 0);
901 if (lease_broken
!= NULL
)
902 *lease_broken
= cancelled
;
904 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
909 /* Fills the obdo with the attributes for the lsm */
910 static int ll_lsm_getattr(struct lov_stripe_md
*lsm
, struct obd_export
*exp
,
911 struct obd_capa
*capa
, struct obdo
*obdo
,
912 __u64 ioepoch
, int sync
)
914 struct ptlrpc_request_set
*set
;
915 struct obd_info oinfo
= { { { 0 } } };
918 LASSERT(lsm
!= NULL
);
922 oinfo
.oi_oa
->o_oi
= lsm
->lsm_oi
;
923 oinfo
.oi_oa
->o_mode
= S_IFREG
;
924 oinfo
.oi_oa
->o_ioepoch
= ioepoch
;
925 oinfo
.oi_oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLTYPE
|
926 OBD_MD_FLSIZE
| OBD_MD_FLBLOCKS
|
927 OBD_MD_FLBLKSZ
| OBD_MD_FLATIME
|
928 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
|
929 OBD_MD_FLGROUP
| OBD_MD_FLEPOCH
|
930 OBD_MD_FLDATAVERSION
;
931 oinfo
.oi_capa
= capa
;
933 oinfo
.oi_oa
->o_valid
|= OBD_MD_FLFLAGS
;
934 oinfo
.oi_oa
->o_flags
|= OBD_FL_SRVLOCK
;
937 set
= ptlrpc_prep_set();
939 CERROR("can't allocate ptlrpc set\n");
942 rc
= obd_getattr_async(exp
, &oinfo
, set
);
944 rc
= ptlrpc_set_wait(set
);
945 ptlrpc_set_destroy(set
);
948 oinfo
.oi_oa
->o_valid
&= (OBD_MD_FLBLOCKS
| OBD_MD_FLBLKSZ
|
949 OBD_MD_FLATIME
| OBD_MD_FLMTIME
|
950 OBD_MD_FLCTIME
| OBD_MD_FLSIZE
|
951 OBD_MD_FLDATAVERSION
);
956 * Performs the getattr on the inode and updates its fields.
957 * If @sync != 0, perform the getattr under the server-side lock.
959 int ll_inode_getattr(struct inode
*inode
, struct obdo
*obdo
,
960 __u64 ioepoch
, int sync
)
962 struct obd_capa
*capa
= ll_mdscapa_get(inode
);
963 struct lov_stripe_md
*lsm
;
966 lsm
= ccc_inode_lsm_get(inode
);
967 rc
= ll_lsm_getattr(lsm
, ll_i2dtexp(inode
),
968 capa
, obdo
, ioepoch
, sync
);
971 struct ost_id
*oi
= lsm
? &lsm
->lsm_oi
: &obdo
->o_oi
;
973 obdo_refresh_inode(inode
, obdo
, obdo
->o_valid
);
974 CDEBUG(D_INODE
, "objid "DOSTID
" size %llu, blocks %llu,"
975 " blksize %lu\n", POSTID(oi
), i_size_read(inode
),
976 (unsigned long long)inode
->i_blocks
,
977 (unsigned long)ll_inode_blksize(inode
));
979 ccc_inode_lsm_put(inode
, lsm
);
983 int ll_merge_lvb(const struct lu_env
*env
, struct inode
*inode
)
985 struct ll_inode_info
*lli
= ll_i2info(inode
);
986 struct cl_object
*obj
= lli
->lli_clob
;
987 struct cl_attr
*attr
= ccc_env_thread_attr(env
);
991 ll_inode_size_lock(inode
);
992 /* merge timestamps the most recently obtained from mds with
993 timestamps obtained from osts */
994 LTIME_S(inode
->i_atime
) = lli
->lli_lvb
.lvb_atime
;
995 LTIME_S(inode
->i_mtime
) = lli
->lli_lvb
.lvb_mtime
;
996 LTIME_S(inode
->i_ctime
) = lli
->lli_lvb
.lvb_ctime
;
997 inode_init_lvb(inode
, &lvb
);
999 cl_object_attr_lock(obj
);
1000 rc
= cl_object_attr_get(env
, obj
, attr
);
1001 cl_object_attr_unlock(obj
);
1004 if (lvb
.lvb_atime
< attr
->cat_atime
)
1005 lvb
.lvb_atime
= attr
->cat_atime
;
1006 if (lvb
.lvb_ctime
< attr
->cat_ctime
)
1007 lvb
.lvb_ctime
= attr
->cat_ctime
;
1008 if (lvb
.lvb_mtime
< attr
->cat_mtime
)
1009 lvb
.lvb_mtime
= attr
->cat_mtime
;
1011 CDEBUG(D_VFSTRACE
, DFID
" updating i_size "LPU64
"\n",
1012 PFID(&lli
->lli_fid
), attr
->cat_size
);
1013 cl_isize_write_nolock(inode
, attr
->cat_size
);
1015 inode
->i_blocks
= attr
->cat_blocks
;
1017 LTIME_S(inode
->i_mtime
) = lvb
.lvb_mtime
;
1018 LTIME_S(inode
->i_atime
) = lvb
.lvb_atime
;
1019 LTIME_S(inode
->i_ctime
) = lvb
.lvb_ctime
;
1021 ll_inode_size_unlock(inode
);
1026 int ll_glimpse_ioctl(struct ll_sb_info
*sbi
, struct lov_stripe_md
*lsm
,
1029 struct obdo obdo
= { 0 };
1032 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, NULL
, &obdo
, 0, 0);
1034 st
->st_size
= obdo
.o_size
;
1035 st
->st_blocks
= obdo
.o_blocks
;
1036 st
->st_mtime
= obdo
.o_mtime
;
1037 st
->st_atime
= obdo
.o_atime
;
1038 st
->st_ctime
= obdo
.o_ctime
;
1043 static bool file_is_noatime(const struct file
*file
)
1045 const struct vfsmount
*mnt
= file
->f_path
.mnt
;
1046 const struct inode
*inode
= file
->f_path
.dentry
->d_inode
;
1048 /* Adapted from file_accessed() and touch_atime().*/
1049 if (file
->f_flags
& O_NOATIME
)
1052 if (inode
->i_flags
& S_NOATIME
)
1055 if (IS_NOATIME(inode
))
1058 if (mnt
->mnt_flags
& (MNT_NOATIME
| MNT_READONLY
))
1061 if ((mnt
->mnt_flags
& MNT_NODIRATIME
) && S_ISDIR(inode
->i_mode
))
1064 if ((inode
->i_sb
->s_flags
& MS_NODIRATIME
) && S_ISDIR(inode
->i_mode
))
1070 void ll_io_init(struct cl_io
*io
, const struct file
*file
, int write
)
1072 struct inode
*inode
= file
->f_dentry
->d_inode
;
1074 io
->u
.ci_rw
.crw_nonblock
= file
->f_flags
& O_NONBLOCK
;
1076 io
->u
.ci_wr
.wr_append
= !!(file
->f_flags
& O_APPEND
);
1077 io
->u
.ci_wr
.wr_sync
= file
->f_flags
& O_SYNC
||
1078 file
->f_flags
& O_DIRECT
||
1081 io
->ci_obj
= ll_i2info(inode
)->lli_clob
;
1082 io
->ci_lockreq
= CILR_MAYBE
;
1083 if (ll_file_nolock(file
)) {
1084 io
->ci_lockreq
= CILR_NEVER
;
1085 io
->ci_no_srvlock
= 1;
1086 } else if (file
->f_flags
& O_APPEND
) {
1087 io
->ci_lockreq
= CILR_MANDATORY
;
1090 io
->ci_noatime
= file_is_noatime(file
);
1094 ll_file_io_generic(const struct lu_env
*env
, struct vvp_io_args
*args
,
1095 struct file
*file
, enum cl_io_type iot
,
1096 loff_t
*ppos
, size_t count
)
1098 struct ll_inode_info
*lli
= ll_i2info(file
->f_dentry
->d_inode
);
1099 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1104 io
= ccc_env_thread_io(env
);
1105 ll_io_init(io
, file
, iot
== CIT_WRITE
);
1107 if (cl_io_rw_init(env
, io
, iot
, *ppos
, count
) == 0) {
1108 struct vvp_io
*vio
= vvp_env_io(env
);
1109 struct ccc_io
*cio
= ccc_env_io(env
);
1110 int write_mutex_locked
= 0;
1112 cio
->cui_fd
= LUSTRE_FPRIVATE(file
);
1113 vio
->cui_io_subtype
= args
->via_io_subtype
;
1115 switch (vio
->cui_io_subtype
) {
1117 cio
->cui_iov
= args
->u
.normal
.via_iov
;
1118 cio
->cui_nrsegs
= args
->u
.normal
.via_nrsegs
;
1119 cio
->cui_tot_nrsegs
= cio
->cui_nrsegs
;
1120 cio
->cui_iocb
= args
->u
.normal
.via_iocb
;
1121 if ((iot
== CIT_WRITE
) &&
1122 !(cio
->cui_fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1123 if (mutex_lock_interruptible(&lli
->
1125 GOTO(out
, result
= -ERESTARTSYS
);
1126 write_mutex_locked
= 1;
1127 } else if (iot
== CIT_READ
) {
1128 down_read(&lli
->lli_trunc_sem
);
1132 vio
->u
.splice
.cui_pipe
= args
->u
.splice
.via_pipe
;
1133 vio
->u
.splice
.cui_flags
= args
->u
.splice
.via_flags
;
1136 CERROR("Unknown IO type - %u\n", vio
->cui_io_subtype
);
1139 result
= cl_io_loop(env
, io
);
1140 if (write_mutex_locked
)
1141 mutex_unlock(&lli
->lli_write_mutex
);
1142 else if (args
->via_io_subtype
== IO_NORMAL
&& iot
== CIT_READ
)
1143 up_read(&lli
->lli_trunc_sem
);
1145 /* cl_io_rw_init() handled IO */
1146 result
= io
->ci_result
;
1149 if (io
->ci_nob
> 0) {
1150 result
= io
->ci_nob
;
1151 *ppos
= io
->u
.ci_wr
.wr
.crw_pos
;
1155 cl_io_fini(env
, io
);
1156 /* If any bit been read/written (result != 0), we just return
1157 * short read/write instead of restart io. */
1158 if ((result
== 0 || result
== -ENODATA
) && io
->ci_need_restart
) {
1159 CDEBUG(D_VFSTRACE
, "Restart %s on %s from %lld, count:%zd\n",
1160 iot
== CIT_READ
? "read" : "write",
1161 file
->f_dentry
->d_name
.name
, *ppos
, count
);
1162 LASSERTF(io
->ci_nob
== 0, "%zd", io
->ci_nob
);
1166 if (iot
== CIT_READ
) {
1168 ll_stats_ops_tally(ll_i2sbi(file
->f_dentry
->d_inode
),
1169 LPROC_LL_READ_BYTES
, result
);
1170 } else if (iot
== CIT_WRITE
) {
1172 ll_stats_ops_tally(ll_i2sbi(file
->f_dentry
->d_inode
),
1173 LPROC_LL_WRITE_BYTES
, result
);
1174 fd
->fd_write_failed
= false;
1175 } else if (result
!= -ERESTARTSYS
) {
1176 fd
->fd_write_failed
= true;
1183 static ssize_t
ll_file_aio_read(struct kiocb
*iocb
, const struct iovec
*iov
,
1184 unsigned long nr_segs
, loff_t pos
)
1187 struct vvp_io_args
*args
;
1192 result
= generic_segment_checks(iov
, &nr_segs
, &count
, VERIFY_WRITE
);
1196 env
= cl_env_get(&refcheck
);
1198 return PTR_ERR(env
);
1200 args
= vvp_env_args(env
, IO_NORMAL
);
1201 args
->u
.normal
.via_iov
= (struct iovec
*)iov
;
1202 args
->u
.normal
.via_nrsegs
= nr_segs
;
1203 args
->u
.normal
.via_iocb
= iocb
;
1205 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_READ
,
1206 &iocb
->ki_pos
, count
);
1207 cl_env_put(env
, &refcheck
);
1211 static ssize_t
ll_file_read(struct file
*file
, char *buf
, size_t count
,
1215 struct iovec
*local_iov
;
1216 struct kiocb
*kiocb
;
1220 env
= cl_env_get(&refcheck
);
1222 return PTR_ERR(env
);
1224 local_iov
= &vvp_env_info(env
)->vti_local_iov
;
1225 kiocb
= &vvp_env_info(env
)->vti_kiocb
;
1226 local_iov
->iov_base
= (void __user
*)buf
;
1227 local_iov
->iov_len
= count
;
1228 init_sync_kiocb(kiocb
, file
);
1229 kiocb
->ki_pos
= *ppos
;
1230 kiocb
->ki_nbytes
= count
;
1232 result
= ll_file_aio_read(kiocb
, local_iov
, 1, kiocb
->ki_pos
);
1233 *ppos
= kiocb
->ki_pos
;
1235 cl_env_put(env
, &refcheck
);
1240 * Write to a file (through the page cache).
1242 static ssize_t
ll_file_aio_write(struct kiocb
*iocb
, const struct iovec
*iov
,
1243 unsigned long nr_segs
, loff_t pos
)
1246 struct vvp_io_args
*args
;
1251 result
= generic_segment_checks(iov
, &nr_segs
, &count
, VERIFY_READ
);
1255 env
= cl_env_get(&refcheck
);
1257 return PTR_ERR(env
);
1259 args
= vvp_env_args(env
, IO_NORMAL
);
1260 args
->u
.normal
.via_iov
= (struct iovec
*)iov
;
1261 args
->u
.normal
.via_nrsegs
= nr_segs
;
1262 args
->u
.normal
.via_iocb
= iocb
;
1264 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_WRITE
,
1265 &iocb
->ki_pos
, count
);
1266 cl_env_put(env
, &refcheck
);
1270 static ssize_t
ll_file_write(struct file
*file
, const char *buf
, size_t count
,
1274 struct iovec
*local_iov
;
1275 struct kiocb
*kiocb
;
1279 env
= cl_env_get(&refcheck
);
1281 return PTR_ERR(env
);
1283 local_iov
= &vvp_env_info(env
)->vti_local_iov
;
1284 kiocb
= &vvp_env_info(env
)->vti_kiocb
;
1285 local_iov
->iov_base
= (void __user
*)buf
;
1286 local_iov
->iov_len
= count
;
1287 init_sync_kiocb(kiocb
, file
);
1288 kiocb
->ki_pos
= *ppos
;
1289 kiocb
->ki_nbytes
= count
;
1291 result
= ll_file_aio_write(kiocb
, local_iov
, 1, kiocb
->ki_pos
);
1292 *ppos
= kiocb
->ki_pos
;
1294 cl_env_put(env
, &refcheck
);
1301 * Send file content (through pagecache) somewhere with helper
1303 static ssize_t
ll_file_splice_read(struct file
*in_file
, loff_t
*ppos
,
1304 struct pipe_inode_info
*pipe
, size_t count
,
1308 struct vvp_io_args
*args
;
1312 env
= cl_env_get(&refcheck
);
1314 return PTR_ERR(env
);
1316 args
= vvp_env_args(env
, IO_SPLICE
);
1317 args
->u
.splice
.via_pipe
= pipe
;
1318 args
->u
.splice
.via_flags
= flags
;
1320 result
= ll_file_io_generic(env
, args
, in_file
, CIT_READ
, ppos
, count
);
1321 cl_env_put(env
, &refcheck
);
1325 static int ll_lov_recreate(struct inode
*inode
, struct ost_id
*oi
,
1328 struct obd_export
*exp
= ll_i2dtexp(inode
);
1329 struct obd_trans_info oti
= { 0 };
1330 struct obdo
*oa
= NULL
;
1333 struct lov_stripe_md
*lsm
= NULL
, *lsm2
;
1339 lsm
= ccc_inode_lsm_get(inode
);
1340 if (!lsm_has_objects(lsm
))
1341 GOTO(out
, rc
= -ENOENT
);
1343 lsm_size
= sizeof(*lsm
) + (sizeof(struct lov_oinfo
) *
1344 (lsm
->lsm_stripe_count
));
1346 OBD_ALLOC_LARGE(lsm2
, lsm_size
);
1348 GOTO(out
, rc
= -ENOMEM
);
1351 oa
->o_nlink
= ost_idx
;
1352 oa
->o_flags
|= OBD_FL_RECREATE_OBJS
;
1353 oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLFLAGS
| OBD_MD_FLGROUP
;
1354 obdo_from_inode(oa
, inode
, OBD_MD_FLTYPE
| OBD_MD_FLATIME
|
1355 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
);
1356 obdo_set_parent_fid(oa
, &ll_i2info(inode
)->lli_fid
);
1357 memcpy(lsm2
, lsm
, lsm_size
);
1358 ll_inode_size_lock(inode
);
1359 rc
= obd_create(NULL
, exp
, oa
, &lsm2
, &oti
);
1360 ll_inode_size_unlock(inode
);
1362 OBD_FREE_LARGE(lsm2
, lsm_size
);
1365 ccc_inode_lsm_put(inode
, lsm
);
1370 static int ll_lov_recreate_obj(struct inode
*inode
, unsigned long arg
)
1372 struct ll_recreate_obj ucreat
;
1375 if (!capable(CFS_CAP_SYS_ADMIN
))
1378 if (copy_from_user(&ucreat
, (struct ll_recreate_obj
*)arg
,
1382 ostid_set_seq_mdt0(&oi
);
1383 ostid_set_id(&oi
, ucreat
.lrc_id
);
1384 return ll_lov_recreate(inode
, &oi
, ucreat
.lrc_ost_idx
);
1387 static int ll_lov_recreate_fid(struct inode
*inode
, unsigned long arg
)
1393 if (!capable(CFS_CAP_SYS_ADMIN
))
1396 if (copy_from_user(&fid
, (struct lu_fid
*)arg
, sizeof(fid
)))
1399 fid_to_ostid(&fid
, &oi
);
1400 ost_idx
= (fid_seq(&fid
) >> 16) & 0xffff;
1401 return ll_lov_recreate(inode
, &oi
, ost_idx
);
1404 int ll_lov_setstripe_ea_info(struct inode
*inode
, struct file
*file
,
1405 int flags
, struct lov_user_md
*lum
, int lum_size
)
1407 struct lov_stripe_md
*lsm
= NULL
;
1408 struct lookup_intent oit
= {.it_op
= IT_OPEN
, .it_flags
= flags
};
1411 lsm
= ccc_inode_lsm_get(inode
);
1413 ccc_inode_lsm_put(inode
, lsm
);
1414 CDEBUG(D_IOCTL
, "stripe already exists for ino %lu\n",
1416 GOTO(out
, rc
= -EEXIST
);
1419 ll_inode_size_lock(inode
);
1420 rc
= ll_intent_file_open(file
, lum
, lum_size
, &oit
);
1422 GOTO(out_unlock
, rc
);
1423 rc
= oit
.d
.lustre
.it_status
;
1425 GOTO(out_req_free
, rc
);
1427 ll_release_openhandle(file
->f_dentry
, &oit
);
1430 ll_inode_size_unlock(inode
);
1431 ll_intent_release(&oit
);
1432 ccc_inode_lsm_put(inode
, lsm
);
1434 cl_lov_delay_create_clear(&file
->f_flags
);
1437 ptlrpc_req_finished((struct ptlrpc_request
*) oit
.d
.lustre
.it_data
);
1441 int ll_lov_getstripe_ea_info(struct inode
*inode
, const char *filename
,
1442 struct lov_mds_md
**lmmp
, int *lmm_size
,
1443 struct ptlrpc_request
**request
)
1445 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1446 struct mdt_body
*body
;
1447 struct lov_mds_md
*lmm
= NULL
;
1448 struct ptlrpc_request
*req
= NULL
;
1449 struct md_op_data
*op_data
;
1452 rc
= ll_get_default_mdsize(sbi
, &lmmsize
);
1456 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, filename
,
1457 strlen(filename
), lmmsize
,
1458 LUSTRE_OPC_ANY
, NULL
);
1459 if (IS_ERR(op_data
))
1460 return PTR_ERR(op_data
);
1462 op_data
->op_valid
= OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
;
1463 rc
= md_getattr_name(sbi
->ll_md_exp
, op_data
, &req
);
1464 ll_finish_md_op_data(op_data
);
1466 CDEBUG(D_INFO
, "md_getattr_name failed "
1467 "on %s: rc %d\n", filename
, rc
);
1471 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
1472 LASSERT(body
!= NULL
); /* checked by mdc_getattr_name */
1474 lmmsize
= body
->eadatasize
;
1476 if (!(body
->valid
& (OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
)) ||
1478 GOTO(out
, rc
= -ENODATA
);
1481 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_MDT_MD
, lmmsize
);
1482 LASSERT(lmm
!= NULL
);
1484 if ((lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V1
)) &&
1485 (lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V3
))) {
1486 GOTO(out
, rc
= -EPROTO
);
1490 * This is coming from the MDS, so is probably in
1491 * little endian. We convert it to host endian before
1492 * passing it to userspace.
1494 if (LOV_MAGIC
!= cpu_to_le32(LOV_MAGIC
)) {
1497 stripe_count
= le16_to_cpu(lmm
->lmm_stripe_count
);
1498 if (le32_to_cpu(lmm
->lmm_pattern
) & LOV_PATTERN_F_RELEASED
)
1501 /* if function called for directory - we should
1502 * avoid swab not existent lsm objects */
1503 if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V1
)) {
1504 lustre_swab_lov_user_md_v1((struct lov_user_md_v1
*)lmm
);
1505 if (S_ISREG(body
->mode
))
1506 lustre_swab_lov_user_md_objects(
1507 ((struct lov_user_md_v1
*)lmm
)->lmm_objects
,
1509 } else if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V3
)) {
1510 lustre_swab_lov_user_md_v3((struct lov_user_md_v3
*)lmm
);
1511 if (S_ISREG(body
->mode
))
1512 lustre_swab_lov_user_md_objects(
1513 ((struct lov_user_md_v3
*)lmm
)->lmm_objects
,
1520 *lmm_size
= lmmsize
;
1525 static int ll_lov_setea(struct inode
*inode
, struct file
*file
,
1528 int flags
= MDS_OPEN_HAS_OBJS
| FMODE_WRITE
;
1529 struct lov_user_md
*lump
;
1530 int lum_size
= sizeof(struct lov_user_md
) +
1531 sizeof(struct lov_user_ost_data
);
1534 if (!capable(CFS_CAP_SYS_ADMIN
))
1537 OBD_ALLOC_LARGE(lump
, lum_size
);
1541 if (copy_from_user(lump
, (struct lov_user_md
*)arg
, lum_size
)) {
1542 OBD_FREE_LARGE(lump
, lum_size
);
1546 rc
= ll_lov_setstripe_ea_info(inode
, file
, flags
, lump
, lum_size
);
1548 OBD_FREE_LARGE(lump
, lum_size
);
1552 static int ll_lov_setstripe(struct inode
*inode
, struct file
*file
,
1555 struct lov_user_md_v3 lumv3
;
1556 struct lov_user_md_v1
*lumv1
= (struct lov_user_md_v1
*)&lumv3
;
1557 struct lov_user_md_v1
*lumv1p
= (struct lov_user_md_v1
*)arg
;
1558 struct lov_user_md_v3
*lumv3p
= (struct lov_user_md_v3
*)arg
;
1560 int flags
= FMODE_WRITE
;
1562 /* first try with v1 which is smaller than v3 */
1563 lum_size
= sizeof(struct lov_user_md_v1
);
1564 if (copy_from_user(lumv1
, lumv1p
, lum_size
))
1567 if (lumv1
->lmm_magic
== LOV_USER_MAGIC_V3
) {
1568 lum_size
= sizeof(struct lov_user_md_v3
);
1569 if (copy_from_user(&lumv3
, lumv3p
, lum_size
))
1573 rc
= ll_lov_setstripe_ea_info(inode
, file
, flags
, lumv1
, lum_size
);
1575 struct lov_stripe_md
*lsm
;
1578 put_user(0, &lumv1p
->lmm_stripe_count
);
1580 ll_layout_refresh(inode
, &gen
);
1581 lsm
= ccc_inode_lsm_get(inode
);
1582 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
),
1583 0, lsm
, (void *)arg
);
1584 ccc_inode_lsm_put(inode
, lsm
);
1589 static int ll_lov_getstripe(struct inode
*inode
, unsigned long arg
)
1591 struct lov_stripe_md
*lsm
;
1594 lsm
= ccc_inode_lsm_get(inode
);
1596 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
), 0,
1598 ccc_inode_lsm_put(inode
, lsm
);
1603 ll_get_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
)
1605 struct ll_inode_info
*lli
= ll_i2info(inode
);
1606 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1607 struct ccc_grouplock grouplock
;
1610 if (ll_file_nolock(file
))
1613 spin_lock(&lli
->lli_lock
);
1614 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1615 CWARN("group lock already existed with gid %lu\n",
1616 fd
->fd_grouplock
.cg_gid
);
1617 spin_unlock(&lli
->lli_lock
);
1620 LASSERT(fd
->fd_grouplock
.cg_lock
== NULL
);
1621 spin_unlock(&lli
->lli_lock
);
1623 rc
= cl_get_grouplock(cl_i2info(inode
)->lli_clob
,
1624 arg
, (file
->f_flags
& O_NONBLOCK
), &grouplock
);
1628 spin_lock(&lli
->lli_lock
);
1629 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1630 spin_unlock(&lli
->lli_lock
);
1631 CERROR("another thread just won the race\n");
1632 cl_put_grouplock(&grouplock
);
1636 fd
->fd_flags
|= LL_FILE_GROUP_LOCKED
;
1637 fd
->fd_grouplock
= grouplock
;
1638 spin_unlock(&lli
->lli_lock
);
1640 CDEBUG(D_INFO
, "group lock %lu obtained\n", arg
);
1644 int ll_put_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
)
1646 struct ll_inode_info
*lli
= ll_i2info(inode
);
1647 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1648 struct ccc_grouplock grouplock
;
1650 spin_lock(&lli
->lli_lock
);
1651 if (!(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1652 spin_unlock(&lli
->lli_lock
);
1653 CWARN("no group lock held\n");
1656 LASSERT(fd
->fd_grouplock
.cg_lock
!= NULL
);
1658 if (fd
->fd_grouplock
.cg_gid
!= arg
) {
1659 CWARN("group lock %lu doesn't match current id %lu\n",
1660 arg
, fd
->fd_grouplock
.cg_gid
);
1661 spin_unlock(&lli
->lli_lock
);
1665 grouplock
= fd
->fd_grouplock
;
1666 memset(&fd
->fd_grouplock
, 0, sizeof(fd
->fd_grouplock
));
1667 fd
->fd_flags
&= ~LL_FILE_GROUP_LOCKED
;
1668 spin_unlock(&lli
->lli_lock
);
1670 cl_put_grouplock(&grouplock
);
1671 CDEBUG(D_INFO
, "group lock %lu released\n", arg
);
1676 * Close inode open handle
1678 * \param dentry [in] dentry which contains the inode
1679 * \param it [in,out] intent which contains open info and result
1682 * \retval <0 failure
1684 int ll_release_openhandle(struct dentry
*dentry
, struct lookup_intent
*it
)
1686 struct inode
*inode
= dentry
->d_inode
;
1687 struct obd_client_handle
*och
;
1692 /* Root ? Do nothing. */
1693 if (dentry
->d_inode
->i_sb
->s_root
== dentry
)
1696 /* No open handle to close? Move away */
1697 if (!it_disposition(it
, DISP_OPEN_OPEN
))
1700 LASSERT(it_open_error(DISP_OPEN_OPEN
, it
) == 0);
1702 OBD_ALLOC(och
, sizeof(*och
));
1704 GOTO(out
, rc
= -ENOMEM
);
1706 ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
1708 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
1711 /* this one is in place of ll_file_open */
1712 if (it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
1713 ptlrpc_req_finished(it
->d
.lustre
.it_data
);
1714 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
1720 * Get size for inode for which FIEMAP mapping is requested.
1721 * Make the FIEMAP get_info call and returns the result.
1723 static int ll_do_fiemap(struct inode
*inode
, struct ll_user_fiemap
*fiemap
,
1726 struct obd_export
*exp
= ll_i2dtexp(inode
);
1727 struct lov_stripe_md
*lsm
= NULL
;
1728 struct ll_fiemap_info_key fm_key
= { .name
= KEY_FIEMAP
, };
1729 __u32 vallen
= num_bytes
;
1732 /* Checks for fiemap flags */
1733 if (fiemap
->fm_flags
& ~LUSTRE_FIEMAP_FLAGS_COMPAT
) {
1734 fiemap
->fm_flags
&= ~LUSTRE_FIEMAP_FLAGS_COMPAT
;
1738 /* Check for FIEMAP_FLAG_SYNC */
1739 if (fiemap
->fm_flags
& FIEMAP_FLAG_SYNC
) {
1740 rc
= filemap_fdatawrite(inode
->i_mapping
);
1745 lsm
= ccc_inode_lsm_get(inode
);
1749 /* If the stripe_count > 1 and the application does not understand
1750 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1752 if (lsm
->lsm_stripe_count
> 1 &&
1753 !(fiemap
->fm_flags
& FIEMAP_FLAG_DEVICE_ORDER
))
1754 GOTO(out
, rc
= -EOPNOTSUPP
);
1756 fm_key
.oa
.o_oi
= lsm
->lsm_oi
;
1757 fm_key
.oa
.o_valid
= OBD_MD_FLID
| OBD_MD_FLGROUP
;
1759 obdo_from_inode(&fm_key
.oa
, inode
, OBD_MD_FLSIZE
);
1760 obdo_set_parent_fid(&fm_key
.oa
, &ll_i2info(inode
)->lli_fid
);
1761 /* If filesize is 0, then there would be no objects for mapping */
1762 if (fm_key
.oa
.o_size
== 0) {
1763 fiemap
->fm_mapped_extents
= 0;
1767 memcpy(&fm_key
.fiemap
, fiemap
, sizeof(*fiemap
));
1769 rc
= obd_get_info(NULL
, exp
, sizeof(fm_key
), &fm_key
, &vallen
,
1772 CERROR("obd_get_info failed: rc = %d\n", rc
);
1775 ccc_inode_lsm_put(inode
, lsm
);
1779 int ll_fid2path(struct inode
*inode
, void *arg
)
1781 struct obd_export
*exp
= ll_i2mdexp(inode
);
1782 struct getinfo_fid2path
*gfout
, *gfin
;
1785 if (!capable(CFS_CAP_DAC_READ_SEARCH
) &&
1786 !(ll_i2sbi(inode
)->ll_flags
& LL_SBI_USER_FID2PATH
))
1789 /* Need to get the buflen */
1790 OBD_ALLOC_PTR(gfin
);
1793 if (copy_from_user(gfin
, arg
, sizeof(*gfin
))) {
1798 outsize
= sizeof(*gfout
) + gfin
->gf_pathlen
;
1799 OBD_ALLOC(gfout
, outsize
);
1800 if (gfout
== NULL
) {
1804 memcpy(gfout
, gfin
, sizeof(*gfout
));
1807 /* Call mdc_iocontrol */
1808 rc
= obd_iocontrol(OBD_IOC_FID2PATH
, exp
, outsize
, gfout
, NULL
);
1812 if (copy_to_user(arg
, gfout
, outsize
))
1816 OBD_FREE(gfout
, outsize
);
1820 static int ll_ioctl_fiemap(struct inode
*inode
, unsigned long arg
)
1822 struct ll_user_fiemap
*fiemap_s
;
1823 size_t num_bytes
, ret_bytes
;
1824 unsigned int extent_count
;
1827 /* Get the extent count so we can calculate the size of
1828 * required fiemap buffer */
1829 if (get_user(extent_count
,
1830 &((struct ll_user_fiemap __user
*)arg
)->fm_extent_count
))
1834 (SIZE_MAX
- sizeof(*fiemap_s
)) / sizeof(struct ll_fiemap_extent
))
1836 num_bytes
= sizeof(*fiemap_s
) + (extent_count
*
1837 sizeof(struct ll_fiemap_extent
));
1839 OBD_ALLOC_LARGE(fiemap_s
, num_bytes
);
1840 if (fiemap_s
== NULL
)
1843 /* get the fiemap value */
1844 if (copy_from_user(fiemap_s
, (struct ll_user_fiemap __user
*)arg
,
1846 GOTO(error
, rc
= -EFAULT
);
1848 /* If fm_extent_count is non-zero, read the first extent since
1849 * it is used to calculate end_offset and device from previous
1852 if (copy_from_user(&fiemap_s
->fm_extents
[0],
1853 (char __user
*)arg
+ sizeof(*fiemap_s
),
1854 sizeof(struct ll_fiemap_extent
)))
1855 GOTO(error
, rc
= -EFAULT
);
1858 rc
= ll_do_fiemap(inode
, fiemap_s
, num_bytes
);
1862 ret_bytes
= sizeof(struct ll_user_fiemap
);
1864 if (extent_count
!= 0)
1865 ret_bytes
+= (fiemap_s
->fm_mapped_extents
*
1866 sizeof(struct ll_fiemap_extent
));
1868 if (copy_to_user((void *)arg
, fiemap_s
, ret_bytes
))
1872 OBD_FREE_LARGE(fiemap_s
, num_bytes
);
1877 * Read the data_version for inode.
1879 * This value is computed using stripe object version on OST.
1880 * Version is computed using server side locking.
1882 * @param extent_lock Take extent lock. Not needed if a process is already
1883 * holding the OST object group locks.
1885 int ll_data_version(struct inode
*inode
, __u64
*data_version
,
1888 struct lov_stripe_md
*lsm
= NULL
;
1889 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1890 struct obdo
*obdo
= NULL
;
1893 /* If no stripe, we consider version is 0. */
1894 lsm
= ccc_inode_lsm_get(inode
);
1895 if (!lsm_has_objects(lsm
)) {
1897 CDEBUG(D_INODE
, "No object for inode\n");
1901 OBD_ALLOC_PTR(obdo
);
1903 GOTO(out
, rc
= -ENOMEM
);
1905 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, NULL
, obdo
, 0, extent_lock
);
1907 if (!(obdo
->o_valid
& OBD_MD_FLDATAVERSION
))
1910 *data_version
= obdo
->o_data_version
;
1915 ccc_inode_lsm_put(inode
, lsm
);
1920 * Trigger a HSM release request for the provided inode.
1922 int ll_hsm_release(struct inode
*inode
)
1924 struct cl_env_nest nest
;
1926 struct obd_client_handle
*och
= NULL
;
1927 __u64 data_version
= 0;
1931 CDEBUG(D_INODE
, "%s: Releasing file "DFID
".\n",
1932 ll_get_fsname(inode
->i_sb
, NULL
, 0),
1933 PFID(&ll_i2info(inode
)->lli_fid
));
1935 och
= ll_lease_open(inode
, NULL
, FMODE_WRITE
, MDS_OPEN_RELEASE
);
1937 GOTO(out
, rc
= PTR_ERR(och
));
1939 /* Grab latest data_version and [am]time values */
1940 rc
= ll_data_version(inode
, &data_version
, 1);
1944 env
= cl_env_nested_get(&nest
);
1946 GOTO(out
, rc
= PTR_ERR(env
));
1948 ll_merge_lvb(env
, inode
);
1949 cl_env_nested_put(&nest
, env
);
1951 /* Release the file.
1952 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1953 * we still need it to pack l_remote_handle to MDT. */
1954 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
1960 if (och
!= NULL
&& !IS_ERR(och
)) /* close the file */
1961 ll_lease_close(och
, inode
, NULL
);
1966 struct ll_swap_stack
{
1967 struct iattr ia1
, ia2
;
1969 struct inode
*inode1
, *inode2
;
1970 bool check_dv1
, check_dv2
;
1973 static int ll_swap_layouts(struct file
*file1
, struct file
*file2
,
1974 struct lustre_swap_layouts
*lsl
)
1976 struct mdc_swap_layouts msl
;
1977 struct md_op_data
*op_data
;
1980 struct ll_swap_stack
*llss
= NULL
;
1983 OBD_ALLOC_PTR(llss
);
1987 llss
->inode1
= file1
->f_dentry
->d_inode
;
1988 llss
->inode2
= file2
->f_dentry
->d_inode
;
1990 if (!S_ISREG(llss
->inode2
->i_mode
))
1991 GOTO(free
, rc
= -EINVAL
);
1993 if (inode_permission(llss
->inode1
, MAY_WRITE
) ||
1994 inode_permission(llss
->inode2
, MAY_WRITE
))
1995 GOTO(free
, rc
= -EPERM
);
1997 if (llss
->inode2
->i_sb
!= llss
->inode1
->i_sb
)
1998 GOTO(free
, rc
= -EXDEV
);
2000 /* we use 2 bool because it is easier to swap than 2 bits */
2001 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV1
)
2002 llss
->check_dv1
= true;
2004 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV2
)
2005 llss
->check_dv2
= true;
2007 /* we cannot use lsl->sl_dvX directly because we may swap them */
2008 llss
->dv1
= lsl
->sl_dv1
;
2009 llss
->dv2
= lsl
->sl_dv2
;
2011 rc
= lu_fid_cmp(ll_inode2fid(llss
->inode1
), ll_inode2fid(llss
->inode2
));
2012 if (rc
== 0) /* same file, done! */
2015 if (rc
< 0) { /* sequentialize it */
2016 swap(llss
->inode1
, llss
->inode2
);
2018 swap(llss
->dv1
, llss
->dv2
);
2019 swap(llss
->check_dv1
, llss
->check_dv2
);
2023 if (gid
!= 0) { /* application asks to flush dirty cache */
2024 rc
= ll_get_grouplock(llss
->inode1
, file1
, gid
);
2028 rc
= ll_get_grouplock(llss
->inode2
, file2
, gid
);
2030 ll_put_grouplock(llss
->inode1
, file1
, gid
);
2035 /* to be able to restore mtime and atime after swap
2036 * we need to first save them */
2038 (SWAP_LAYOUTS_KEEP_MTIME
| SWAP_LAYOUTS_KEEP_ATIME
)) {
2039 llss
->ia1
.ia_mtime
= llss
->inode1
->i_mtime
;
2040 llss
->ia1
.ia_atime
= llss
->inode1
->i_atime
;
2041 llss
->ia1
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2042 llss
->ia2
.ia_mtime
= llss
->inode2
->i_mtime
;
2043 llss
->ia2
.ia_atime
= llss
->inode2
->i_atime
;
2044 llss
->ia2
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2047 /* ultimate check, before swapping the layouts we check if
2048 * dataversion has changed (if requested) */
2049 if (llss
->check_dv1
) {
2050 rc
= ll_data_version(llss
->inode1
, &dv
, 0);
2053 if (dv
!= llss
->dv1
)
2054 GOTO(putgl
, rc
= -EAGAIN
);
2057 if (llss
->check_dv2
) {
2058 rc
= ll_data_version(llss
->inode2
, &dv
, 0);
2061 if (dv
!= llss
->dv2
)
2062 GOTO(putgl
, rc
= -EAGAIN
);
2065 /* struct md_op_data is used to send the swap args to the mdt
2066 * only flags is missing, so we use struct mdc_swap_layouts
2067 * through the md_op_data->op_data */
2068 /* flags from user space have to be converted before they are send to
2069 * server, no flag is sent today, they are only used on the client */
2072 op_data
= ll_prep_md_op_data(NULL
, llss
->inode1
, llss
->inode2
, NULL
, 0,
2073 0, LUSTRE_OPC_ANY
, &msl
);
2074 if (IS_ERR(op_data
))
2075 GOTO(free
, rc
= PTR_ERR(op_data
));
2077 rc
= obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS
, ll_i2mdexp(llss
->inode1
),
2078 sizeof(*op_data
), op_data
, NULL
);
2079 ll_finish_md_op_data(op_data
);
2083 ll_put_grouplock(llss
->inode2
, file2
, gid
);
2084 ll_put_grouplock(llss
->inode1
, file1
, gid
);
2087 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2091 /* clear useless flags */
2092 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_MTIME
)) {
2093 llss
->ia1
.ia_valid
&= ~ATTR_MTIME
;
2094 llss
->ia2
.ia_valid
&= ~ATTR_MTIME
;
2097 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_ATIME
)) {
2098 llss
->ia1
.ia_valid
&= ~ATTR_ATIME
;
2099 llss
->ia2
.ia_valid
&= ~ATTR_ATIME
;
2102 /* update time if requested */
2104 if (llss
->ia2
.ia_valid
!= 0) {
2105 mutex_lock(&llss
->inode1
->i_mutex
);
2106 rc
= ll_setattr(file1
->f_dentry
, &llss
->ia2
);
2107 mutex_unlock(&llss
->inode1
->i_mutex
);
2110 if (llss
->ia1
.ia_valid
!= 0) {
2113 mutex_lock(&llss
->inode2
->i_mutex
);
2114 rc1
= ll_setattr(file2
->f_dentry
, &llss
->ia1
);
2115 mutex_unlock(&llss
->inode2
->i_mutex
);
2127 static int ll_hsm_state_set(struct inode
*inode
, struct hsm_state_set
*hss
)
2129 struct md_op_data
*op_data
;
2132 /* Non-root users are forbidden to set or clear flags which are
2133 * NOT defined in HSM_USER_MASK. */
2134 if (((hss
->hss_setmask
| hss
->hss_clearmask
) & ~HSM_USER_MASK
) &&
2135 !capable(CFS_CAP_SYS_ADMIN
))
2138 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2139 LUSTRE_OPC_ANY
, hss
);
2140 if (IS_ERR(op_data
))
2141 return PTR_ERR(op_data
);
2143 rc
= obd_iocontrol(LL_IOC_HSM_STATE_SET
, ll_i2mdexp(inode
),
2144 sizeof(*op_data
), op_data
, NULL
);
2146 ll_finish_md_op_data(op_data
);
2151 static int ll_hsm_import(struct inode
*inode
, struct file
*file
,
2152 struct hsm_user_import
*hui
)
2154 struct hsm_state_set
*hss
= NULL
;
2155 struct iattr
*attr
= NULL
;
2159 if (!S_ISREG(inode
->i_mode
))
2165 GOTO(out
, rc
= -ENOMEM
);
2167 hss
->hss_valid
= HSS_SETMASK
| HSS_ARCHIVE_ID
;
2168 hss
->hss_archive_id
= hui
->hui_archive_id
;
2169 hss
->hss_setmask
= HS_ARCHIVED
| HS_EXISTS
| HS_RELEASED
;
2170 rc
= ll_hsm_state_set(inode
, hss
);
2174 OBD_ALLOC_PTR(attr
);
2176 GOTO(out
, rc
= -ENOMEM
);
2178 attr
->ia_mode
= hui
->hui_mode
& (S_IRWXU
| S_IRWXG
| S_IRWXO
);
2179 attr
->ia_mode
|= S_IFREG
;
2180 attr
->ia_uid
= make_kuid(&init_user_ns
, hui
->hui_uid
);
2181 attr
->ia_gid
= make_kgid(&init_user_ns
, hui
->hui_gid
);
2182 attr
->ia_size
= hui
->hui_size
;
2183 attr
->ia_mtime
.tv_sec
= hui
->hui_mtime
;
2184 attr
->ia_mtime
.tv_nsec
= hui
->hui_mtime_ns
;
2185 attr
->ia_atime
.tv_sec
= hui
->hui_atime
;
2186 attr
->ia_atime
.tv_nsec
= hui
->hui_atime_ns
;
2188 attr
->ia_valid
= ATTR_SIZE
| ATTR_MODE
| ATTR_FORCE
|
2189 ATTR_UID
| ATTR_GID
|
2190 ATTR_MTIME
| ATTR_MTIME_SET
|
2191 ATTR_ATIME
| ATTR_ATIME_SET
;
2193 rc
= ll_setattr_raw(file
->f_dentry
, attr
, true);
2208 ll_file_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
2210 struct inode
*inode
= file
->f_dentry
->d_inode
;
2211 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2214 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode
->i_ino
,
2215 inode
->i_generation
, inode
, cmd
);
2216 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_IOCTL
, 1);
2218 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2219 if (_IOC_TYPE(cmd
) == 'T' || _IOC_TYPE(cmd
) == 't') /* tty ioctls */
2223 case LL_IOC_GETFLAGS
:
2224 /* Get the current value of the file flags */
2225 return put_user(fd
->fd_flags
, (int *)arg
);
2226 case LL_IOC_SETFLAGS
:
2227 case LL_IOC_CLRFLAGS
:
2228 /* Set or clear specific file flags */
2229 /* XXX This probably needs checks to ensure the flags are
2230 * not abused, and to handle any flag side effects.
2232 if (get_user(flags
, (int *) arg
))
2235 if (cmd
== LL_IOC_SETFLAGS
) {
2236 if ((flags
& LL_FILE_IGNORE_LOCK
) &&
2237 !(file
->f_flags
& O_DIRECT
)) {
2238 CERROR("%s: unable to disable locking on "
2239 "non-O_DIRECT file\n", current
->comm
);
2243 fd
->fd_flags
|= flags
;
2245 fd
->fd_flags
&= ~flags
;
2248 case LL_IOC_LOV_SETSTRIPE
:
2249 return ll_lov_setstripe(inode
, file
, arg
);
2250 case LL_IOC_LOV_SETEA
:
2251 return ll_lov_setea(inode
, file
, arg
);
2252 case LL_IOC_LOV_SWAP_LAYOUTS
: {
2254 struct lustre_swap_layouts lsl
;
2256 if (copy_from_user(&lsl
, (char *)arg
,
2257 sizeof(struct lustre_swap_layouts
)))
2260 if ((file
->f_flags
& O_ACCMODE
) == 0) /* O_RDONLY */
2263 file2
= fget(lsl
.sl_fd
);
2268 if ((file2
->f_flags
& O_ACCMODE
) != 0) /* O_WRONLY or O_RDWR */
2269 rc
= ll_swap_layouts(file
, file2
, &lsl
);
2273 case LL_IOC_LOV_GETSTRIPE
:
2274 return ll_lov_getstripe(inode
, arg
);
2275 case LL_IOC_RECREATE_OBJ
:
2276 return ll_lov_recreate_obj(inode
, arg
);
2277 case LL_IOC_RECREATE_FID
:
2278 return ll_lov_recreate_fid(inode
, arg
);
2279 case FSFILT_IOC_FIEMAP
:
2280 return ll_ioctl_fiemap(inode
, arg
);
2281 case FSFILT_IOC_GETFLAGS
:
2282 case FSFILT_IOC_SETFLAGS
:
2283 return ll_iocontrol(inode
, file
, cmd
, arg
);
2284 case FSFILT_IOC_GETVERSION_OLD
:
2285 case FSFILT_IOC_GETVERSION
:
2286 return put_user(inode
->i_generation
, (int *)arg
);
2287 case LL_IOC_GROUP_LOCK
:
2288 return ll_get_grouplock(inode
, file
, arg
);
2289 case LL_IOC_GROUP_UNLOCK
:
2290 return ll_put_grouplock(inode
, file
, arg
);
2291 case IOC_OBD_STATFS
:
2292 return ll_obd_statfs(inode
, (void *)arg
);
2294 /* We need to special case any other ioctls we want to handle,
2295 * to send them to the MDS/OST as appropriate and to properly
2296 * network encode the arg field.
2297 case FSFILT_IOC_SETVERSION_OLD:
2298 case FSFILT_IOC_SETVERSION:
2300 case LL_IOC_FLUSHCTX
:
2301 return ll_flush_ctx(inode
);
2302 case LL_IOC_PATH2FID
: {
2303 if (copy_to_user((void *)arg
, ll_inode2fid(inode
),
2304 sizeof(struct lu_fid
)))
2309 case OBD_IOC_FID2PATH
:
2310 return ll_fid2path(inode
, (void *)arg
);
2311 case LL_IOC_DATA_VERSION
: {
2312 struct ioc_data_version idv
;
2315 if (copy_from_user(&idv
, (char *)arg
, sizeof(idv
)))
2318 rc
= ll_data_version(inode
, &idv
.idv_version
,
2319 !(idv
.idv_flags
& LL_DV_NOFLUSH
));
2321 if (rc
== 0 && copy_to_user((char *) arg
, &idv
, sizeof(idv
)))
2327 case LL_IOC_GET_MDTIDX
: {
2330 mdtidx
= ll_get_mdt_idx(inode
);
2334 if (put_user((int)mdtidx
, (int*)arg
))
2339 case OBD_IOC_GETDTNAME
:
2340 case OBD_IOC_GETMDNAME
:
2341 return ll_get_obd_name(inode
, cmd
, arg
);
2342 case LL_IOC_HSM_STATE_GET
: {
2343 struct md_op_data
*op_data
;
2344 struct hsm_user_state
*hus
;
2351 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2352 LUSTRE_OPC_ANY
, hus
);
2353 if (IS_ERR(op_data
)) {
2355 return PTR_ERR(op_data
);
2358 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2361 if (copy_to_user((void *)arg
, hus
, sizeof(*hus
)))
2364 ll_finish_md_op_data(op_data
);
2368 case LL_IOC_HSM_STATE_SET
: {
2369 struct hsm_state_set
*hss
;
2376 if (copy_from_user(hss
, (char *)arg
, sizeof(*hss
))) {
2381 rc
= ll_hsm_state_set(inode
, hss
);
2386 case LL_IOC_HSM_ACTION
: {
2387 struct md_op_data
*op_data
;
2388 struct hsm_current_action
*hca
;
2395 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2396 LUSTRE_OPC_ANY
, hca
);
2397 if (IS_ERR(op_data
)) {
2399 return PTR_ERR(op_data
);
2402 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2405 if (copy_to_user((char *)arg
, hca
, sizeof(*hca
)))
2408 ll_finish_md_op_data(op_data
);
2412 case LL_IOC_SET_LEASE
: {
2413 struct ll_inode_info
*lli
= ll_i2info(inode
);
2414 struct obd_client_handle
*och
= NULL
;
2420 if (!(file
->f_mode
& FMODE_WRITE
))
2425 if (!(file
->f_mode
& FMODE_READ
))
2430 mutex_lock(&lli
->lli_och_mutex
);
2431 if (fd
->fd_lease_och
!= NULL
) {
2432 och
= fd
->fd_lease_och
;
2433 fd
->fd_lease_och
= NULL
;
2435 mutex_unlock(&lli
->lli_och_mutex
);
2438 mode
= och
->och_flags
&
2439 (FMODE_READ
|FMODE_WRITE
);
2440 rc
= ll_lease_close(och
, inode
, &lease_broken
);
2441 if (rc
== 0 && lease_broken
)
2447 /* return the type of lease or error */
2448 return rc
< 0 ? rc
: (int)mode
;
2453 CDEBUG(D_INODE
, "Set lease with mode %d\n", mode
);
2455 /* apply for lease */
2456 och
= ll_lease_open(inode
, file
, mode
, 0);
2458 return PTR_ERR(och
);
2461 mutex_lock(&lli
->lli_och_mutex
);
2462 if (fd
->fd_lease_och
== NULL
) {
2463 fd
->fd_lease_och
= och
;
2466 mutex_unlock(&lli
->lli_och_mutex
);
2468 /* impossible now that only excl is supported for now */
2469 ll_lease_close(och
, inode
, &lease_broken
);
2474 case LL_IOC_GET_LEASE
: {
2475 struct ll_inode_info
*lli
= ll_i2info(inode
);
2476 struct ldlm_lock
*lock
= NULL
;
2479 mutex_lock(&lli
->lli_och_mutex
);
2480 if (fd
->fd_lease_och
!= NULL
) {
2481 struct obd_client_handle
*och
= fd
->fd_lease_och
;
2483 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
2485 lock_res_and_lock(lock
);
2486 if (!ldlm_is_cancel(lock
))
2487 rc
= och
->och_flags
&
2488 (FMODE_READ
| FMODE_WRITE
);
2489 unlock_res_and_lock(lock
);
2490 ldlm_lock_put(lock
);
2493 mutex_unlock(&lli
->lli_och_mutex
);
2496 case LL_IOC_HSM_IMPORT
: {
2497 struct hsm_user_import
*hui
;
2503 if (copy_from_user(hui
, (void *)arg
, sizeof(*hui
))) {
2508 rc
= ll_hsm_import(inode
, file
, hui
);
2517 ll_iocontrol_call(inode
, file
, cmd
, arg
, &err
))
2520 return obd_iocontrol(cmd
, ll_i2dtexp(inode
), 0, NULL
,
2527 static loff_t
ll_file_seek(struct file
*file
, loff_t offset
, int origin
)
2529 struct inode
*inode
= file
->f_dentry
->d_inode
;
2530 loff_t retval
, eof
= 0;
2532 retval
= offset
+ ((origin
== SEEK_END
) ? i_size_read(inode
) :
2533 (origin
== SEEK_CUR
) ? file
->f_pos
: 0);
2534 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2535 inode
->i_ino
, inode
->i_generation
, inode
, retval
, retval
,
2537 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_LLSEEK
, 1);
2539 if (origin
== SEEK_END
|| origin
== SEEK_HOLE
|| origin
== SEEK_DATA
) {
2540 retval
= ll_glimpse_size(inode
);
2543 eof
= i_size_read(inode
);
2546 retval
= generic_file_llseek_size(file
, offset
, origin
,
2547 ll_file_maxbytes(inode
), eof
);
2551 static int ll_flush(struct file
*file
, fl_owner_t id
)
2553 struct inode
*inode
= file
->f_dentry
->d_inode
;
2554 struct ll_inode_info
*lli
= ll_i2info(inode
);
2555 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2558 LASSERT(!S_ISDIR(inode
->i_mode
));
2560 /* catch async errors that were recorded back when async writeback
2561 * failed for pages in this mapping. */
2562 rc
= lli
->lli_async_rc
;
2563 lli
->lli_async_rc
= 0;
2564 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2568 /* The application has been told write failure already.
2569 * Do not report failure again. */
2570 if (fd
->fd_write_failed
)
2572 return rc
? -EIO
: 0;
2576 * Called to make sure a portion of file has been written out.
2577 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2579 * Return how many pages have been written.
2581 int cl_sync_file_range(struct inode
*inode
, loff_t start
, loff_t end
,
2582 enum cl_fsync_mode mode
, int ignore_layout
)
2584 struct cl_env_nest nest
;
2587 struct obd_capa
*capa
= NULL
;
2588 struct cl_fsync_io
*fio
;
2591 if (mode
!= CL_FSYNC_NONE
&& mode
!= CL_FSYNC_LOCAL
&&
2592 mode
!= CL_FSYNC_DISCARD
&& mode
!= CL_FSYNC_ALL
)
2595 env
= cl_env_nested_get(&nest
);
2597 return PTR_ERR(env
);
2599 capa
= ll_osscapa_get(inode
, CAPA_OPC_OSS_WRITE
);
2601 io
= ccc_env_thread_io(env
);
2602 io
->ci_obj
= cl_i2info(inode
)->lli_clob
;
2603 io
->ci_ignore_layout
= ignore_layout
;
2605 /* initialize parameters for sync */
2606 fio
= &io
->u
.ci_fsync
;
2607 fio
->fi_capa
= capa
;
2608 fio
->fi_start
= start
;
2610 fio
->fi_fid
= ll_inode2fid(inode
);
2611 fio
->fi_mode
= mode
;
2612 fio
->fi_nr_written
= 0;
2614 if (cl_io_init(env
, io
, CIT_FSYNC
, io
->ci_obj
) == 0)
2615 result
= cl_io_loop(env
, io
);
2617 result
= io
->ci_result
;
2619 result
= fio
->fi_nr_written
;
2620 cl_io_fini(env
, io
);
2621 cl_env_nested_put(&nest
, env
);
2629 * When dentry is provided (the 'else' case), *file->f_dentry may be
2630 * null and dentry must be used directly rather than pulled from
2631 * *file->f_dentry as is done otherwise.
2634 int ll_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
2636 struct dentry
*dentry
= file
->f_dentry
;
2637 struct inode
*inode
= dentry
->d_inode
;
2638 struct ll_inode_info
*lli
= ll_i2info(inode
);
2639 struct ptlrpc_request
*req
;
2640 struct obd_capa
*oc
;
2643 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p)\n", inode
->i_ino
,
2644 inode
->i_generation
, inode
);
2645 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FSYNC
, 1);
2647 rc
= filemap_write_and_wait_range(inode
->i_mapping
, start
, end
);
2648 mutex_lock(&inode
->i_mutex
);
2650 /* catch async errors that were recorded back when async writeback
2651 * failed for pages in this mapping. */
2652 if (!S_ISDIR(inode
->i_mode
)) {
2653 err
= lli
->lli_async_rc
;
2654 lli
->lli_async_rc
= 0;
2657 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2662 oc
= ll_mdscapa_get(inode
);
2663 err
= md_sync(ll_i2sbi(inode
)->ll_md_exp
, ll_inode2fid(inode
), oc
,
2669 ptlrpc_req_finished(req
);
2671 if (S_ISREG(inode
->i_mode
)) {
2672 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2674 err
= cl_sync_file_range(inode
, start
, end
, CL_FSYNC_ALL
, 0);
2675 if (rc
== 0 && err
< 0)
2678 fd
->fd_write_failed
= true;
2680 fd
->fd_write_failed
= false;
2683 mutex_unlock(&inode
->i_mutex
);
2688 ll_file_flock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2690 struct inode
*inode
= file
->f_dentry
->d_inode
;
2691 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
2692 struct ldlm_enqueue_info einfo
= {
2693 .ei_type
= LDLM_FLOCK
,
2694 .ei_cb_cp
= ldlm_flock_completion_ast
,
2695 .ei_cbdata
= file_lock
,
2697 struct md_op_data
*op_data
;
2698 struct lustre_handle lockh
= {0};
2699 ldlm_policy_data_t flock
= {{0}};
2704 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu file_lock=%p\n",
2705 inode
->i_ino
, file_lock
);
2707 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FLOCK
, 1);
2709 if (file_lock
->fl_flags
& FL_FLOCK
)
2710 LASSERT((cmd
== F_SETLKW
) || (cmd
== F_SETLK
));
2711 else if (!(file_lock
->fl_flags
& FL_POSIX
))
2714 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_owner
;
2715 flock
.l_flock
.pid
= file_lock
->fl_pid
;
2716 flock
.l_flock
.start
= file_lock
->fl_start
;
2717 flock
.l_flock
.end
= file_lock
->fl_end
;
2719 /* Somewhat ugly workaround for svc lockd.
2720 * lockd installs custom fl_lmops->lm_compare_owner that checks
2721 * for the fl_owner to be the same (which it always is on local node
2722 * I guess between lockd processes) and then compares pid.
2723 * As such we assign pid to the owner field to make it all work,
2724 * conflict with normal locks is unlikely since pid space and
2725 * pointer space for current->files are not intersecting */
2726 if (file_lock
->fl_lmops
&& file_lock
->fl_lmops
->lm_compare_owner
)
2727 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_pid
;
2729 switch (file_lock
->fl_type
) {
2731 einfo
.ei_mode
= LCK_PR
;
2734 /* An unlock request may or may not have any relation to
2735 * existing locks so we may not be able to pass a lock handle
2736 * via a normal ldlm_lock_cancel() request. The request may even
2737 * unlock a byte range in the middle of an existing lock. In
2738 * order to process an unlock request we need all of the same
2739 * information that is given with a normal read or write record
2740 * lock request. To avoid creating another ldlm unlock (cancel)
2741 * message we'll treat a LCK_NL flock request as an unlock. */
2742 einfo
.ei_mode
= LCK_NL
;
2745 einfo
.ei_mode
= LCK_PW
;
2748 CDEBUG(D_INFO
, "Unknown fcntl lock type: %d\n",
2749 file_lock
->fl_type
);
2764 flags
= LDLM_FL_BLOCK_NOWAIT
;
2770 flags
= LDLM_FL_TEST_LOCK
;
2771 /* Save the old mode so that if the mode in the lock changes we
2772 * can decrement the appropriate reader or writer refcount. */
2773 file_lock
->fl_type
= einfo
.ei_mode
;
2776 CERROR("unknown fcntl lock command: %d\n", cmd
);
2780 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2781 LUSTRE_OPC_ANY
, NULL
);
2782 if (IS_ERR(op_data
))
2783 return PTR_ERR(op_data
);
2785 CDEBUG(D_DLMTRACE
, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2786 "start="LPU64
", end="LPU64
"\n", inode
->i_ino
, flock
.l_flock
.pid
,
2787 flags
, einfo
.ei_mode
, flock
.l_flock
.start
, flock
.l_flock
.end
);
2789 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
,
2790 op_data
, &lockh
, &flock
, 0, NULL
/* req */, flags
);
2792 if ((file_lock
->fl_flags
& FL_FLOCK
) &&
2793 (rc
== 0 || file_lock
->fl_type
== F_UNLCK
))
2794 rc2
= flock_lock_file_wait(file
, file_lock
);
2795 if ((file_lock
->fl_flags
& FL_POSIX
) &&
2796 (rc
== 0 || file_lock
->fl_type
== F_UNLCK
) &&
2797 !(flags
& LDLM_FL_TEST_LOCK
))
2798 rc2
= posix_lock_file_wait(file
, file_lock
);
2800 if (rc2
&& file_lock
->fl_type
!= F_UNLCK
) {
2801 einfo
.ei_mode
= LCK_NL
;
2802 md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
,
2803 op_data
, &lockh
, &flock
, 0, NULL
/* req */, flags
);
2807 ll_finish_md_op_data(op_data
);
2813 ll_file_noflock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2819 * test if some locks matching bits and l_req_mode are acquired
2820 * - bits can be in different locks
2821 * - if found clear the common lock bits in *bits
2822 * - the bits not found, are kept in *bits
2824 * \param bits [IN] searched lock bits [IN]
2825 * \param l_req_mode [IN] searched lock mode
2826 * \retval boolean, true iff all bits are found
2828 int ll_have_md_lock(struct inode
*inode
, __u64
*bits
, ldlm_mode_t l_req_mode
)
2830 struct lustre_handle lockh
;
2831 ldlm_policy_data_t policy
;
2832 ldlm_mode_t mode
= (l_req_mode
== LCK_MINMODE
) ?
2833 (LCK_CR
|LCK_CW
|LCK_PR
|LCK_PW
) : l_req_mode
;
2841 fid
= &ll_i2info(inode
)->lli_fid
;
2842 CDEBUG(D_INFO
, "trying to match res "DFID
" mode %s\n", PFID(fid
),
2843 ldlm_lockname
[mode
]);
2845 flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_CBPENDING
| LDLM_FL_TEST_LOCK
;
2846 for (i
= 0; i
<= MDS_INODELOCK_MAXSHIFT
&& *bits
!= 0; i
++) {
2847 policy
.l_inodebits
.bits
= *bits
& (1 << i
);
2848 if (policy
.l_inodebits
.bits
== 0)
2851 if (md_lock_match(ll_i2mdexp(inode
), flags
, fid
, LDLM_IBITS
,
2852 &policy
, mode
, &lockh
)) {
2853 struct ldlm_lock
*lock
;
2855 lock
= ldlm_handle2lock(&lockh
);
2858 ~(lock
->l_policy_data
.l_inodebits
.bits
);
2859 LDLM_LOCK_PUT(lock
);
2861 *bits
&= ~policy
.l_inodebits
.bits
;
2868 ldlm_mode_t
ll_take_md_lock(struct inode
*inode
, __u64 bits
,
2869 struct lustre_handle
*lockh
, __u64 flags
,
2872 ldlm_policy_data_t policy
= { .l_inodebits
= {bits
}};
2876 fid
= &ll_i2info(inode
)->lli_fid
;
2877 CDEBUG(D_INFO
, "trying to match res "DFID
"\n", PFID(fid
));
2879 rc
= md_lock_match(ll_i2mdexp(inode
), LDLM_FL_BLOCK_GRANTED
|flags
,
2880 fid
, LDLM_IBITS
, &policy
, mode
, lockh
);
2885 static int ll_inode_revalidate_fini(struct inode
*inode
, int rc
)
2887 /* Already unlinked. Just update nlink and return success */
2888 if (rc
== -ENOENT
) {
2890 /* This path cannot be hit for regular files unless in
2891 * case of obscure races, so no need to validate size.
2893 if (!S_ISREG(inode
->i_mode
) && !S_ISDIR(inode
->i_mode
))
2895 } else if (rc
!= 0) {
2896 CDEBUG_LIMIT((rc
== -EACCES
|| rc
== -EIDRM
) ? D_INFO
: D_ERROR
,
2897 "%s: revalidate FID "DFID
" error: rc = %d\n",
2898 ll_get_fsname(inode
->i_sb
, NULL
, 0),
2899 PFID(ll_inode2fid(inode
)), rc
);
2905 static int __ll_inode_revalidate(struct dentry
*dentry
, __u64 ibits
)
2907 struct inode
*inode
= dentry
->d_inode
;
2908 struct ptlrpc_request
*req
= NULL
;
2909 struct obd_export
*exp
;
2912 LASSERT(inode
!= NULL
);
2914 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2915 inode
->i_ino
, inode
->i_generation
, inode
, dentry
->d_name
.name
);
2917 exp
= ll_i2mdexp(inode
);
2919 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2920 * But under CMD case, it caused some lock issues, should be fixed
2921 * with new CMD ibits lock. See bug 12718 */
2922 if (exp_connect_flags(exp
) & OBD_CONNECT_ATTRFID
) {
2923 struct lookup_intent oit
= { .it_op
= IT_GETATTR
};
2924 struct md_op_data
*op_data
;
2926 if (ibits
== MDS_INODELOCK_LOOKUP
)
2927 oit
.it_op
= IT_LOOKUP
;
2929 /* Call getattr by fid, so do not provide name at all. */
2930 op_data
= ll_prep_md_op_data(NULL
, dentry
->d_inode
,
2931 dentry
->d_inode
, NULL
, 0, 0,
2932 LUSTRE_OPC_ANY
, NULL
);
2933 if (IS_ERR(op_data
))
2934 return PTR_ERR(op_data
);
2936 oit
.it_create_mode
|= M_CHECK_STALE
;
2937 rc
= md_intent_lock(exp
, op_data
, NULL
, 0,
2938 /* we are not interested in name
2941 ll_md_blocking_ast
, 0);
2942 ll_finish_md_op_data(op_data
);
2943 oit
.it_create_mode
&= ~M_CHECK_STALE
;
2945 rc
= ll_inode_revalidate_fini(inode
, rc
);
2949 rc
= ll_revalidate_it_finish(req
, &oit
, dentry
);
2951 ll_intent_release(&oit
);
2955 /* Unlinked? Unhash dentry, so it is not picked up later by
2956 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2957 here to preserve get_cwd functionality on 2.6.
2959 if (!dentry
->d_inode
->i_nlink
)
2960 d_lustre_invalidate(dentry
, 0);
2962 ll_lookup_finish_locks(&oit
, dentry
);
2963 } else if (!ll_have_md_lock(dentry
->d_inode
, &ibits
, LCK_MINMODE
)) {
2964 struct ll_sb_info
*sbi
= ll_i2sbi(dentry
->d_inode
);
2965 obd_valid valid
= OBD_MD_FLGETATTR
;
2966 struct md_op_data
*op_data
;
2969 if (S_ISREG(inode
->i_mode
)) {
2970 rc
= ll_get_default_mdsize(sbi
, &ealen
);
2973 valid
|= OBD_MD_FLEASIZE
| OBD_MD_FLMODEASIZE
;
2976 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
,
2977 0, ealen
, LUSTRE_OPC_ANY
,
2979 if (IS_ERR(op_data
))
2980 return PTR_ERR(op_data
);
2982 op_data
->op_valid
= valid
;
2983 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2984 * capa for this inode. Because we only keep capas of dirs
2986 rc
= md_getattr(sbi
->ll_md_exp
, op_data
, &req
);
2987 ll_finish_md_op_data(op_data
);
2989 rc
= ll_inode_revalidate_fini(inode
, rc
);
2993 rc
= ll_prep_inode(&inode
, req
, NULL
, NULL
);
2996 ptlrpc_req_finished(req
);
3000 static int ll_inode_revalidate(struct dentry
*dentry
, __u64 ibits
)
3002 struct inode
*inode
= dentry
->d_inode
;
3005 rc
= __ll_inode_revalidate(dentry
, ibits
);
3009 /* if object isn't regular file, don't validate size */
3010 if (!S_ISREG(inode
->i_mode
)) {
3011 LTIME_S(inode
->i_atime
) = ll_i2info(inode
)->lli_lvb
.lvb_atime
;
3012 LTIME_S(inode
->i_mtime
) = ll_i2info(inode
)->lli_lvb
.lvb_mtime
;
3013 LTIME_S(inode
->i_ctime
) = ll_i2info(inode
)->lli_lvb
.lvb_ctime
;
3015 /* In case of restore, the MDT has the right size and has
3016 * already send it back without granting the layout lock,
3017 * inode is up-to-date so glimpse is useless.
3018 * Also to glimpse we need the layout, in case of a running
3019 * restore the MDT holds the layout lock so the glimpse will
3020 * block up to the end of restore (getattr will block)
3022 if (!(ll_i2info(inode
)->lli_flags
& LLIF_FILE_RESTORING
))
3023 rc
= ll_glimpse_size(inode
);
3028 int ll_getattr(struct vfsmount
*mnt
, struct dentry
*de
, struct kstat
*stat
)
3030 struct inode
*inode
= de
->d_inode
;
3031 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3032 struct ll_inode_info
*lli
= ll_i2info(inode
);
3035 res
= ll_inode_revalidate(de
, MDS_INODELOCK_UPDATE
|
3036 MDS_INODELOCK_LOOKUP
);
3037 ll_stats_ops_tally(sbi
, LPROC_LL_GETATTR
, 1);
3042 stat
->dev
= inode
->i_sb
->s_dev
;
3043 if (ll_need_32bit_api(sbi
))
3044 stat
->ino
= cl_fid_build_ino(&lli
->lli_fid
, 1);
3046 stat
->ino
= inode
->i_ino
;
3047 stat
->mode
= inode
->i_mode
;
3048 stat
->nlink
= inode
->i_nlink
;
3049 stat
->uid
= inode
->i_uid
;
3050 stat
->gid
= inode
->i_gid
;
3051 stat
->rdev
= inode
->i_rdev
;
3052 stat
->atime
= inode
->i_atime
;
3053 stat
->mtime
= inode
->i_mtime
;
3054 stat
->ctime
= inode
->i_ctime
;
3055 stat
->blksize
= 1 << inode
->i_blkbits
;
3057 stat
->size
= i_size_read(inode
);
3058 stat
->blocks
= inode
->i_blocks
;
3063 static int ll_fiemap(struct inode
*inode
, struct fiemap_extent_info
*fieinfo
,
3064 __u64 start
, __u64 len
)
3068 struct ll_user_fiemap
*fiemap
;
3069 unsigned int extent_count
= fieinfo
->fi_extents_max
;
3071 num_bytes
= sizeof(*fiemap
) + (extent_count
*
3072 sizeof(struct ll_fiemap_extent
));
3073 OBD_ALLOC_LARGE(fiemap
, num_bytes
);
3078 fiemap
->fm_flags
= fieinfo
->fi_flags
;
3079 fiemap
->fm_extent_count
= fieinfo
->fi_extents_max
;
3080 fiemap
->fm_start
= start
;
3081 fiemap
->fm_length
= len
;
3082 if (extent_count
> 0)
3083 memcpy(&fiemap
->fm_extents
[0], fieinfo
->fi_extents_start
,
3084 sizeof(struct ll_fiemap_extent
));
3086 rc
= ll_do_fiemap(inode
, fiemap
, num_bytes
);
3088 fieinfo
->fi_flags
= fiemap
->fm_flags
;
3089 fieinfo
->fi_extents_mapped
= fiemap
->fm_mapped_extents
;
3090 if (extent_count
> 0)
3091 memcpy(fieinfo
->fi_extents_start
, &fiemap
->fm_extents
[0],
3092 fiemap
->fm_mapped_extents
*
3093 sizeof(struct ll_fiemap_extent
));
3095 OBD_FREE_LARGE(fiemap
, num_bytes
);
3099 struct posix_acl
*ll_get_acl(struct inode
*inode
, int type
)
3101 struct ll_inode_info
*lli
= ll_i2info(inode
);
3102 struct posix_acl
*acl
= NULL
;
3104 spin_lock(&lli
->lli_lock
);
3105 /* VFS' acl_permission_check->check_acl will release the refcount */
3106 acl
= posix_acl_dup(lli
->lli_posix_acl
);
3107 spin_unlock(&lli
->lli_lock
);
3113 int ll_inode_permission(struct inode
*inode
, int mask
)
3117 #ifdef MAY_NOT_BLOCK
3118 if (mask
& MAY_NOT_BLOCK
)
3122 /* as root inode are NOT getting validated in lookup operation,
3123 * need to do it before permission check. */
3125 if (inode
== inode
->i_sb
->s_root
->d_inode
) {
3126 rc
= __ll_inode_revalidate(inode
->i_sb
->s_root
,
3127 MDS_INODELOCK_LOOKUP
);
3132 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3133 inode
->i_ino
, inode
->i_generation
, inode
, inode
->i_mode
, mask
);
3135 if (ll_i2sbi(inode
)->ll_flags
& LL_SBI_RMT_CLIENT
)
3136 return lustre_check_remote_perm(inode
, mask
);
3138 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_INODE_PERM
, 1);
3139 rc
= generic_permission(inode
, mask
);
3144 /* -o localflock - only provides locally consistent flock locks */
3145 struct file_operations ll_file_operations
= {
3146 .read
= ll_file_read
,
3147 .aio_read
= ll_file_aio_read
,
3148 .write
= ll_file_write
,
3149 .aio_write
= ll_file_aio_write
,
3150 .unlocked_ioctl
= ll_file_ioctl
,
3151 .open
= ll_file_open
,
3152 .release
= ll_file_release
,
3153 .mmap
= ll_file_mmap
,
3154 .llseek
= ll_file_seek
,
3155 .splice_read
= ll_file_splice_read
,
3160 struct file_operations ll_file_operations_flock
= {
3161 .read
= ll_file_read
,
3162 .aio_read
= ll_file_aio_read
,
3163 .write
= ll_file_write
,
3164 .aio_write
= ll_file_aio_write
,
3165 .unlocked_ioctl
= ll_file_ioctl
,
3166 .open
= ll_file_open
,
3167 .release
= ll_file_release
,
3168 .mmap
= ll_file_mmap
,
3169 .llseek
= ll_file_seek
,
3170 .splice_read
= ll_file_splice_read
,
3173 .flock
= ll_file_flock
,
3174 .lock
= ll_file_flock
3177 /* These are for -o noflock - to return ENOSYS on flock calls */
3178 struct file_operations ll_file_operations_noflock
= {
3179 .read
= ll_file_read
,
3180 .aio_read
= ll_file_aio_read
,
3181 .write
= ll_file_write
,
3182 .aio_write
= ll_file_aio_write
,
3183 .unlocked_ioctl
= ll_file_ioctl
,
3184 .open
= ll_file_open
,
3185 .release
= ll_file_release
,
3186 .mmap
= ll_file_mmap
,
3187 .llseek
= ll_file_seek
,
3188 .splice_read
= ll_file_splice_read
,
3191 .flock
= ll_file_noflock
,
3192 .lock
= ll_file_noflock
3195 struct inode_operations ll_file_inode_operations
= {
3196 .setattr
= ll_setattr
,
3197 .getattr
= ll_getattr
,
3198 .permission
= ll_inode_permission
,
3199 .setxattr
= ll_setxattr
,
3200 .getxattr
= ll_getxattr
,
3201 .listxattr
= ll_listxattr
,
3202 .removexattr
= ll_removexattr
,
3203 .fiemap
= ll_fiemap
,
3204 .get_acl
= ll_get_acl
,
3207 /* dynamic ioctl number support routines */
3208 static struct llioc_ctl_data
{
3209 struct rw_semaphore ioc_sem
;
3210 struct list_head ioc_head
;
3212 __RWSEM_INITIALIZER(llioc
.ioc_sem
),
3213 LIST_HEAD_INIT(llioc
.ioc_head
)
3218 struct list_head iocd_list
;
3219 unsigned int iocd_size
;
3220 llioc_callback_t iocd_cb
;
3221 unsigned int iocd_count
;
3222 unsigned int iocd_cmd
[0];
3225 void *ll_iocontrol_register(llioc_callback_t cb
, int count
, unsigned int *cmd
)
3228 struct llioc_data
*in_data
= NULL
;
3230 if (cb
== NULL
|| cmd
== NULL
||
3231 count
> LLIOC_MAX_CMD
|| count
< 0)
3234 size
= sizeof(*in_data
) + count
* sizeof(unsigned int);
3235 OBD_ALLOC(in_data
, size
);
3236 if (in_data
== NULL
)
3239 memset(in_data
, 0, sizeof(*in_data
));
3240 in_data
->iocd_size
= size
;
3241 in_data
->iocd_cb
= cb
;
3242 in_data
->iocd_count
= count
;
3243 memcpy(in_data
->iocd_cmd
, cmd
, sizeof(unsigned int) * count
);
3245 down_write(&llioc
.ioc_sem
);
3246 list_add_tail(&in_data
->iocd_list
, &llioc
.ioc_head
);
3247 up_write(&llioc
.ioc_sem
);
3252 void ll_iocontrol_unregister(void *magic
)
3254 struct llioc_data
*tmp
;
3259 down_write(&llioc
.ioc_sem
);
3260 list_for_each_entry(tmp
, &llioc
.ioc_head
, iocd_list
) {
3262 unsigned int size
= tmp
->iocd_size
;
3264 list_del(&tmp
->iocd_list
);
3265 up_write(&llioc
.ioc_sem
);
3267 OBD_FREE(tmp
, size
);
3271 up_write(&llioc
.ioc_sem
);
3273 CWARN("didn't find iocontrol register block with magic: %p\n", magic
);
3276 EXPORT_SYMBOL(ll_iocontrol_register
);
3277 EXPORT_SYMBOL(ll_iocontrol_unregister
);
3279 static enum llioc_iter
3280 ll_iocontrol_call(struct inode
*inode
, struct file
*file
,
3281 unsigned int cmd
, unsigned long arg
, int *rcp
)
3283 enum llioc_iter ret
= LLIOC_CONT
;
3284 struct llioc_data
*data
;
3285 int rc
= -EINVAL
, i
;
3287 down_read(&llioc
.ioc_sem
);
3288 list_for_each_entry(data
, &llioc
.ioc_head
, iocd_list
) {
3289 for (i
= 0; i
< data
->iocd_count
; i
++) {
3290 if (cmd
!= data
->iocd_cmd
[i
])
3293 ret
= data
->iocd_cb(inode
, file
, cmd
, arg
, data
, &rc
);
3297 if (ret
== LLIOC_STOP
)
3300 up_read(&llioc
.ioc_sem
);
3307 int ll_layout_conf(struct inode
*inode
, const struct cl_object_conf
*conf
)
3309 struct ll_inode_info
*lli
= ll_i2info(inode
);
3310 struct cl_env_nest nest
;
3314 if (lli
->lli_clob
== NULL
)
3317 env
= cl_env_nested_get(&nest
);
3319 return PTR_ERR(env
);
3321 result
= cl_conf_set(env
, lli
->lli_clob
, conf
);
3322 cl_env_nested_put(&nest
, env
);
3324 if (conf
->coc_opc
== OBJECT_CONF_SET
) {
3325 struct ldlm_lock
*lock
= conf
->coc_lock
;
3327 LASSERT(lock
!= NULL
);
3328 LASSERT(ldlm_has_layout(lock
));
3330 /* it can only be allowed to match after layout is
3331 * applied to inode otherwise false layout would be
3332 * seen. Applying layout should happen before dropping
3333 * the intent lock. */
3334 ldlm_lock_allow_match(lock
);
3340 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3341 static int ll_layout_fetch(struct inode
*inode
, struct ldlm_lock
*lock
)
3344 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3345 struct obd_capa
*oc
;
3346 struct ptlrpc_request
*req
;
3347 struct mdt_body
*body
;
3353 CDEBUG(D_INODE
, DFID
" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3354 PFID(ll_inode2fid(inode
)), !!(lock
->l_flags
& LDLM_FL_LVB_READY
),
3355 lock
->l_lvb_data
, lock
->l_lvb_len
);
3357 if ((lock
->l_lvb_data
!= NULL
) && (lock
->l_flags
& LDLM_FL_LVB_READY
))
3360 /* if layout lock was granted right away, the layout is returned
3361 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3362 * blocked and then granted via completion ast, we have to fetch
3363 * layout here. Please note that we can't use the LVB buffer in
3364 * completion AST because it doesn't have a large enough buffer */
3365 oc
= ll_mdscapa_get(inode
);
3366 rc
= ll_get_default_mdsize(sbi
, &lmmsize
);
3368 rc
= md_getxattr(sbi
->ll_md_exp
, ll_inode2fid(inode
), oc
,
3369 OBD_MD_FLXATTR
, XATTR_NAME_LOV
, NULL
, 0,
3375 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
3377 GOTO(out
, rc
= -EPROTO
);
3379 lmmsize
= body
->eadatasize
;
3380 if (lmmsize
== 0) /* empty layout */
3383 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_EADATA
, lmmsize
);
3385 GOTO(out
, rc
= -EFAULT
);
3387 OBD_ALLOC_LARGE(lvbdata
, lmmsize
);
3388 if (lvbdata
== NULL
)
3389 GOTO(out
, rc
= -ENOMEM
);
3391 memcpy(lvbdata
, lmm
, lmmsize
);
3392 lock_res_and_lock(lock
);
3393 if (lock
->l_lvb_data
!= NULL
)
3394 OBD_FREE_LARGE(lock
->l_lvb_data
, lock
->l_lvb_len
);
3396 lock
->l_lvb_data
= lvbdata
;
3397 lock
->l_lvb_len
= lmmsize
;
3398 unlock_res_and_lock(lock
);
3401 ptlrpc_req_finished(req
);
3406 * Apply the layout to the inode. Layout lock is held and will be released
3409 static int ll_layout_lock_set(struct lustre_handle
*lockh
, ldlm_mode_t mode
,
3410 struct inode
*inode
, __u32
*gen
, bool reconf
)
3412 struct ll_inode_info
*lli
= ll_i2info(inode
);
3413 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3414 struct ldlm_lock
*lock
;
3415 struct lustre_md md
= { NULL
};
3416 struct cl_object_conf conf
;
3419 bool wait_layout
= false;
3421 LASSERT(lustre_handle_is_used(lockh
));
3423 lock
= ldlm_handle2lock(lockh
);
3424 LASSERT(lock
!= NULL
);
3425 LASSERT(ldlm_has_layout(lock
));
3427 LDLM_DEBUG(lock
, "File %p/"DFID
" being reconfigured: %d.\n",
3428 inode
, PFID(&lli
->lli_fid
), reconf
);
3430 /* in case this is a caching lock and reinstate with new inode */
3431 md_set_lock_data(sbi
->ll_md_exp
, &lockh
->cookie
, inode
, NULL
);
3433 lock_res_and_lock(lock
);
3434 lvb_ready
= !!(lock
->l_flags
& LDLM_FL_LVB_READY
);
3435 unlock_res_and_lock(lock
);
3436 /* checking lvb_ready is racy but this is okay. The worst case is
3437 * that multi processes may configure the file on the same time. */
3438 if (lvb_ready
|| !reconf
) {
3441 /* layout_gen must be valid if layout lock is not
3442 * cancelled and stripe has already set */
3443 *gen
= ll_layout_version_get(lli
);
3449 rc
= ll_layout_fetch(inode
, lock
);
3453 /* for layout lock, lmm is returned in lock's lvb.
3454 * lvb_data is immutable if the lock is held so it's safe to access it
3455 * without res lock. See the description in ldlm_lock_decref_internal()
3456 * for the condition to free lvb_data of layout lock */
3457 if (lock
->l_lvb_data
!= NULL
) {
3458 rc
= obd_unpackmd(sbi
->ll_dt_exp
, &md
.lsm
,
3459 lock
->l_lvb_data
, lock
->l_lvb_len
);
3461 *gen
= LL_LAYOUT_GEN_EMPTY
;
3463 *gen
= md
.lsm
->lsm_layout_gen
;
3466 CERROR("%s: file "DFID
" unpackmd error: %d\n",
3467 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3468 PFID(&lli
->lli_fid
), rc
);
3474 /* set layout to file. Unlikely this will fail as old layout was
3475 * surely eliminated */
3476 memset(&conf
, 0, sizeof(conf
));
3477 conf
.coc_opc
= OBJECT_CONF_SET
;
3478 conf
.coc_inode
= inode
;
3479 conf
.coc_lock
= lock
;
3480 conf
.u
.coc_md
= &md
;
3481 rc
= ll_layout_conf(inode
, &conf
);
3484 obd_free_memmd(sbi
->ll_dt_exp
, &md
.lsm
);
3486 /* refresh layout failed, need to wait */
3487 wait_layout
= rc
== -EBUSY
;
3490 LDLM_LOCK_PUT(lock
);
3491 ldlm_lock_decref(lockh
, mode
);
3493 /* wait for IO to complete if it's still being used. */
3495 CDEBUG(D_INODE
, "%s: %p/"DFID
" wait for layout reconf.\n",
3496 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3497 inode
, PFID(&lli
->lli_fid
));
3499 memset(&conf
, 0, sizeof(conf
));
3500 conf
.coc_opc
= OBJECT_CONF_WAIT
;
3501 conf
.coc_inode
= inode
;
3502 rc
= ll_layout_conf(inode
, &conf
);
3506 CDEBUG(D_INODE
, "file: "DFID
" waiting layout return: %d.\n",
3507 PFID(&lli
->lli_fid
), rc
);
3513 * This function checks if there exists a LAYOUT lock on the client side,
3514 * or enqueues it if it doesn't have one in cache.
3516 * This function will not hold layout lock so it may be revoked any time after
3517 * this function returns. Any operations depend on layout should be redone
3520 * This function should be called before lov_io_init() to get an uptodate
3521 * layout version, the caller should save the version number and after IO
3522 * is finished, this function should be called again to verify that layout
3523 * is not changed during IO time.
3525 int ll_layout_refresh(struct inode
*inode
, __u32
*gen
)
3527 struct ll_inode_info
*lli
= ll_i2info(inode
);
3528 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3529 struct md_op_data
*op_data
;
3530 struct lookup_intent it
;
3531 struct lustre_handle lockh
;
3533 struct ldlm_enqueue_info einfo
= {
3534 .ei_type
= LDLM_IBITS
,
3536 .ei_cb_bl
= ll_md_blocking_ast
,
3537 .ei_cb_cp
= ldlm_completion_ast
,
3541 *gen
= ll_layout_version_get(lli
);
3542 if (!(sbi
->ll_flags
& LL_SBI_LAYOUT_LOCK
) || *gen
!= LL_LAYOUT_GEN_NONE
)
3546 LASSERT(fid_is_sane(ll_inode2fid(inode
)));
3547 LASSERT(S_ISREG(inode
->i_mode
));
3549 /* take layout lock mutex to enqueue layout lock exclusively. */
3550 mutex_lock(&lli
->lli_layout_mutex
);
3553 /* mostly layout lock is caching on the local side, so try to match
3554 * it before grabbing layout lock mutex. */
3555 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_LAYOUT
, &lockh
, 0,
3556 LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
);
3557 if (mode
!= 0) { /* hit cached lock */
3558 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3562 mutex_unlock(&lli
->lli_layout_mutex
);
3566 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
,
3567 0, 0, LUSTRE_OPC_ANY
, NULL
);
3568 if (IS_ERR(op_data
)) {
3569 mutex_unlock(&lli
->lli_layout_mutex
);
3570 return PTR_ERR(op_data
);
3573 /* have to enqueue one */
3574 memset(&it
, 0, sizeof(it
));
3575 it
.it_op
= IT_LAYOUT
;
3576 lockh
.cookie
= 0ULL;
3578 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID
".\n",
3579 ll_get_fsname(inode
->i_sb
, NULL
, 0), inode
,
3580 PFID(&lli
->lli_fid
));
3582 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, &it
, op_data
, &lockh
,
3584 if (it
.d
.lustre
.it_data
!= NULL
)
3585 ptlrpc_req_finished(it
.d
.lustre
.it_data
);
3586 it
.d
.lustre
.it_data
= NULL
;
3588 ll_finish_md_op_data(op_data
);
3590 mode
= it
.d
.lustre
.it_lock_mode
;
3591 it
.d
.lustre
.it_lock_mode
= 0;
3592 ll_intent_drop_lock(&it
);
3595 /* set lock data in case this is a new lock */
3596 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
3597 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3601 mutex_unlock(&lli
->lli_layout_mutex
);
3607 * This function send a restore request to the MDT
3609 int ll_layout_restore(struct inode
*inode
)
3611 struct hsm_user_request
*hur
;
3614 len
= sizeof(struct hsm_user_request
) +
3615 sizeof(struct hsm_user_item
);
3616 OBD_ALLOC(hur
, len
);
3620 hur
->hur_request
.hr_action
= HUA_RESTORE
;
3621 hur
->hur_request
.hr_archive_id
= 0;
3622 hur
->hur_request
.hr_flags
= 0;
3623 memcpy(&hur
->hur_user_item
[0].hui_fid
, &ll_i2info(inode
)->lli_fid
,
3624 sizeof(hur
->hur_user_item
[0].hui_fid
));
3625 hur
->hur_user_item
[0].hui_extent
.length
= -1;
3626 hur
->hur_request
.hr_itemcount
= 1;
3627 rc
= obd_iocontrol(LL_IOC_HSM_REQUEST
, cl_i2sbi(inode
)->ll_md_exp
,