4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
53 struct ll_file_data
*ll_file_data_get(void)
55 struct ll_file_data
*fd
;
57 OBD_SLAB_ALLOC_PTR_GFP(fd
, ll_file_data_slab
, __GFP_IO
);
60 fd
->fd_write_failed
= false;
64 static void ll_file_data_put(struct ll_file_data
*fd
)
67 OBD_SLAB_FREE_PTR(fd
, ll_file_data_slab
);
70 void ll_pack_inode2opdata(struct inode
*inode
, struct md_op_data
*op_data
,
71 struct lustre_handle
*fh
)
73 op_data
->op_fid1
= ll_i2info(inode
)->lli_fid
;
74 op_data
->op_attr
.ia_mode
= inode
->i_mode
;
75 op_data
->op_attr
.ia_atime
= inode
->i_atime
;
76 op_data
->op_attr
.ia_mtime
= inode
->i_mtime
;
77 op_data
->op_attr
.ia_ctime
= inode
->i_ctime
;
78 op_data
->op_attr
.ia_size
= i_size_read(inode
);
79 op_data
->op_attr_blocks
= inode
->i_blocks
;
80 ((struct ll_iattr
*)&op_data
->op_attr
)->ia_attr_flags
=
81 ll_inode_to_ext_flags(inode
->i_flags
);
82 op_data
->op_ioepoch
= ll_i2info(inode
)->lli_ioepoch
;
84 op_data
->op_handle
= *fh
;
85 op_data
->op_capa1
= ll_mdscapa_get(inode
);
87 if (LLIF_DATA_MODIFIED
& ll_i2info(inode
)->lli_flags
)
88 op_data
->op_bias
|= MDS_DATA_MODIFIED
;
92 * Closes the IO epoch and packs all the attributes into @op_data for
95 static void ll_prepare_close(struct inode
*inode
, struct md_op_data
*op_data
,
96 struct obd_client_handle
*och
)
98 op_data
->op_attr
.ia_valid
= ATTR_MODE
| ATTR_ATIME
| ATTR_ATIME_SET
|
99 ATTR_MTIME
| ATTR_MTIME_SET
|
100 ATTR_CTIME
| ATTR_CTIME_SET
;
102 if (!(och
->och_flags
& FMODE_WRITE
))
105 if (!exp_connect_som(ll_i2mdexp(inode
)) || !S_ISREG(inode
->i_mode
))
106 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
108 ll_ioepoch_close(inode
, op_data
, &och
, 0);
111 ll_pack_inode2opdata(inode
, op_data
, &och
->och_fh
);
112 ll_prep_md_op_data(op_data
, inode
, NULL
, NULL
,
113 0, 0, LUSTRE_OPC_ANY
, NULL
);
116 static int ll_close_inode_openhandle(struct obd_export
*md_exp
,
118 struct obd_client_handle
*och
,
119 const __u64
*data_version
)
121 struct obd_export
*exp
= ll_i2mdexp(inode
);
122 struct md_op_data
*op_data
;
123 struct ptlrpc_request
*req
= NULL
;
124 struct obd_device
*obd
= class_exp2obd(exp
);
130 * XXX: in case of LMV, is this correct to access
133 CERROR("Invalid MDC connection handle "LPX64
"\n",
134 ll_i2mdexp(inode
)->exp_handle
.h_cookie
);
138 OBD_ALLOC_PTR(op_data
);
140 GOTO(out
, rc
= -ENOMEM
); // XXX We leak openhandle and request here.
142 ll_prepare_close(inode
, op_data
, och
);
143 if (data_version
!= NULL
) {
144 /* Pass in data_version implies release. */
145 op_data
->op_bias
|= MDS_HSM_RELEASE
;
146 op_data
->op_data_version
= *data_version
;
147 op_data
->op_lease_handle
= och
->och_lease_handle
;
148 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
150 epoch_close
= (op_data
->op_flags
& MF_EPOCH_CLOSE
);
151 rc
= md_close(md_exp
, op_data
, och
->och_mod
, &req
);
153 /* This close must have the epoch closed. */
154 LASSERT(epoch_close
);
155 /* MDS has instructed us to obtain Size-on-MDS attribute from
156 * OSTs and send setattr to back to MDS. */
157 rc
= ll_som_update(inode
, op_data
);
159 CERROR("inode %lu mdc Size-on-MDS update failed: "
160 "rc = %d\n", inode
->i_ino
, rc
);
164 CERROR("inode %lu mdc close failed: rc = %d\n",
168 /* DATA_MODIFIED flag was successfully sent on close, cancel data
169 * modification flag. */
170 if (rc
== 0 && (op_data
->op_bias
& MDS_DATA_MODIFIED
)) {
171 struct ll_inode_info
*lli
= ll_i2info(inode
);
173 spin_lock(&lli
->lli_lock
);
174 lli
->lli_flags
&= ~LLIF_DATA_MODIFIED
;
175 spin_unlock(&lli
->lli_lock
);
179 rc
= ll_objects_destroy(req
, inode
);
181 CERROR("inode %lu ll_objects destroy: rc = %d\n",
184 if (rc
== 0 && op_data
->op_bias
& MDS_HSM_RELEASE
) {
185 struct mdt_body
*body
;
186 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
187 if (!(body
->valid
& OBD_MD_FLRELEASED
))
191 ll_finish_md_op_data(op_data
);
194 if (exp_connect_som(exp
) && !epoch_close
&&
195 S_ISREG(inode
->i_mode
) && (och
->och_flags
& FMODE_WRITE
)) {
196 ll_queue_done_writing(inode
, LLIF_DONE_WRITING
);
198 md_clear_open_replay_data(md_exp
, och
);
199 /* Free @och if it is not waiting for DONE_WRITING. */
200 och
->och_fh
.cookie
= DEAD_HANDLE_MAGIC
;
203 if (req
) /* This is close request */
204 ptlrpc_req_finished(req
);
208 int ll_md_real_close(struct inode
*inode
, int flags
)
210 struct ll_inode_info
*lli
= ll_i2info(inode
);
211 struct obd_client_handle
**och_p
;
212 struct obd_client_handle
*och
;
216 if (flags
& FMODE_WRITE
) {
217 och_p
= &lli
->lli_mds_write_och
;
218 och_usecount
= &lli
->lli_open_fd_write_count
;
219 } else if (flags
& FMODE_EXEC
) {
220 och_p
= &lli
->lli_mds_exec_och
;
221 och_usecount
= &lli
->lli_open_fd_exec_count
;
223 LASSERT(flags
& FMODE_READ
);
224 och_p
= &lli
->lli_mds_read_och
;
225 och_usecount
= &lli
->lli_open_fd_read_count
;
228 mutex_lock(&lli
->lli_och_mutex
);
229 if (*och_usecount
) { /* There are still users of this handle, so
231 mutex_unlock(&lli
->lli_och_mutex
);
236 mutex_unlock(&lli
->lli_och_mutex
);
238 if (och
) { /* There might be a race and somebody have freed this och
240 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
247 int ll_md_close(struct obd_export
*md_exp
, struct inode
*inode
,
250 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
251 struct ll_inode_info
*lli
= ll_i2info(inode
);
254 /* clear group lock, if present */
255 if (unlikely(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
))
256 ll_put_grouplock(inode
, file
, fd
->fd_grouplock
.cg_gid
);
258 if (fd
->fd_lease_och
!= NULL
) {
261 /* Usually the lease is not released when the
262 * application crashed, we need to release here. */
263 rc
= ll_lease_close(fd
->fd_lease_och
, inode
, &lease_broken
);
264 CDEBUG(rc
? D_ERROR
: D_INODE
, "Clean up lease "DFID
" %d/%d\n",
265 PFID(&lli
->lli_fid
), rc
, lease_broken
);
267 fd
->fd_lease_och
= NULL
;
270 if (fd
->fd_och
!= NULL
) {
271 rc
= ll_close_inode_openhandle(md_exp
, inode
, fd
->fd_och
, NULL
);
276 /* Let's see if we have good enough OPEN lock on the file and if
277 we can skip talking to MDS */
278 if (file
->f_dentry
->d_inode
) { /* Can this ever be false? */
280 int flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_TEST_LOCK
;
281 struct lustre_handle lockh
;
282 struct inode
*inode
= file
->f_dentry
->d_inode
;
283 ldlm_policy_data_t policy
= {.l_inodebits
={MDS_INODELOCK_OPEN
}};
285 mutex_lock(&lli
->lli_och_mutex
);
286 if (fd
->fd_omode
& FMODE_WRITE
) {
288 LASSERT(lli
->lli_open_fd_write_count
);
289 lli
->lli_open_fd_write_count
--;
290 } else if (fd
->fd_omode
& FMODE_EXEC
) {
292 LASSERT(lli
->lli_open_fd_exec_count
);
293 lli
->lli_open_fd_exec_count
--;
296 LASSERT(lli
->lli_open_fd_read_count
);
297 lli
->lli_open_fd_read_count
--;
299 mutex_unlock(&lli
->lli_och_mutex
);
301 if (!md_lock_match(md_exp
, flags
, ll_inode2fid(inode
),
302 LDLM_IBITS
, &policy
, lockmode
,
304 rc
= ll_md_real_close(file
->f_dentry
->d_inode
,
308 CERROR("Releasing a file %p with negative dentry %p. Name %s",
309 file
, file
->f_dentry
, file
->f_dentry
->d_name
.name
);
313 LUSTRE_FPRIVATE(file
) = NULL
;
314 ll_file_data_put(fd
);
315 ll_capa_close(inode
);
320 /* While this returns an error code, fput() the caller does not, so we need
321 * to make every effort to clean up all of our state here. Also, applications
322 * rarely check close errors and even if an error is returned they will not
323 * re-try the close call.
325 int ll_file_release(struct inode
*inode
, struct file
*file
)
327 struct ll_file_data
*fd
;
328 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
329 struct ll_inode_info
*lli
= ll_i2info(inode
);
332 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p)\n", inode
->i_ino
,
333 inode
->i_generation
, inode
);
335 #ifdef CONFIG_FS_POSIX_ACL
336 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
&&
337 inode
== inode
->i_sb
->s_root
->d_inode
) {
338 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
341 if (unlikely(fd
->fd_flags
& LL_FILE_RMTACL
)) {
342 fd
->fd_flags
&= ~LL_FILE_RMTACL
;
343 rct_del(&sbi
->ll_rct
, current_pid());
344 et_search_free(&sbi
->ll_et
, current_pid());
349 if (inode
->i_sb
->s_root
!= file
->f_dentry
)
350 ll_stats_ops_tally(sbi
, LPROC_LL_RELEASE
, 1);
351 fd
= LUSTRE_FPRIVATE(file
);
354 /* The last ref on @file, maybe not the the owner pid of statahead.
355 * Different processes can open the same dir, "ll_opendir_key" means:
356 * it is me that should stop the statahead thread. */
357 if (S_ISDIR(inode
->i_mode
) && lli
->lli_opendir_key
== fd
&&
358 lli
->lli_opendir_pid
!= 0)
359 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
361 if (inode
->i_sb
->s_root
== file
->f_dentry
) {
362 LUSTRE_FPRIVATE(file
) = NULL
;
363 ll_file_data_put(fd
);
367 if (!S_ISDIR(inode
->i_mode
)) {
368 lov_read_and_clear_async_rc(lli
->lli_clob
);
369 lli
->lli_async_rc
= 0;
372 rc
= ll_md_close(sbi
->ll_md_exp
, inode
, file
);
374 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG
, cfs_fail_val
))
375 libcfs_debug_dumplog();
380 static int ll_intent_file_open(struct file
*file
, void *lmm
,
381 int lmmsize
, struct lookup_intent
*itp
)
383 struct ll_sb_info
*sbi
= ll_i2sbi(file
->f_dentry
->d_inode
);
384 struct dentry
*parent
= file
->f_dentry
->d_parent
;
385 const char *name
= file
->f_dentry
->d_name
.name
;
386 const int len
= file
->f_dentry
->d_name
.len
;
387 struct md_op_data
*op_data
;
388 struct ptlrpc_request
*req
;
389 __u32 opc
= LUSTRE_OPC_ANY
;
395 /* Usually we come here only for NFSD, and we want open lock.
396 But we can also get here with pre 2.6.15 patchless kernels, and in
397 that case that lock is also ok */
398 /* We can also get here if there was cached open handle in revalidate_it
399 * but it disappeared while we were getting from there to ll_file_open.
400 * But this means this file was closed and immediately opened which
401 * makes a good candidate for using OPEN lock */
402 /* If lmmsize & lmm are not 0, we are just setting stripe info
403 * parameters. No need for the open lock */
404 if (lmm
== NULL
&& lmmsize
== 0) {
405 itp
->it_flags
|= MDS_OPEN_LOCK
;
406 if (itp
->it_flags
& FMODE_WRITE
)
407 opc
= LUSTRE_OPC_CREATE
;
410 op_data
= ll_prep_md_op_data(NULL
, parent
->d_inode
,
411 file
->f_dentry
->d_inode
, name
, len
,
414 return PTR_ERR(op_data
);
416 itp
->it_flags
|= MDS_OPEN_BY_FID
;
417 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, lmm
, lmmsize
, itp
,
418 0 /*unused */, &req
, ll_md_blocking_ast
, 0);
419 ll_finish_md_op_data(op_data
);
421 /* reason for keep own exit path - don`t flood log
422 * with messages with -ESTALE errors.
424 if (!it_disposition(itp
, DISP_OPEN_OPEN
) ||
425 it_open_error(DISP_OPEN_OPEN
, itp
))
427 ll_release_openhandle(file
->f_dentry
, itp
);
431 if (it_disposition(itp
, DISP_LOOKUP_NEG
))
432 GOTO(out
, rc
= -ENOENT
);
434 if (rc
!= 0 || it_open_error(DISP_OPEN_OPEN
, itp
)) {
435 rc
= rc
? rc
: it_open_error(DISP_OPEN_OPEN
, itp
);
436 CDEBUG(D_VFSTRACE
, "lock enqueue: err: %d\n", rc
);
440 rc
= ll_prep_inode(&file
->f_dentry
->d_inode
, req
, NULL
, itp
);
441 if (!rc
&& itp
->d
.lustre
.it_lock_mode
)
442 ll_set_lock_data(sbi
->ll_md_exp
, file
->f_dentry
->d_inode
,
446 ptlrpc_req_finished(itp
->d
.lustre
.it_data
);
447 it_clear_disposition(itp
, DISP_ENQ_COMPLETE
);
448 ll_intent_drop_lock(itp
);
454 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
455 * not believe attributes if a few ioepoch holders exist. Attributes for
456 * previous ioepoch if new one is opened are also skipped by MDS.
458 void ll_ioepoch_open(struct ll_inode_info
*lli
, __u64 ioepoch
)
460 if (ioepoch
&& lli
->lli_ioepoch
!= ioepoch
) {
461 lli
->lli_ioepoch
= ioepoch
;
462 CDEBUG(D_INODE
, "Epoch "LPU64
" opened on "DFID
"\n",
463 ioepoch
, PFID(&lli
->lli_fid
));
467 static int ll_och_fill(struct obd_export
*md_exp
, struct lookup_intent
*it
,
468 struct obd_client_handle
*och
)
470 struct ptlrpc_request
*req
= it
->d
.lustre
.it_data
;
471 struct mdt_body
*body
;
473 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
474 och
->och_fh
= body
->handle
;
475 och
->och_fid
= body
->fid1
;
476 och
->och_lease_handle
.cookie
= it
->d
.lustre
.it_lock_handle
;
477 och
->och_magic
= OBD_CLIENT_HANDLE_MAGIC
;
478 och
->och_flags
= it
->it_flags
;
480 return md_set_open_replay_data(md_exp
, och
, req
);
483 int ll_local_open(struct file
*file
, struct lookup_intent
*it
,
484 struct ll_file_data
*fd
, struct obd_client_handle
*och
)
486 struct inode
*inode
= file
->f_dentry
->d_inode
;
487 struct ll_inode_info
*lli
= ll_i2info(inode
);
489 LASSERT(!LUSTRE_FPRIVATE(file
));
494 struct ptlrpc_request
*req
= it
->d
.lustre
.it_data
;
495 struct mdt_body
*body
;
498 rc
= ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
502 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
503 ll_ioepoch_open(lli
, body
->ioepoch
);
506 LUSTRE_FPRIVATE(file
) = fd
;
507 ll_readahead_init(inode
, &fd
->fd_ras
);
508 fd
->fd_omode
= it
->it_flags
& (FMODE_READ
| FMODE_WRITE
| FMODE_EXEC
);
512 /* Open a file, and (for the very first open) create objects on the OSTs at
513 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
514 * creation or open until ll_lov_setstripe() ioctl is called.
516 * If we already have the stripe MD locally then we don't request it in
517 * md_open(), by passing a lmm_size = 0.
519 * It is up to the application to ensure no other processes open this file
520 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
521 * used. We might be able to avoid races of that sort by getting lli_open_sem
522 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
523 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
525 int ll_file_open(struct inode
*inode
, struct file
*file
)
527 struct ll_inode_info
*lli
= ll_i2info(inode
);
528 struct lookup_intent
*it
, oit
= { .it_op
= IT_OPEN
,
529 .it_flags
= file
->f_flags
};
530 struct obd_client_handle
**och_p
= NULL
;
531 __u64
*och_usecount
= NULL
;
532 struct ll_file_data
*fd
;
533 int rc
= 0, opendir_set
= 0;
535 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode
->i_ino
,
536 inode
->i_generation
, inode
, file
->f_flags
);
538 it
= file
->private_data
; /* XXX: compat macro */
539 file
->private_data
= NULL
; /* prevent ll_local_open assertion */
541 fd
= ll_file_data_get();
543 GOTO(out_openerr
, rc
= -ENOMEM
);
546 if (S_ISDIR(inode
->i_mode
)) {
547 spin_lock(&lli
->lli_sa_lock
);
548 if (lli
->lli_opendir_key
== NULL
&& lli
->lli_sai
== NULL
&&
549 lli
->lli_opendir_pid
== 0) {
550 lli
->lli_opendir_key
= fd
;
551 lli
->lli_opendir_pid
= current_pid();
554 spin_unlock(&lli
->lli_sa_lock
);
557 if (inode
->i_sb
->s_root
== file
->f_dentry
) {
558 LUSTRE_FPRIVATE(file
) = fd
;
562 if (!it
|| !it
->d
.lustre
.it_disposition
) {
563 /* Convert f_flags into access mode. We cannot use file->f_mode,
564 * because everything but O_ACCMODE mask was stripped from
566 if ((oit
.it_flags
+ 1) & O_ACCMODE
)
568 if (file
->f_flags
& O_TRUNC
)
569 oit
.it_flags
|= FMODE_WRITE
;
571 /* kernel only call f_op->open in dentry_open. filp_open calls
572 * dentry_open after call to open_namei that checks permissions.
573 * Only nfsd_open call dentry_open directly without checking
574 * permissions and because of that this code below is safe. */
575 if (oit
.it_flags
& (FMODE_WRITE
| FMODE_READ
))
576 oit
.it_flags
|= MDS_OPEN_OWNEROVERRIDE
;
578 /* We do not want O_EXCL here, presumably we opened the file
579 * already? XXX - NFS implications? */
580 oit
.it_flags
&= ~O_EXCL
;
582 /* bug20584, if "it_flags" contains O_CREAT, the file will be
583 * created if necessary, then "IT_CREAT" should be set to keep
584 * consistent with it */
585 if (oit
.it_flags
& O_CREAT
)
586 oit
.it_op
|= IT_CREAT
;
592 /* Let's see if we have file open on MDS already. */
593 if (it
->it_flags
& FMODE_WRITE
) {
594 och_p
= &lli
->lli_mds_write_och
;
595 och_usecount
= &lli
->lli_open_fd_write_count
;
596 } else if (it
->it_flags
& FMODE_EXEC
) {
597 och_p
= &lli
->lli_mds_exec_och
;
598 och_usecount
= &lli
->lli_open_fd_exec_count
;
600 och_p
= &lli
->lli_mds_read_och
;
601 och_usecount
= &lli
->lli_open_fd_read_count
;
604 mutex_lock(&lli
->lli_och_mutex
);
605 if (*och_p
) { /* Open handle is present */
606 if (it_disposition(it
, DISP_OPEN_OPEN
)) {
607 /* Well, there's extra open request that we do not need,
608 let's close it somehow. This will decref request. */
609 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
611 mutex_unlock(&lli
->lli_och_mutex
);
612 GOTO(out_openerr
, rc
);
615 ll_release_openhandle(file
->f_dentry
, it
);
619 rc
= ll_local_open(file
, it
, fd
, NULL
);
622 mutex_unlock(&lli
->lli_och_mutex
);
623 GOTO(out_openerr
, rc
);
626 LASSERT(*och_usecount
== 0);
627 if (!it
->d
.lustre
.it_disposition
) {
628 /* We cannot just request lock handle now, new ELC code
629 means that one of other OPEN locks for this file
630 could be cancelled, and since blocking ast handler
631 would attempt to grab och_mutex as well, that would
632 result in a deadlock */
633 mutex_unlock(&lli
->lli_och_mutex
);
634 it
->it_create_mode
|= M_CHECK_STALE
;
635 rc
= ll_intent_file_open(file
, NULL
, 0, it
);
636 it
->it_create_mode
&= ~M_CHECK_STALE
;
638 GOTO(out_openerr
, rc
);
642 OBD_ALLOC(*och_p
, sizeof (struct obd_client_handle
));
644 GOTO(out_och_free
, rc
= -ENOMEM
);
648 /* md_intent_lock() didn't get a request ref if there was an
649 * open error, so don't do cleanup on the request here
651 /* XXX (green): Should not we bail out on any error here, not
652 * just open error? */
653 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
655 GOTO(out_och_free
, rc
);
657 LASSERT(it_disposition(it
, DISP_ENQ_OPEN_REF
));
659 rc
= ll_local_open(file
, it
, fd
, *och_p
);
661 GOTO(out_och_free
, rc
);
663 mutex_unlock(&lli
->lli_och_mutex
);
666 /* Must do this outside lli_och_mutex lock to prevent deadlock where
667 different kind of OPEN lock for this same inode gets cancelled
668 by ldlm_cancel_lru */
669 if (!S_ISREG(inode
->i_mode
))
670 GOTO(out_och_free
, rc
);
674 if (!lli
->lli_has_smd
&&
675 (cl_is_lov_delay_create(file
->f_flags
) ||
676 (file
->f_mode
& FMODE_WRITE
) == 0)) {
677 CDEBUG(D_INODE
, "object creation was delayed\n");
678 GOTO(out_och_free
, rc
);
680 cl_lov_delay_create_clear(&file
->f_flags
);
681 GOTO(out_och_free
, rc
);
685 if (och_p
&& *och_p
) {
686 OBD_FREE(*och_p
, sizeof (struct obd_client_handle
));
687 *och_p
= NULL
; /* OBD_FREE writes some magic there */
690 mutex_unlock(&lli
->lli_och_mutex
);
693 if (opendir_set
!= 0)
694 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
696 ll_file_data_put(fd
);
698 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_OPEN
, 1);
701 if (it
&& it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
702 ptlrpc_req_finished(it
->d
.lustre
.it_data
);
703 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
709 static int ll_md_blocking_lease_ast(struct ldlm_lock
*lock
,
710 struct ldlm_lock_desc
*desc
, void *data
, int flag
)
713 struct lustre_handle lockh
;
716 case LDLM_CB_BLOCKING
:
717 ldlm_lock2handle(lock
, &lockh
);
718 rc
= ldlm_cli_cancel(&lockh
, LCF_ASYNC
);
720 CDEBUG(D_INODE
, "ldlm_cli_cancel: %d\n", rc
);
724 case LDLM_CB_CANCELING
:
732 * Acquire a lease and open the file.
734 struct obd_client_handle
*ll_lease_open(struct inode
*inode
, struct file
*file
,
735 fmode_t fmode
, __u64 open_flags
)
737 struct lookup_intent it
= { .it_op
= IT_OPEN
};
738 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
739 struct md_op_data
*op_data
;
740 struct ptlrpc_request
*req
;
741 struct lustre_handle old_handle
= { 0 };
742 struct obd_client_handle
*och
= NULL
;
746 if (fmode
!= FMODE_WRITE
&& fmode
!= FMODE_READ
)
747 return ERR_PTR(-EINVAL
);
750 struct ll_inode_info
*lli
= ll_i2info(inode
);
751 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
752 struct obd_client_handle
**och_p
;
755 if (!(fmode
& file
->f_mode
) || (file
->f_mode
& FMODE_EXEC
))
756 return ERR_PTR(-EPERM
);
758 /* Get the openhandle of the file */
760 mutex_lock(&lli
->lli_och_mutex
);
761 if (fd
->fd_lease_och
!= NULL
) {
762 mutex_unlock(&lli
->lli_och_mutex
);
766 if (fd
->fd_och
== NULL
) {
767 if (file
->f_mode
& FMODE_WRITE
) {
768 LASSERT(lli
->lli_mds_write_och
!= NULL
);
769 och_p
= &lli
->lli_mds_write_och
;
770 och_usecount
= &lli
->lli_open_fd_write_count
;
772 LASSERT(lli
->lli_mds_read_och
!= NULL
);
773 och_p
= &lli
->lli_mds_read_och
;
774 och_usecount
= &lli
->lli_open_fd_read_count
;
776 if (*och_usecount
== 1) {
783 mutex_unlock(&lli
->lli_och_mutex
);
784 if (rc
< 0) /* more than 1 opener */
787 LASSERT(fd
->fd_och
!= NULL
);
788 old_handle
= fd
->fd_och
->och_fh
;
793 return ERR_PTR(-ENOMEM
);
795 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
, 0, 0,
796 LUSTRE_OPC_ANY
, NULL
);
798 GOTO(out
, rc
= PTR_ERR(op_data
));
800 /* To tell the MDT this openhandle is from the same owner */
801 op_data
->op_handle
= old_handle
;
803 it
.it_flags
= fmode
| open_flags
;
804 it
.it_flags
|= MDS_OPEN_LOCK
| MDS_OPEN_BY_FID
| MDS_OPEN_LEASE
;
805 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, NULL
, 0, &it
, 0, &req
,
806 ll_md_blocking_lease_ast
,
807 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
808 * it can be cancelled which may mislead applications that the lease is
810 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
811 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
812 * doesn't deal with openhandle, so normal openhandle will be leaked. */
813 LDLM_FL_NO_LRU
| LDLM_FL_EXCL
);
814 ll_finish_md_op_data(op_data
);
816 ptlrpc_req_finished(req
);
817 it_clear_disposition(&it
, DISP_ENQ_COMPLETE
);
820 GOTO(out_release_it
, rc
);
822 if (it_disposition(&it
, DISP_LOOKUP_NEG
))
823 GOTO(out_release_it
, rc
= -ENOENT
);
825 rc
= it_open_error(DISP_OPEN_OPEN
, &it
);
827 GOTO(out_release_it
, rc
);
829 LASSERT(it_disposition(&it
, DISP_ENQ_OPEN_REF
));
830 ll_och_fill(sbi
->ll_md_exp
, &it
, och
);
832 if (!it_disposition(&it
, DISP_OPEN_LEASE
)) /* old server? */
833 GOTO(out_close
, rc
= -EOPNOTSUPP
);
835 /* already get lease, handle lease lock */
836 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
837 if (it
.d
.lustre
.it_lock_mode
== 0 ||
838 it
.d
.lustre
.it_lock_bits
!= MDS_INODELOCK_OPEN
) {
839 /* open lock must return for lease */
840 CERROR(DFID
"lease granted but no open lock, %d/%llu.\n",
841 PFID(ll_inode2fid(inode
)), it
.d
.lustre
.it_lock_mode
,
842 it
.d
.lustre
.it_lock_bits
);
843 GOTO(out_close
, rc
= -EPROTO
);
846 ll_intent_release(&it
);
850 rc2
= ll_close_inode_openhandle(sbi
->ll_md_exp
, inode
, och
, NULL
);
852 CERROR("Close openhandle returned %d\n", rc2
);
854 /* cancel open lock */
855 if (it
.d
.lustre
.it_lock_mode
!= 0) {
856 ldlm_lock_decref_and_cancel(&och
->och_lease_handle
,
857 it
.d
.lustre
.it_lock_mode
);
858 it
.d
.lustre
.it_lock_mode
= 0;
861 ll_intent_release(&it
);
866 EXPORT_SYMBOL(ll_lease_open
);
869 * Release lease and close the file.
870 * It will check if the lease has ever broken.
872 int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
875 struct ldlm_lock
*lock
;
876 bool cancelled
= true;
879 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
881 lock_res_and_lock(lock
);
882 cancelled
= ldlm_is_cancel(lock
);
883 unlock_res_and_lock(lock
);
887 CDEBUG(D_INODE
, "lease for "DFID
" broken? %d\n",
888 PFID(&ll_i2info(inode
)->lli_fid
), cancelled
);
891 ldlm_cli_cancel(&och
->och_lease_handle
, 0);
892 if (lease_broken
!= NULL
)
893 *lease_broken
= cancelled
;
895 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
899 EXPORT_SYMBOL(ll_lease_close
);
901 /* Fills the obdo with the attributes for the lsm */
902 static int ll_lsm_getattr(struct lov_stripe_md
*lsm
, struct obd_export
*exp
,
903 struct obd_capa
*capa
, struct obdo
*obdo
,
904 __u64 ioepoch
, int sync
)
906 struct ptlrpc_request_set
*set
;
907 struct obd_info oinfo
= { { { 0 } } };
910 LASSERT(lsm
!= NULL
);
914 oinfo
.oi_oa
->o_oi
= lsm
->lsm_oi
;
915 oinfo
.oi_oa
->o_mode
= S_IFREG
;
916 oinfo
.oi_oa
->o_ioepoch
= ioepoch
;
917 oinfo
.oi_oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLTYPE
|
918 OBD_MD_FLSIZE
| OBD_MD_FLBLOCKS
|
919 OBD_MD_FLBLKSZ
| OBD_MD_FLATIME
|
920 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
|
921 OBD_MD_FLGROUP
| OBD_MD_FLEPOCH
|
922 OBD_MD_FLDATAVERSION
;
923 oinfo
.oi_capa
= capa
;
925 oinfo
.oi_oa
->o_valid
|= OBD_MD_FLFLAGS
;
926 oinfo
.oi_oa
->o_flags
|= OBD_FL_SRVLOCK
;
929 set
= ptlrpc_prep_set();
931 CERROR("can't allocate ptlrpc set\n");
934 rc
= obd_getattr_async(exp
, &oinfo
, set
);
936 rc
= ptlrpc_set_wait(set
);
937 ptlrpc_set_destroy(set
);
940 oinfo
.oi_oa
->o_valid
&= (OBD_MD_FLBLOCKS
| OBD_MD_FLBLKSZ
|
941 OBD_MD_FLATIME
| OBD_MD_FLMTIME
|
942 OBD_MD_FLCTIME
| OBD_MD_FLSIZE
|
943 OBD_MD_FLDATAVERSION
);
948 * Performs the getattr on the inode and updates its fields.
949 * If @sync != 0, perform the getattr under the server-side lock.
951 int ll_inode_getattr(struct inode
*inode
, struct obdo
*obdo
,
952 __u64 ioepoch
, int sync
)
954 struct obd_capa
*capa
= ll_mdscapa_get(inode
);
955 struct lov_stripe_md
*lsm
;
958 lsm
= ccc_inode_lsm_get(inode
);
959 rc
= ll_lsm_getattr(lsm
, ll_i2dtexp(inode
),
960 capa
, obdo
, ioepoch
, sync
);
963 struct ost_id
*oi
= lsm
? &lsm
->lsm_oi
: &obdo
->o_oi
;
965 obdo_refresh_inode(inode
, obdo
, obdo
->o_valid
);
966 CDEBUG(D_INODE
, "objid "DOSTID
" size %llu, blocks %llu,"
967 " blksize %lu\n", POSTID(oi
), i_size_read(inode
),
968 (unsigned long long)inode
->i_blocks
,
969 (unsigned long)ll_inode_blksize(inode
));
971 ccc_inode_lsm_put(inode
, lsm
);
975 int ll_merge_lvb(const struct lu_env
*env
, struct inode
*inode
)
977 struct ll_inode_info
*lli
= ll_i2info(inode
);
978 struct cl_object
*obj
= lli
->lli_clob
;
979 struct cl_attr
*attr
= ccc_env_thread_attr(env
);
983 ll_inode_size_lock(inode
);
984 /* merge timestamps the most recently obtained from mds with
985 timestamps obtained from osts */
986 LTIME_S(inode
->i_atime
) = lli
->lli_lvb
.lvb_atime
;
987 LTIME_S(inode
->i_mtime
) = lli
->lli_lvb
.lvb_mtime
;
988 LTIME_S(inode
->i_ctime
) = lli
->lli_lvb
.lvb_ctime
;
989 inode_init_lvb(inode
, &lvb
);
991 cl_object_attr_lock(obj
);
992 rc
= cl_object_attr_get(env
, obj
, attr
);
993 cl_object_attr_unlock(obj
);
996 if (lvb
.lvb_atime
< attr
->cat_atime
)
997 lvb
.lvb_atime
= attr
->cat_atime
;
998 if (lvb
.lvb_ctime
< attr
->cat_ctime
)
999 lvb
.lvb_ctime
= attr
->cat_ctime
;
1000 if (lvb
.lvb_mtime
< attr
->cat_mtime
)
1001 lvb
.lvb_mtime
= attr
->cat_mtime
;
1003 CDEBUG(D_VFSTRACE
, DFID
" updating i_size "LPU64
"\n",
1004 PFID(&lli
->lli_fid
), attr
->cat_size
);
1005 cl_isize_write_nolock(inode
, attr
->cat_size
);
1007 inode
->i_blocks
= attr
->cat_blocks
;
1009 LTIME_S(inode
->i_mtime
) = lvb
.lvb_mtime
;
1010 LTIME_S(inode
->i_atime
) = lvb
.lvb_atime
;
1011 LTIME_S(inode
->i_ctime
) = lvb
.lvb_ctime
;
1013 ll_inode_size_unlock(inode
);
1018 int ll_glimpse_ioctl(struct ll_sb_info
*sbi
, struct lov_stripe_md
*lsm
,
1021 struct obdo obdo
= { 0 };
1024 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, NULL
, &obdo
, 0, 0);
1026 st
->st_size
= obdo
.o_size
;
1027 st
->st_blocks
= obdo
.o_blocks
;
1028 st
->st_mtime
= obdo
.o_mtime
;
1029 st
->st_atime
= obdo
.o_atime
;
1030 st
->st_ctime
= obdo
.o_ctime
;
1035 void ll_io_init(struct cl_io
*io
, const struct file
*file
, int write
)
1037 struct inode
*inode
= file
->f_dentry
->d_inode
;
1039 io
->u
.ci_rw
.crw_nonblock
= file
->f_flags
& O_NONBLOCK
;
1041 io
->u
.ci_wr
.wr_append
= !!(file
->f_flags
& O_APPEND
);
1042 io
->u
.ci_wr
.wr_sync
= file
->f_flags
& O_SYNC
||
1043 file
->f_flags
& O_DIRECT
||
1046 io
->ci_obj
= ll_i2info(inode
)->lli_clob
;
1047 io
->ci_lockreq
= CILR_MAYBE
;
1048 if (ll_file_nolock(file
)) {
1049 io
->ci_lockreq
= CILR_NEVER
;
1050 io
->ci_no_srvlock
= 1;
1051 } else if (file
->f_flags
& O_APPEND
) {
1052 io
->ci_lockreq
= CILR_MANDATORY
;
1057 ll_file_io_generic(const struct lu_env
*env
, struct vvp_io_args
*args
,
1058 struct file
*file
, enum cl_io_type iot
,
1059 loff_t
*ppos
, size_t count
)
1061 struct ll_inode_info
*lli
= ll_i2info(file
->f_dentry
->d_inode
);
1062 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1067 io
= ccc_env_thread_io(env
);
1068 ll_io_init(io
, file
, iot
== CIT_WRITE
);
1070 if (cl_io_rw_init(env
, io
, iot
, *ppos
, count
) == 0) {
1071 struct vvp_io
*vio
= vvp_env_io(env
);
1072 struct ccc_io
*cio
= ccc_env_io(env
);
1073 int write_mutex_locked
= 0;
1075 cio
->cui_fd
= LUSTRE_FPRIVATE(file
);
1076 vio
->cui_io_subtype
= args
->via_io_subtype
;
1078 switch (vio
->cui_io_subtype
) {
1080 cio
->cui_iov
= args
->u
.normal
.via_iov
;
1081 cio
->cui_nrsegs
= args
->u
.normal
.via_nrsegs
;
1082 cio
->cui_tot_nrsegs
= cio
->cui_nrsegs
;
1083 cio
->cui_iocb
= args
->u
.normal
.via_iocb
;
1084 if ((iot
== CIT_WRITE
) &&
1085 !(cio
->cui_fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1086 if (mutex_lock_interruptible(&lli
->
1088 GOTO(out
, result
= -ERESTARTSYS
);
1089 write_mutex_locked
= 1;
1090 } else if (iot
== CIT_READ
) {
1091 down_read(&lli
->lli_trunc_sem
);
1095 vio
->u
.splice
.cui_pipe
= args
->u
.splice
.via_pipe
;
1096 vio
->u
.splice
.cui_flags
= args
->u
.splice
.via_flags
;
1099 CERROR("Unknow IO type - %u\n", vio
->cui_io_subtype
);
1102 result
= cl_io_loop(env
, io
);
1103 if (write_mutex_locked
)
1104 mutex_unlock(&lli
->lli_write_mutex
);
1105 else if (args
->via_io_subtype
== IO_NORMAL
&& iot
== CIT_READ
)
1106 up_read(&lli
->lli_trunc_sem
);
1108 /* cl_io_rw_init() handled IO */
1109 result
= io
->ci_result
;
1112 if (io
->ci_nob
> 0) {
1113 result
= io
->ci_nob
;
1114 *ppos
= io
->u
.ci_wr
.wr
.crw_pos
;
1118 cl_io_fini(env
, io
);
1119 /* If any bit been read/written (result != 0), we just return
1120 * short read/write instead of restart io. */
1121 if ((result
== 0 || result
== -ENODATA
) && io
->ci_need_restart
) {
1122 CDEBUG(D_VFSTRACE
, "Restart %s on %s from %lld, count:%zd\n",
1123 iot
== CIT_READ
? "read" : "write",
1124 file
->f_dentry
->d_name
.name
, *ppos
, count
);
1125 LASSERTF(io
->ci_nob
== 0, "%zd", io
->ci_nob
);
1129 if (iot
== CIT_READ
) {
1131 ll_stats_ops_tally(ll_i2sbi(file
->f_dentry
->d_inode
),
1132 LPROC_LL_READ_BYTES
, result
);
1133 } else if (iot
== CIT_WRITE
) {
1135 ll_stats_ops_tally(ll_i2sbi(file
->f_dentry
->d_inode
),
1136 LPROC_LL_WRITE_BYTES
, result
);
1137 fd
->fd_write_failed
= false;
1138 } else if (result
!= -ERESTARTSYS
) {
1139 fd
->fd_write_failed
= true;
/*
 * Vectored read entry point.
 *
 * Visible flow: validate the iovec with generic_segment_checks() using
 * VERIFY_WRITE, obtain a cl environment with cl_env_get() (the visible
 * error path returns PTR_ERR(env)), fill the IO_NORMAL vvp_io_args with
 * the iovec, segment count and iocb, then hand the request to
 * ll_file_io_generic() as CIT_READ on iocb->ki_filp, positioned through
 * &iocb->ki_pos. The environment is released with cl_env_put() afterwards.
 *
 * NOTE(review): local declarations, the error checks and the final return
 * are not visible in this listing; the @pos argument is not referenced in
 * the visible lines.
 */
1146 static ssize_t
ll_file_aio_read(struct kiocb
*iocb
, const struct iovec
*iov
,
1147 unsigned long nr_segs
, loff_t pos
)
1150 struct vvp_io_args
*args
;
1155 result
= generic_segment_checks(iov
, &nr_segs
, &count
, VERIFY_WRITE
);
1159 env
= cl_env_get(&refcheck
);
1161 return PTR_ERR(env
);
1163 args
= vvp_env_args(env
, IO_NORMAL
);
1164 args
->u
.normal
.via_iov
= (struct iovec
*)iov
;
1165 args
->u
.normal
.via_nrsegs
= nr_segs
;
1166 args
->u
.normal
.via_iocb
= iocb
;
1168 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_READ
,
1169 &iocb
->ki_pos
, count
);
1170 cl_env_put(env
, &refcheck
);
/*
 * Synchronous read wrapper around ll_file_aio_read().
 *
 * Uses the iovec and kiocb stored in the environment's vvp info
 * (vti_local_iov / vti_kiocb): the iovec describes the single user buffer
 * @buf of @count bytes, the kiocb is set up with init_sync_kiocb() and
 * seeded with *ppos and @count. After the call the kiocb position is
 * written back to *ppos and the environment is released.
 *
 * NOTE(review): the trailing parameter(s), local declarations and the
 * return statement are not visible in this listing.
 */
1174 static ssize_t
ll_file_read(struct file
*file
, char *buf
, size_t count
,
1178 struct iovec
*local_iov
;
1179 struct kiocb
*kiocb
;
1183 env
= cl_env_get(&refcheck
);
1185 return PTR_ERR(env
);
1187 local_iov
= &vvp_env_info(env
)->vti_local_iov
;
1188 kiocb
= &vvp_env_info(env
)->vti_kiocb
;
1189 local_iov
->iov_base
= (void __user
*)buf
;
1190 local_iov
->iov_len
= count
;
1191 init_sync_kiocb(kiocb
, file
);
1192 kiocb
->ki_pos
= *ppos
;
1193 kiocb
->ki_nbytes
= count
;
1195 result
= ll_file_aio_read(kiocb
, local_iov
, 1, kiocb
->ki_pos
);
1196 *ppos
= kiocb
->ki_pos
;
1198 cl_env_put(env
, &refcheck
);
1203 * Write to a file (through the page cache).
/*
 * Vectored write entry point.
 *
 * Mirrors the read path: the iovec is validated with
 * generic_segment_checks() using VERIFY_READ, a cl environment is taken,
 * the IO_NORMAL vvp_io_args are filled with the iovec, segment count and
 * iocb, and the request goes to ll_file_io_generic() as CIT_WRITE on
 * iocb->ki_filp with &iocb->ki_pos as the position. The environment is
 * released with cl_env_put() afterwards.
 *
 * NOTE(review): local declarations, the error checks and the final return
 * are not visible in this listing; the @pos argument is not referenced in
 * the visible lines.
 */
1205 static ssize_t
ll_file_aio_write(struct kiocb
*iocb
, const struct iovec
*iov
,
1206 unsigned long nr_segs
, loff_t pos
)
1209 struct vvp_io_args
*args
;
1214 result
= generic_segment_checks(iov
, &nr_segs
, &count
, VERIFY_READ
);
1218 env
= cl_env_get(&refcheck
);
1220 return PTR_ERR(env
);
1222 args
= vvp_env_args(env
, IO_NORMAL
);
1223 args
->u
.normal
.via_iov
= (struct iovec
*)iov
;
1224 args
->u
.normal
.via_nrsegs
= nr_segs
;
1225 args
->u
.normal
.via_iocb
= iocb
;
1227 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_WRITE
,
1228 &iocb
->ki_pos
, count
);
1229 cl_env_put(env
, &refcheck
);
/*
 * Synchronous write wrapper around ll_file_aio_write().
 *
 * Builds a one-segment iovec over the user buffer @buf (@count bytes) and
 * a sync kiocb, both taken from the environment's vvp info, seeds the
 * kiocb with *ppos and @count, issues the vectored write and copies the
 * resulting kiocb position back to *ppos before releasing the environment.
 *
 * NOTE(review): the trailing parameter(s), local declarations and the
 * return statement are not visible in this listing.
 */
1233 static ssize_t
ll_file_write(struct file
*file
, const char *buf
, size_t count
,
1237 struct iovec
*local_iov
;
1238 struct kiocb
*kiocb
;
1242 env
= cl_env_get(&refcheck
);
1244 return PTR_ERR(env
);
1246 local_iov
= &vvp_env_info(env
)->vti_local_iov
;
1247 kiocb
= &vvp_env_info(env
)->vti_kiocb
;
1248 local_iov
->iov_base
= (void __user
*)buf
;
1249 local_iov
->iov_len
= count
;
1250 init_sync_kiocb(kiocb
, file
);
1251 kiocb
->ki_pos
= *ppos
;
1252 kiocb
->ki_nbytes
= count
;
1254 result
= ll_file_aio_write(kiocb
, local_iov
, 1, kiocb
->ki_pos
);
1255 *ppos
= kiocb
->ki_pos
;
1257 cl_env_put(env
, &refcheck
);
1264 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry point.
 *
 * Takes a cl environment, fills the IO_SPLICE flavour of vvp_io_args with
 * the destination pipe and the splice flags, and runs the request through
 * ll_file_io_generic() as CIT_READ on @in_file at @ppos for @count bytes.
 * The environment is released with cl_env_put() afterwards.
 *
 * NOTE(review): the last parameter (the visible code uses a value named
 * "flags"), local declarations and the return are not visible here.
 */
1266 static ssize_t
ll_file_splice_read(struct file
*in_file
, loff_t
*ppos
,
1267 struct pipe_inode_info
*pipe
, size_t count
,
1271 struct vvp_io_args
*args
;
1275 env
= cl_env_get(&refcheck
);
1277 return PTR_ERR(env
);
1279 args
= vvp_env_args(env
, IO_SPLICE
);
1280 args
->u
.splice
.via_pipe
= pipe
;
1281 args
->u
.splice
.via_flags
= flags
;
1283 result
= ll_file_io_generic(env
, args
, in_file
, CIT_READ
, ppos
, count
);
1284 cl_env_put(env
, &refcheck
);
/*
 * Re-create an OST object for @inode.
 *
 * Visible steps:
 *  - take a reference on the inode's stripe metadata (ccc_inode_lsm_get)
 *    and fail with -ENOENT when lsm_has_objects() is false;
 *  - size a scratch copy as sizeof(*lsm) plus one struct lov_oinfo per
 *    lsm_stripe_count, allocate it (-ENOMEM on the visible failure paths)
 *    and memcpy the current lsm into it;
 *  - fill the obdo: o_nlink carries ost_idx, OBD_FL_RECREATE_OBJS is set in
 *    o_flags, and inode times/type plus the parent FID are added;
 *  - call obd_create() while holding ll_inode_size_lock();
 *  - free the scratch copy and drop the lsm reference.
 *
 * NOTE(review): the obdo allocation, how @oi is stored into the obdo, the
 * remaining parameter(s) and the return are not visible in this listing.
 */
1288 static int ll_lov_recreate(struct inode
*inode
, struct ost_id
*oi
,
1291 struct obd_export
*exp
= ll_i2dtexp(inode
);
1292 struct obd_trans_info oti
= { 0 };
1293 struct obdo
*oa
= NULL
;
1296 struct lov_stripe_md
*lsm
= NULL
, *lsm2
;
1302 lsm
= ccc_inode_lsm_get(inode
);
1303 if (!lsm_has_objects(lsm
))
1304 GOTO(out
, rc
= -ENOENT
);
1306 lsm_size
= sizeof(*lsm
) + (sizeof(struct lov_oinfo
) *
1307 (lsm
->lsm_stripe_count
));
1309 OBD_ALLOC_LARGE(lsm2
, lsm_size
);
1311 GOTO(out
, rc
= -ENOMEM
);
1314 oa
->o_nlink
= ost_idx
;
1315 oa
->o_flags
|= OBD_FL_RECREATE_OBJS
;
1316 oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLFLAGS
| OBD_MD_FLGROUP
;
1317 obdo_from_inode(oa
, inode
, OBD_MD_FLTYPE
| OBD_MD_FLATIME
|
1318 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
);
1319 obdo_set_parent_fid(oa
, &ll_i2info(inode
)->lli_fid
);
1320 memcpy(lsm2
, lsm
, lsm_size
);
1321 ll_inode_size_lock(inode
);
1322 rc
= obd_create(NULL
, exp
, oa
, &lsm2
, &oti
);
1323 ll_inode_size_unlock(inode
);
1325 OBD_FREE_LARGE(lsm2
, lsm_size
);
1328 ccc_inode_lsm_put(inode
, lsm
);
/*
 * ioctl helper: re-create an object described by a struct ll_recreate_obj
 * read from user space at @arg.
 *
 * Requires CFS_CAP_SYS_ADMIN. The ost_id is built with
 * ostid_set_seq_mdt0() and the user-supplied lrc_id, and the request is
 * forwarded to ll_lov_recreate() together with lrc_ost_idx.
 *
 * NOTE(review): the error returns for the capability check and the
 * copy_from_user() failure are not visible in this listing.
 */
1333 static int ll_lov_recreate_obj(struct inode
*inode
, unsigned long arg
)
1335 struct ll_recreate_obj ucreat
;
1338 if (!capable(CFS_CAP_SYS_ADMIN
))
1341 if (copy_from_user(&ucreat
, (struct ll_recreate_obj
*)arg
,
1345 ostid_set_seq_mdt0(&oi
);
1346 ostid_set_id(&oi
, ucreat
.lrc_id
);
1347 return ll_lov_recreate(inode
, &oi
, ucreat
.lrc_ost_idx
);
/*
 * ioctl helper: re-create an object identified by a struct lu_fid read
 * from user space at @arg.
 *
 * Requires CFS_CAP_SYS_ADMIN. The FID is converted with fid_to_ostid();
 * the OST index is taken from bits 16..31 of the FID sequence
 * ((fid_seq >> 16) & 0xffff) and both are passed to ll_lov_recreate().
 *
 * NOTE(review): the error returns for the capability check and the
 * copy_from_user() failure are not visible in this listing.
 */
1350 static int ll_lov_recreate_fid(struct inode
*inode
, unsigned long arg
)
1356 if (!capable(CFS_CAP_SYS_ADMIN
))
1359 if (copy_from_user(&fid
, (struct lu_fid
*)arg
, sizeof(fid
)))
1362 fid_to_ostid(&fid
, &oi
);
1363 ost_idx
= (fid_seq(&fid
) >> 16) & 0xffff;
1364 return ll_lov_recreate(inode
, &oi
, ost_idx
);
/*
 * Set the striping of @inode from the user layout @lum (@lum_size bytes)
 * by issuing an open intent that carries the layout.
 *
 * Visible flow:
 *  - look up the current stripe metadata; the "stripe already exists"
 *    branch drops the reference, logs and leaves with -EEXIST;
 *  - under ll_inode_size_lock(), call ll_intent_file_open() with an
 *    IT_OPEN intent whose it_flags is @flags;
 *  - take the final status from oit.d.lustre.it_status and close the open
 *    handle obtained by the intent with ll_release_openhandle();
 *  - cleanup: unlock, ll_intent_release(), drop the lsm reference and
 *    clear the delayed-create marker in file->f_flags; the out_req_free
 *    path additionally finishes the request held in it_data.
 *
 * NOTE(review): the conditions guarding the GOTOs, the label positions and
 * the return are not visible in this listing.
 */
1367 int ll_lov_setstripe_ea_info(struct inode
*inode
, struct file
*file
,
1368 int flags
, struct lov_user_md
*lum
, int lum_size
)
1370 struct lov_stripe_md
*lsm
= NULL
;
1371 struct lookup_intent oit
= {.it_op
= IT_OPEN
, .it_flags
= flags
};
1374 lsm
= ccc_inode_lsm_get(inode
);
1376 ccc_inode_lsm_put(inode
, lsm
);
1377 CDEBUG(D_IOCTL
, "stripe already exists for ino %lu\n",
1379 GOTO(out
, rc
= -EEXIST
);
1382 ll_inode_size_lock(inode
);
1383 rc
= ll_intent_file_open(file
, lum
, lum_size
, &oit
);
1385 GOTO(out_unlock
, rc
);
1386 rc
= oit
.d
.lustre
.it_status
;
1388 GOTO(out_req_free
, rc
);
1390 ll_release_openhandle(file
->f_dentry
, &oit
);
1393 ll_inode_size_unlock(inode
);
1394 ll_intent_release(&oit
);
1395 ccc_inode_lsm_put(inode
, lsm
);
1397 cl_lov_delay_create_clear(&file
->f_flags
);
1400 ptlrpc_req_finished((struct ptlrpc_request
*) oit
.d
.lustre
.it_data
);
/*
 * Fetch the striping EA of @filename (looked up relative to @inode) from
 * the metadata server.
 *
 * Visible flow:
 *  - size the reply buffer with ll_get_max_mdsize();
 *  - build op_data for @filename and issue md_getattr_name() asking for
 *    OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
 *  - read the mdt_body from the reply; its eadatasize becomes the EA size,
 *    and a reply without EA data yields -ENODATA;
 *  - fetch the EA itself and reject anything whose magic is neither
 *    LOV_MAGIC_V1 nor LOV_MAGIC_V3 with -EPROTO;
 *  - when the host is not little endian (LOV_MAGIC differs from
 *    cpu_to_le32(LOV_MAGIC)), byte-swap the header and, for regular files
 *    only, the per-stripe object array;
 *  - report the size through *lmm_size.
 *
 * NOTE(review): the stores to *lmmp and *request, the released-pattern
 * handling after the LOV_PATTERN_F_RELEASED test, and the return are not
 * visible in this listing.
 */
1404 int ll_lov_getstripe_ea_info(struct inode
*inode
, const char *filename
,
1405 struct lov_mds_md
**lmmp
, int *lmm_size
,
1406 struct ptlrpc_request
**request
)
1408 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1409 struct mdt_body
*body
;
1410 struct lov_mds_md
*lmm
= NULL
;
1411 struct ptlrpc_request
*req
= NULL
;
1412 struct md_op_data
*op_data
;
1415 rc
= ll_get_max_mdsize(sbi
, &lmmsize
);
1419 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, filename
,
1420 strlen(filename
), lmmsize
,
1421 LUSTRE_OPC_ANY
, NULL
);
1422 if (IS_ERR(op_data
))
1423 return PTR_ERR(op_data
);
1425 op_data
->op_valid
= OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
;
1426 rc
= md_getattr_name(sbi
->ll_md_exp
, op_data
, &req
);
1427 ll_finish_md_op_data(op_data
);
1429 CDEBUG(D_INFO
, "md_getattr_name failed "
1430 "on %s: rc %d\n", filename
, rc
);
1434 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
1435 LASSERT(body
!= NULL
); /* checked by mdc_getattr_name */
1437 lmmsize
= body
->eadatasize
;
1439 if (!(body
->valid
& (OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
)) ||
1441 GOTO(out
, rc
= -ENODATA
);
1444 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_MDT_MD
, lmmsize
);
1445 LASSERT(lmm
!= NULL
);
1447 if ((lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V1
)) &&
1448 (lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V3
))) {
1449 GOTO(out
, rc
= -EPROTO
);
1453 * This is coming from the MDS, so is probably in
1454 * little endian. We convert it to host endian before
1455 * passing it to userspace.
1457 if (LOV_MAGIC
!= cpu_to_le32(LOV_MAGIC
)) {
1460 stripe_count
= le16_to_cpu(lmm
->lmm_stripe_count
);
1461 if (le32_to_cpu(lmm
->lmm_pattern
) & LOV_PATTERN_F_RELEASED
)
1464 /* if function called for directory - we should
1465 * avoid swab not existent lsm objects */
1466 if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V1
)) {
1467 lustre_swab_lov_user_md_v1((struct lov_user_md_v1
*)lmm
);
1468 if (S_ISREG(body
->mode
))
1469 lustre_swab_lov_user_md_objects(
1470 ((struct lov_user_md_v1
*)lmm
)->lmm_objects
,
1472 } else if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V3
)) {
1473 lustre_swab_lov_user_md_v3((struct lov_user_md_v3
*)lmm
);
1474 if (S_ISREG(body
->mode
))
1475 lustre_swab_lov_user_md_objects(
1476 ((struct lov_user_md_v3
*)lmm
)->lmm_objects
,
1483 *lmm_size
= lmmsize
;
/*
 * ioctl helper: set a layout that already names its objects.
 *
 * Requires CFS_CAP_SYS_ADMIN. Copies sizeof(struct lov_user_md) plus one
 * struct lov_user_ost_data from user space into a temporary buffer and
 * passes it to ll_lov_setstripe_ea_info() with
 * MDS_OPEN_HAS_OBJS | FMODE_WRITE. The buffer is freed on both the
 * copy-failure path and the normal path.
 *
 * NOTE(review): the last parameter (used as "arg"), the allocation-failure
 * check and the return values are not visible in this listing.
 */
1488 static int ll_lov_setea(struct inode
*inode
, struct file
*file
,
1491 int flags
= MDS_OPEN_HAS_OBJS
| FMODE_WRITE
;
1492 struct lov_user_md
*lump
;
1493 int lum_size
= sizeof(struct lov_user_md
) +
1494 sizeof(struct lov_user_ost_data
);
1497 if (!capable(CFS_CAP_SYS_ADMIN
))
1500 OBD_ALLOC_LARGE(lump
, lum_size
);
1504 if (copy_from_user(lump
, (struct lov_user_md
*)arg
, lum_size
)) {
1505 OBD_FREE_LARGE(lump
, lum_size
);
1509 rc
= ll_lov_setstripe_ea_info(inode
, file
, flags
, lump
, lum_size
);
1511 OBD_FREE_LARGE(lump
, lum_size
);
/*
 * ioctl helper: set the striping of a file from a user-supplied
 * lov_user_md (v1 or v3).
 *
 * The v1-sized prefix is copied first; if its magic is LOV_USER_MAGIC_V3
 * the full v3 structure is copied instead. The layout is applied through
 * ll_lov_setstripe_ea_info() with FMODE_WRITE. Afterwards the code zeroes
 * the user's lmm_stripe_count, refreshes the layout and returns the
 * resulting striping to the same user buffer via
 * obd_iocontrol(LL_IOC_LOV_GETSTRIPE).
 *
 * NOTE(review): the return value of put_user() is not checked in the
 * visible code. The last parameter (used as "arg"), the condition guarding
 * the read-back section and the returns are not visible in this listing.
 */
1515 static int ll_lov_setstripe(struct inode
*inode
, struct file
*file
,
1518 struct lov_user_md_v3 lumv3
;
1519 struct lov_user_md_v1
*lumv1
= (struct lov_user_md_v1
*)&lumv3
;
1520 struct lov_user_md_v1
*lumv1p
= (struct lov_user_md_v1
*)arg
;
1521 struct lov_user_md_v3
*lumv3p
= (struct lov_user_md_v3
*)arg
;
1523 int flags
= FMODE_WRITE
;
1525 /* first try with v1 which is smaller than v3 */
1526 lum_size
= sizeof(struct lov_user_md_v1
);
1527 if (copy_from_user(lumv1
, lumv1p
, lum_size
))
1530 if (lumv1
->lmm_magic
== LOV_USER_MAGIC_V3
) {
1531 lum_size
= sizeof(struct lov_user_md_v3
);
1532 if (copy_from_user(&lumv3
, lumv3p
, lum_size
))
1536 rc
= ll_lov_setstripe_ea_info(inode
, file
, flags
, lumv1
, lum_size
);
1538 struct lov_stripe_md
*lsm
;
1541 put_user(0, &lumv1p
->lmm_stripe_count
);
1543 ll_layout_refresh(inode
, &gen
);
1544 lsm
= ccc_inode_lsm_get(inode
);
1545 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
),
1546 0, lsm
, (void *)arg
);
1547 ccc_inode_lsm_put(inode
, lsm
);
/*
 * ioctl helper: report the striping of @inode.
 *
 * Takes a reference on the inode's stripe metadata, forwards
 * LL_IOC_LOV_GETSTRIPE to the data export with obd_iocontrol(), and drops
 * the reference again.
 *
 * NOTE(review): the remaining obd_iocontrol() arguments, any guard on a
 * missing lsm and the return are not visible in this listing.
 */
1552 static int ll_lov_getstripe(struct inode
*inode
, unsigned long arg
)
1554 struct lov_stripe_md
*lsm
;
1557 lsm
= ccc_inode_lsm_get(inode
);
1559 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
), 0,
1561 ccc_inode_lsm_put(inode
, lsm
);
/*
 * Acquire a group lock with group id @arg on behalf of @file.
 *
 * Visible flow:
 *  - bail out when ll_file_nolock(file) is true;
 *  - under lli_lock, warn and leave if this file descriptor already has
 *    LL_FILE_GROUP_LOCKED set; otherwise assert that no lock is recorded;
 *  - drop the spinlock before calling cl_get_grouplock(), which is passed
 *    the O_NONBLOCK bit of file->f_flags;
 *  - re-take the spinlock and re-check the flag: if another thread set it
 *    in the meantime, release the freshly obtained lock and fail;
 *  - otherwise mark the descriptor LL_FILE_GROUP_LOCKED and store the lock
 *    in fd->fd_grouplock.
 *
 * NOTE(review): the concrete error codes and the return statements are not
 * visible in this listing.
 */
1565 int ll_get_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
)
1567 struct ll_inode_info
*lli
= ll_i2info(inode
);
1568 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1569 struct ccc_grouplock grouplock
;
1572 if (ll_file_nolock(file
))
1575 spin_lock(&lli
->lli_lock
);
1576 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1577 CWARN("group lock already existed with gid %lu\n",
1578 fd
->fd_grouplock
.cg_gid
);
1579 spin_unlock(&lli
->lli_lock
);
1582 LASSERT(fd
->fd_grouplock
.cg_lock
== NULL
);
1583 spin_unlock(&lli
->lli_lock
);
1585 rc
= cl_get_grouplock(cl_i2info(inode
)->lli_clob
,
1586 arg
, (file
->f_flags
& O_NONBLOCK
), &grouplock
);
1590 spin_lock(&lli
->lli_lock
);
1591 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1592 spin_unlock(&lli
->lli_lock
);
1593 CERROR("another thread just won the race\n");
1594 cl_put_grouplock(&grouplock
);
1598 fd
->fd_flags
|= LL_FILE_GROUP_LOCKED
;
1599 fd
->fd_grouplock
= grouplock
;
1600 spin_unlock(&lli
->lli_lock
);
1602 CDEBUG(D_INFO
, "group lock %lu obtained\n", arg
);
/*
 * Release the group lock with group id @arg held through @file.
 *
 * Under lli_lock the function warns and leaves when the descriptor is not
 * marked LL_FILE_GROUP_LOCKED or when the recorded cg_gid differs from
 * @arg. Otherwise it copies the lock descriptor to a local, zeroes
 * fd->fd_grouplock, clears the flag and drops the spinlock; the actual
 * cl_put_grouplock() call is made on the local copy after the spinlock has
 * been released.
 *
 * NOTE(review): the concrete error codes and the return statements are not
 * visible in this listing.
 */
1606 int ll_put_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
)
1608 struct ll_inode_info
*lli
= ll_i2info(inode
);
1609 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1610 struct ccc_grouplock grouplock
;
1612 spin_lock(&lli
->lli_lock
);
1613 if (!(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1614 spin_unlock(&lli
->lli_lock
);
1615 CWARN("no group lock held\n");
1618 LASSERT(fd
->fd_grouplock
.cg_lock
!= NULL
);
1620 if (fd
->fd_grouplock
.cg_gid
!= arg
) {
1621 CWARN("group lock %lu doesn't match current id %lu\n",
1622 arg
, fd
->fd_grouplock
.cg_gid
);
1623 spin_unlock(&lli
->lli_lock
);
1627 grouplock
= fd
->fd_grouplock
;
1628 memset(&fd
->fd_grouplock
, 0, sizeof(fd
->fd_grouplock
));
1629 fd
->fd_flags
&= ~LL_FILE_GROUP_LOCKED
;
1630 spin_unlock(&lli
->lli_lock
);
1632 cl_put_grouplock(&grouplock
);
1633 CDEBUG(D_INFO
, "group lock %lu released\n", arg
);
1638 * Close inode open handle
1640 * \param dentry [in] dentry which contains the inode
1641 * \param it [in,out] intent which contains open info and result
1644 * \retval <0 failure
/*
 * Close the MDS open handle that an intent (@it) obtained for @dentry.
 *
 * Nothing is done for the filesystem root dentry or when the intent does
 * not carry DISP_OPEN_OPEN. Otherwise a temporary obd_client_handle is
 * allocated (-ENOMEM on failure), filled from the intent with
 * ll_och_fill() and closed through ll_close_inode_openhandle(). If the
 * intent still holds the enqueue-open request reference
 * (DISP_ENQ_OPEN_REF), that request is finished and the disposition bit
 * cleared.
 *
 * NOTE(review): the early-return values, the remaining
 * ll_close_inode_openhandle() arguments and the final return are not
 * visible in this listing.
 */
1646 int ll_release_openhandle(struct dentry
*dentry
, struct lookup_intent
*it
)
1648 struct inode
*inode
= dentry
->d_inode
;
1649 struct obd_client_handle
*och
;
1654 /* Root ? Do nothing. */
1655 if (dentry
->d_inode
->i_sb
->s_root
== dentry
)
1658 /* No open handle to close? Move away */
1659 if (!it_disposition(it
, DISP_OPEN_OPEN
))
1662 LASSERT(it_open_error(DISP_OPEN_OPEN
, it
) == 0);
1664 OBD_ALLOC(och
, sizeof(*och
));
1666 GOTO(out
, rc
= -ENOMEM
);
1668 ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
1670 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
1673 /* this one is in place of ll_file_open */
1674 if (it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
1675 ptlrpc_req_finished(it
->d
.lustre
.it_data
);
1676 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
1682 * Get size for inode for which FIEMAP mapping is requested.
1683 * Make the FIEMAP get_info call and returns the result.
/*
 * Run a FIEMAP request for @inode through the data export.
 *
 * Visible flow:
 *  - flags outside LUSTRE_FIEMAP_FLAGS_COMPAT are detected and fm_flags is
 *    reduced to the unsupported bits;
 *  - FIEMAP_FLAG_SYNC triggers filemap_fdatawrite() on the mapping;
 *  - with more than one stripe the caller must pass
 *    FIEMAP_FLAG_DEVICE_ORDER, otherwise the result is -EOPNOTSUPP;
 *  - the request key is filled from the stripe metadata (o_oi), the inode
 *    size and the parent FID; a size of 0 short-circuits with
 *    fm_mapped_extents = 0;
 *  - the user's fiemap header is copied into the key and obd_get_info()
 *    performs the query, with num_bytes as the value length;
 *  - the lsm reference is dropped on the way out.
 *
 * NOTE(review): the last parameter (used as "num_bytes"), the error codes
 * for the flag and lsm checks, the remaining obd_get_info() arguments and
 * the return are not visible in this listing.
 */
1685 int ll_do_fiemap(struct inode
*inode
, struct ll_user_fiemap
*fiemap
,
1688 struct obd_export
*exp
= ll_i2dtexp(inode
);
1689 struct lov_stripe_md
*lsm
= NULL
;
1690 struct ll_fiemap_info_key fm_key
= { .name
= KEY_FIEMAP
, };
1691 int vallen
= num_bytes
;
1694 /* Checks for fiemap flags */
1695 if (fiemap
->fm_flags
& ~LUSTRE_FIEMAP_FLAGS_COMPAT
) {
1696 fiemap
->fm_flags
&= ~LUSTRE_FIEMAP_FLAGS_COMPAT
;
1700 /* Check for FIEMAP_FLAG_SYNC */
1701 if (fiemap
->fm_flags
& FIEMAP_FLAG_SYNC
) {
1702 rc
= filemap_fdatawrite(inode
->i_mapping
);
1707 lsm
= ccc_inode_lsm_get(inode
);
1711 /* If the stripe_count > 1 and the application does not understand
1712 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1714 if (lsm
->lsm_stripe_count
> 1 &&
1715 !(fiemap
->fm_flags
& FIEMAP_FLAG_DEVICE_ORDER
))
1716 GOTO(out
, rc
= -EOPNOTSUPP
);
1718 fm_key
.oa
.o_oi
= lsm
->lsm_oi
;
1719 fm_key
.oa
.o_valid
= OBD_MD_FLID
| OBD_MD_FLGROUP
;
1721 obdo_from_inode(&fm_key
.oa
, inode
, OBD_MD_FLSIZE
);
1722 obdo_set_parent_fid(&fm_key
.oa
, &ll_i2info(inode
)->lli_fid
);
1723 /* If filesize is 0, then there would be no objects for mapping */
1724 if (fm_key
.oa
.o_size
== 0) {
1725 fiemap
->fm_mapped_extents
= 0;
1729 memcpy(&fm_key
.fiemap
, fiemap
, sizeof(*fiemap
));
1731 rc
= obd_get_info(NULL
, exp
, sizeof(fm_key
), &fm_key
, &vallen
,
1734 CERROR("obd_get_info failed: rc = %d\n", rc
);
1737 ccc_inode_lsm_put(inode
, lsm
);
/*
 * ioctl helper: translate a FID into a path via the metadata export.
 *
 * Access is allowed with CFS_CAP_DAC_READ_SEARCH or when the superblock
 * has LL_SBI_USER_FID2PATH set. The fixed-size getinfo_fid2path header is
 * first copied from user space to learn gf_pathlen; a second buffer of
 * sizeof(header) + gf_pathlen bytes is then allocated, seeded with the
 * header and passed to obd_iocontrol(OBD_IOC_FID2PATH). On success the
 * whole output buffer is copied back to @arg, and the output buffer is
 * freed.
 *
 * NOTE(review): gf_pathlen comes from user space and directly sizes the
 * allocation; no upper bound is visible in this listing -- confirm one
 * exists. Error codes, the release of gfin and the return are not visible.
 */
1741 int ll_fid2path(struct inode
*inode
, void *arg
)
1743 struct obd_export
*exp
= ll_i2mdexp(inode
);
1744 struct getinfo_fid2path
*gfout
, *gfin
;
1747 if (!capable(CFS_CAP_DAC_READ_SEARCH
) &&
1748 !(ll_i2sbi(inode
)->ll_flags
& LL_SBI_USER_FID2PATH
))
1751 /* Need to get the buflen */
1752 OBD_ALLOC_PTR(gfin
);
1755 if (copy_from_user(gfin
, arg
, sizeof(*gfin
))) {
1760 outsize
= sizeof(*gfout
) + gfin
->gf_pathlen
;
1761 OBD_ALLOC(gfout
, outsize
);
1762 if (gfout
== NULL
) {
1766 memcpy(gfout
, gfin
, sizeof(*gfout
));
1769 /* Call mdc_iocontrol */
1770 rc
= obd_iocontrol(OBD_IOC_FID2PATH
, exp
, outsize
, gfout
, NULL
);
1774 if (copy_to_user(arg
, gfout
, outsize
))
1778 OBD_FREE(gfout
, outsize
);
/*
 * ioctl front end for FIEMAP.
 *
 * Reads fm_extent_count from the user's ll_user_fiemap to size a kernel
 * buffer of sizeof(header) + extent_count * sizeof(struct
 * ll_fiemap_extent), copies in the header and the first extent, runs
 * ll_do_fiemap(), and copies back the header plus, when extent_count is
 * non-zero, fm_mapped_extents extents. The buffer is freed with
 * OBD_FREE_LARGE on the way out.
 *
 * NOTE(review): extent_count is user supplied and multiplied into the
 * allocation size; no bound is visible in this listing -- confirm the
 * product cannot overflow size_t on 32-bit builds. The condition guarding
 * the first-extent copy, the error codes and the return are not visible.
 */
1782 static int ll_ioctl_fiemap(struct inode
*inode
, unsigned long arg
)
1784 struct ll_user_fiemap
*fiemap_s
;
1785 size_t num_bytes
, ret_bytes
;
1786 unsigned int extent_count
;
1789 /* Get the extent count so we can calculate the size of
1790 * required fiemap buffer */
1791 if (get_user(extent_count
,
1792 &((struct ll_user_fiemap __user
*)arg
)->fm_extent_count
))
1794 num_bytes
= sizeof(*fiemap_s
) + (extent_count
*
1795 sizeof(struct ll_fiemap_extent
));
1797 OBD_ALLOC_LARGE(fiemap_s
, num_bytes
);
1798 if (fiemap_s
== NULL
)
1801 /* get the fiemap value */
1802 if (copy_from_user(fiemap_s
, (struct ll_user_fiemap __user
*)arg
,
1804 GOTO(error
, rc
= -EFAULT
);
1806 /* If fm_extent_count is non-zero, read the first extent since
1807 * it is used to calculate end_offset and device from previous
1810 if (copy_from_user(&fiemap_s
->fm_extents
[0],
1811 (char __user
*)arg
+ sizeof(*fiemap_s
),
1812 sizeof(struct ll_fiemap_extent
)))
1813 GOTO(error
, rc
= -EFAULT
);
1816 rc
= ll_do_fiemap(inode
, fiemap_s
, num_bytes
);
1820 ret_bytes
= sizeof(struct ll_user_fiemap
);
1822 if (extent_count
!= 0)
1823 ret_bytes
+= (fiemap_s
->fm_mapped_extents
*
1824 sizeof(struct ll_fiemap_extent
));
1826 if (copy_to_user((void *)arg
, fiemap_s
, ret_bytes
))
1830 OBD_FREE_LARGE(fiemap_s
, num_bytes
);
1835 * Read the data_version for inode.
1837 * This value is computed using stripe object version on OST.
1838 * Version is computed using server side locking.
1840 * @param extent_lock Take extent lock. Not needed if a process is already
1841 * holding the OST object group locks.
// Read the data version of @inode into *data_version.
//
// Visible flow: take a reference on the stripe metadata; when
// lsm_has_objects() is false the "No object for inode" branch is taken;
// otherwise allocate an obdo (-ENOMEM on failure), query the attributes
// with ll_lsm_getattr() on the data export, passing @extent_lock through,
// and copy o_data_version out when the reply carries
// OBD_MD_FLDATAVERSION. The lsm reference is dropped on the way out.
//
// NOTE(review): the last parameter (used as "extent_lock"), the value
// stored in the no-object branch, the error code used when
// OBD_MD_FLDATAVERSION is missing, the obdo release and the return are
// not visible in this listing.
1843 int ll_data_version(struct inode
*inode
, __u64
*data_version
,
1846 struct lov_stripe_md
*lsm
= NULL
;
1847 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1848 struct obdo
*obdo
= NULL
;
1851 /* If no stripe, we consider version is 0. */
1852 lsm
= ccc_inode_lsm_get(inode
);
1853 if (!lsm_has_objects(lsm
)) {
1855 CDEBUG(D_INODE
, "No object for inode\n");
1859 OBD_ALLOC_PTR(obdo
);
1861 GOTO(out
, rc
= -ENOMEM
);
1863 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, NULL
, obdo
, 0, extent_lock
);
1865 if (!(obdo
->o_valid
& OBD_MD_FLDATAVERSION
))
1868 *data_version
= obdo
->o_data_version
;
1873 ccc_inode_lsm_put(inode
, lsm
);
1878 * Trigger a HSM release request for the provided inode.
/*
 * Ask the MDT to release (HSM) the data of @inode.
 *
 * Visible flow:
 *  - open a write lease with MDS_OPEN_RELEASE via ll_lease_open();
 *  - fetch the current data version with ll_data_version(), passing 1 as
 *    its third argument;
 *  - merge the latest attributes with ll_merge_lvb() inside a nested cl
 *    environment;
 *  - close the open handle with ll_close_inode_openhandle(), which carries
 *    the release request;
 *  - on the exit path, a handle that is still non-NULL and not an error
 *    pointer is closed with ll_lease_close().
 *
 * NOTE(review): the checks after each step, the remaining
 * ll_close_inode_openhandle() arguments, where och is reset and the return
 * are not visible in this listing.
 */
1880 int ll_hsm_release(struct inode
*inode
)
1882 struct cl_env_nest nest
;
1884 struct obd_client_handle
*och
= NULL
;
1885 __u64 data_version
= 0;
1889 CDEBUG(D_INODE
, "%s: Releasing file "DFID
".\n",
1890 ll_get_fsname(inode
->i_sb
, NULL
, 0),
1891 PFID(&ll_i2info(inode
)->lli_fid
));
1893 och
= ll_lease_open(inode
, NULL
, FMODE_WRITE
, MDS_OPEN_RELEASE
);
1895 GOTO(out
, rc
= PTR_ERR(och
));
1897 /* Grab latest data_version and [am]time values */
1898 rc
= ll_data_version(inode
, &data_version
, 1);
1902 env
= cl_env_nested_get(&nest
);
1904 GOTO(out
, rc
= PTR_ERR(env
));
1906 ll_merge_lvb(env
, inode
);
1907 cl_env_nested_put(&nest
, env
);
1909 /* Release the file.
1910 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1911 * we still need it to pack l_remote_handle to MDT. */
1912 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
1918 if (och
!= NULL
&& !IS_ERR(och
)) /* close the file */
1919 ll_lease_close(och
, inode
, NULL
);
/*
 * Scratch state for ll_swap_layouts(), heap-allocated by that function.
 * Holds, for each of the two files: the saved time attributes (ia1/ia2),
 * the inode, and whether its data version must be checked before the swap
 * (check_dv1/check_dv2).
 * NOTE(review): at least one member line is not visible in this listing
 * (ll_swap_layouts() also uses ->dv1 and ->dv2).
 */
1924 struct ll_swap_stack
{
1925 struct iattr ia1
, ia2
;
1927 struct inode
*inode1
, *inode2
;
1928 bool check_dv1
, check_dv2
;
/*
 * Swap the layouts of @file1 and @file2 as described by @lsl.
 *
 * Visible flow:
 *  - allocate the ll_swap_stack scratch area and record both inodes;
 *  - validate: inode2 must be a regular file (-EINVAL), both inodes must
 *    be writable (-EPERM) and on the same superblock (-EXDEV);
 *  - copy the data-version check flags and expected versions from @lsl;
 *  - compare the two FIDs with lu_fid_cmp(); identical FIDs mean there is
 *    nothing to do, and when the comparison is negative the two sides
 *    (inodes, versions, check flags) are swapped so that locking is always
 *    done in the same order;
 *  - when a group id is given, take the group lock on both files (the
 *    first one is released again if the second cannot be taken);
 *  - save mtime/atime of both inodes if the caller asked to keep them;
 *  - re-check the data versions where requested (-EAGAIN on mismatch);
 *  - send LL_IOC_LOV_SWAP_LAYOUTS to the metadata export, passing a
 *    struct mdc_swap_layouts through op_data;
 *  - release the group locks, drop the time attributes that were not
 *    requested, and restore the remaining ones with ll_setattr() under
 *    each inode's i_mutex (inode1 receives ia2, inode2 receives ia1).
 *
 * NOTE(review): several declarations, conditions, labels, the release of
 * the scratch area and the return are not visible in this listing.
 */
1931 static int ll_swap_layouts(struct file
*file1
, struct file
*file2
,
1932 struct lustre_swap_layouts
*lsl
)
1934 struct mdc_swap_layouts msl
;
1935 struct md_op_data
*op_data
;
1938 struct ll_swap_stack
*llss
= NULL
;
1941 OBD_ALLOC_PTR(llss
);
1945 llss
->inode1
= file1
->f_dentry
->d_inode
;
1946 llss
->inode2
= file2
->f_dentry
->d_inode
;
1948 if (!S_ISREG(llss
->inode2
->i_mode
))
1949 GOTO(free
, rc
= -EINVAL
);
1951 if (inode_permission(llss
->inode1
, MAY_WRITE
) ||
1952 inode_permission(llss
->inode2
, MAY_WRITE
))
1953 GOTO(free
, rc
= -EPERM
);
1955 if (llss
->inode2
->i_sb
!= llss
->inode1
->i_sb
)
1956 GOTO(free
, rc
= -EXDEV
);
1958 /* we use 2 bool because it is easier to swap than 2 bits */
1959 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV1
)
1960 llss
->check_dv1
= true;
1962 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV2
)
1963 llss
->check_dv2
= true;
1965 /* we cannot use lsl->sl_dvX directly because we may swap them */
1966 llss
->dv1
= lsl
->sl_dv1
;
1967 llss
->dv2
= lsl
->sl_dv2
;
1969 rc
= lu_fid_cmp(ll_inode2fid(llss
->inode1
), ll_inode2fid(llss
->inode2
));
1970 if (rc
== 0) /* same file, done! */
1973 if (rc
< 0) { /* sequentialize it */
1974 swap(llss
->inode1
, llss
->inode2
);
1976 swap(llss
->dv1
, llss
->dv2
);
1977 swap(llss
->check_dv1
, llss
->check_dv2
);
1981 if (gid
!= 0) { /* application asks to flush dirty cache */
1982 rc
= ll_get_grouplock(llss
->inode1
, file1
, gid
);
1986 rc
= ll_get_grouplock(llss
->inode2
, file2
, gid
);
1988 ll_put_grouplock(llss
->inode1
, file1
, gid
);
1993 /* to be able to restore mtime and atime after swap
1994 * we need to first save them */
1996 (SWAP_LAYOUTS_KEEP_MTIME
| SWAP_LAYOUTS_KEEP_ATIME
)) {
1997 llss
->ia1
.ia_mtime
= llss
->inode1
->i_mtime
;
1998 llss
->ia1
.ia_atime
= llss
->inode1
->i_atime
;
1999 llss
->ia1
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2000 llss
->ia2
.ia_mtime
= llss
->inode2
->i_mtime
;
2001 llss
->ia2
.ia_atime
= llss
->inode2
->i_atime
;
2002 llss
->ia2
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2005 /* ultimate check, before swapping the layouts we check if
2006 * dataversion has changed (if requested) */
2007 if (llss
->check_dv1
) {
2008 rc
= ll_data_version(llss
->inode1
, &dv
, 0);
2011 if (dv
!= llss
->dv1
)
2012 GOTO(putgl
, rc
= -EAGAIN
);
2015 if (llss
->check_dv2
) {
2016 rc
= ll_data_version(llss
->inode2
, &dv
, 0);
2019 if (dv
!= llss
->dv2
)
2020 GOTO(putgl
, rc
= -EAGAIN
);
2023 /* struct md_op_data is used to send the swap args to the mdt
2024 * only flags is missing, so we use struct mdc_swap_layouts
2025 * through the md_op_data->op_data */
2026 /* flags from user space have to be converted before they are send to
2027 * server, no flag is sent today, they are only used on the client */
2030 op_data
= ll_prep_md_op_data(NULL
, llss
->inode1
, llss
->inode2
, NULL
, 0,
2031 0, LUSTRE_OPC_ANY
, &msl
);
2032 if (IS_ERR(op_data
))
2033 GOTO(free
, rc
= PTR_ERR(op_data
));
2035 rc
= obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS
, ll_i2mdexp(llss
->inode1
),
2036 sizeof(*op_data
), op_data
, NULL
);
2037 ll_finish_md_op_data(op_data
);
2041 ll_put_grouplock(llss
->inode2
, file2
, gid
);
2042 ll_put_grouplock(llss
->inode1
, file1
, gid
);
2045 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2049 /* clear useless flags */
2050 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_MTIME
)) {
2051 llss
->ia1
.ia_valid
&= ~ATTR_MTIME
;
2052 llss
->ia2
.ia_valid
&= ~ATTR_MTIME
;
2055 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_ATIME
)) {
2056 llss
->ia1
.ia_valid
&= ~ATTR_ATIME
;
2057 llss
->ia2
.ia_valid
&= ~ATTR_ATIME
;
2060 /* update time if requested */
2062 if (llss
->ia2
.ia_valid
!= 0) {
2063 mutex_lock(&llss
->inode1
->i_mutex
);
2064 rc
= ll_setattr(file1
->f_dentry
, &llss
->ia2
);
2065 mutex_unlock(&llss
->inode1
->i_mutex
);
2068 if (llss
->ia1
.ia_valid
!= 0) {
2071 mutex_lock(&llss
->inode2
->i_mutex
);
2072 rc1
= ll_setattr(file2
->f_dentry
, &llss
->ia1
);
2073 mutex_unlock(&llss
->inode2
->i_mutex
);
/*
 * Apply an HSM state change (@hss) to @inode through the metadata export.
 *
 * Callers without CFS_CAP_SYS_ADMIN may only set or clear flags inside
 * HSM_USER_MASK. The request is wrapped in op_data (with @hss as its
 * payload) and sent with obd_iocontrol(LL_IOC_HSM_STATE_SET); op_data is
 * released afterwards. A failure to build op_data returns its PTR_ERR().
 *
 * NOTE(review): the error code of the permission check and the final
 * return are not visible in this listing.
 */
2085 static int ll_hsm_state_set(struct inode
*inode
, struct hsm_state_set
*hss
)
2087 struct md_op_data
*op_data
;
2090 /* Non-root users are forbidden to set or clear flags which are
2091 * NOT defined in HSM_USER_MASK. */
2092 if (((hss
->hss_setmask
| hss
->hss_clearmask
) & ~HSM_USER_MASK
) &&
2093 !capable(CFS_CAP_SYS_ADMIN
))
2096 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2097 LUSTRE_OPC_ANY
, hss
);
2098 if (IS_ERR(op_data
))
2099 return PTR_ERR(op_data
);
2101 rc
= obd_iocontrol(LL_IOC_HSM_STATE_SET
, ll_i2mdexp(inode
),
2102 sizeof(*op_data
), op_data
, NULL
);
2104 ll_finish_md_op_data(op_data
);
/*
 * Import an archived file described by @hui: mark @inode as archived and
 * released, then give it the attributes recorded in the archive.
 *
 * Visible flow:
 *  - only regular files are accepted;
 *  - build an hsm_state_set that sets HS_ARCHIVED | HS_EXISTS |
 *    HS_RELEASED with the archive id from @hui and apply it with
 *    ll_hsm_state_set();
 *  - build an iattr from @hui (permission bits masked to rwx for
 *    user/group/other and forced to S_IFREG, uid/gid mapped through
 *    init_user_ns, size, mtime and atime) and apply it with
 *    ll_setattr_raw(), passing true as its third argument.
 *
 * Allocation failures yield -ENOMEM.
 *
 * NOTE(review): the hss allocation, the error code for non-regular files,
 * the cleanup of hss/attr and the return are not visible in this listing.
 */
2109 static int ll_hsm_import(struct inode
*inode
, struct file
*file
,
2110 struct hsm_user_import
*hui
)
2112 struct hsm_state_set
*hss
= NULL
;
2113 struct iattr
*attr
= NULL
;
2117 if (!S_ISREG(inode
->i_mode
))
2123 GOTO(out
, rc
= -ENOMEM
);
2125 hss
->hss_valid
= HSS_SETMASK
| HSS_ARCHIVE_ID
;
2126 hss
->hss_archive_id
= hui
->hui_archive_id
;
2127 hss
->hss_setmask
= HS_ARCHIVED
| HS_EXISTS
| HS_RELEASED
;
2128 rc
= ll_hsm_state_set(inode
, hss
);
2132 OBD_ALLOC_PTR(attr
);
2134 GOTO(out
, rc
= -ENOMEM
);
2136 attr
->ia_mode
= hui
->hui_mode
& (S_IRWXU
| S_IRWXG
| S_IRWXO
);
2137 attr
->ia_mode
|= S_IFREG
;
2138 attr
->ia_uid
= make_kuid(&init_user_ns
, hui
->hui_uid
);
2139 attr
->ia_gid
= make_kgid(&init_user_ns
, hui
->hui_gid
);
2140 attr
->ia_size
= hui
->hui_size
;
2141 attr
->ia_mtime
.tv_sec
= hui
->hui_mtime
;
2142 attr
->ia_mtime
.tv_nsec
= hui
->hui_mtime_ns
;
2143 attr
->ia_atime
.tv_sec
= hui
->hui_atime
;
2144 attr
->ia_atime
.tv_nsec
= hui
->hui_atime_ns
;
2146 attr
->ia_valid
= ATTR_SIZE
| ATTR_MODE
| ATTR_FORCE
|
2147 ATTR_UID
| ATTR_GID
|
2148 ATTR_MTIME
| ATTR_MTIME_SET
|
2149 ATTR_ATIME
| ATTR_ATIME_SET
;
2151 rc
= ll_setattr_raw(file
->f_dentry
, attr
, true);
/*
 * ioctl dispatcher for regular files.
 *
 * Logs the call, counts it under LPROC_LL_IOCTL, singles out tty ioctls
 * (type 'T' or 't'), then switches on @cmd. Most visible cases delegate
 * straight to a helper (striping, object re-creation, FIEMAP, group
 * locks, statfs, FID/path translation, obd names); the others are handled
 * inline:
 *  - LL_IOC_GETFLAGS / SETFLAGS / CLRFLAGS read or update fd->fd_flags;
 *    setting LL_FILE_IGNORE_LOCK is refused on a file without O_DIRECT;
 *  - LL_IOC_LOV_SWAP_LAYOUTS checks the access mode of both files before
 *    calling ll_swap_layouts();
 *  - LL_IOC_DATA_VERSION flushes unless LL_DV_NOFLUSH is set;
 *  - LL_IOC_HSM_STATE_GET / LL_IOC_HSM_ACTION query the metadata export
 *    and copy the result to user space;
 *  - LL_IOC_SET_LEASE / LL_IOC_GET_LEASE manage fd->fd_lease_och under
 *    lli_och_mutex.
 * Commands not matched by the switch are offered to ll_iocontrol_call()
 * and finally forwarded to obd_iocontrol() on the data export.
 *
 * NOTE(review): many case bodies are only partly visible in this listing
 * (allocations, error codes, returns, and the action taken for tty
 * ioctls); check the full source before relying on details.
 */
2165 long ll_file_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
2167 struct inode
*inode
= file
->f_dentry
->d_inode
;
2168 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2171 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode
->i_ino
,
2172 inode
->i_generation
, inode
, cmd
);
2173 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_IOCTL
, 1);
2175 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2176 if (_IOC_TYPE(cmd
) == 'T' || _IOC_TYPE(cmd
) == 't') /* tty ioctls */
2180 case LL_IOC_GETFLAGS
:
2181 /* Get the current value of the file flags */
2182 return put_user(fd
->fd_flags
, (int *)arg
);
2183 case LL_IOC_SETFLAGS
:
2184 case LL_IOC_CLRFLAGS
:
2185 /* Set or clear specific file flags */
2186 /* XXX This probably needs checks to ensure the flags are
2187 * not abused, and to handle any flag side effects.
2189 if (get_user(flags
, (int *) arg
))
2192 if (cmd
== LL_IOC_SETFLAGS
) {
2193 if ((flags
& LL_FILE_IGNORE_LOCK
) &&
2194 !(file
->f_flags
& O_DIRECT
)) {
2195 CERROR("%s: unable to disable locking on "
2196 "non-O_DIRECT file\n", current
->comm
);
2200 fd
->fd_flags
|= flags
;
2202 fd
->fd_flags
&= ~flags
;
2205 case LL_IOC_LOV_SETSTRIPE
:
2206 return ll_lov_setstripe(inode
, file
, arg
);
2207 case LL_IOC_LOV_SETEA
:
2208 return ll_lov_setea(inode
, file
, arg
);
2209 case LL_IOC_LOV_SWAP_LAYOUTS
: {
2211 struct lustre_swap_layouts lsl
;
2213 if (copy_from_user(&lsl
, (char *)arg
,
2214 sizeof(struct lustre_swap_layouts
)))
2217 if ((file
->f_flags
& O_ACCMODE
) == 0) /* O_RDONLY */
2220 file2
= fget(lsl
.sl_fd
);
2225 if ((file2
->f_flags
& O_ACCMODE
) != 0) /* O_WRONLY or O_RDWR */
2226 rc
= ll_swap_layouts(file
, file2
, &lsl
);
2230 case LL_IOC_LOV_GETSTRIPE
:
2231 return ll_lov_getstripe(inode
, arg
);
2232 case LL_IOC_RECREATE_OBJ
:
2233 return ll_lov_recreate_obj(inode
, arg
);
2234 case LL_IOC_RECREATE_FID
:
2235 return ll_lov_recreate_fid(inode
, arg
);
2236 case FSFILT_IOC_FIEMAP
:
2237 return ll_ioctl_fiemap(inode
, arg
);
2238 case FSFILT_IOC_GETFLAGS
:
2239 case FSFILT_IOC_SETFLAGS
:
2240 return ll_iocontrol(inode
, file
, cmd
, arg
);
2241 case FSFILT_IOC_GETVERSION_OLD
:
2242 case FSFILT_IOC_GETVERSION
:
2243 return put_user(inode
->i_generation
, (int *)arg
);
2244 case LL_IOC_GROUP_LOCK
:
2245 return ll_get_grouplock(inode
, file
, arg
);
2246 case LL_IOC_GROUP_UNLOCK
:
2247 return ll_put_grouplock(inode
, file
, arg
);
2248 case IOC_OBD_STATFS
:
2249 return ll_obd_statfs(inode
, (void *)arg
);
2251 /* We need to special case any other ioctls we want to handle,
2252 * to send them to the MDS/OST as appropriate and to properly
2253 * network encode the arg field.
2254 case FSFILT_IOC_SETVERSION_OLD:
2255 case FSFILT_IOC_SETVERSION:
2257 case LL_IOC_FLUSHCTX
:
2258 return ll_flush_ctx(inode
);
2259 case LL_IOC_PATH2FID
: {
2260 if (copy_to_user((void *)arg
, ll_inode2fid(inode
),
2261 sizeof(struct lu_fid
)))
2266 case OBD_IOC_FID2PATH
:
2267 return ll_fid2path(inode
, (void *)arg
);
2268 case LL_IOC_DATA_VERSION
: {
2269 struct ioc_data_version idv
;
2272 if (copy_from_user(&idv
, (char *)arg
, sizeof(idv
)))
2275 rc
= ll_data_version(inode
, &idv
.idv_version
,
2276 !(idv
.idv_flags
& LL_DV_NOFLUSH
));
2278 if (rc
== 0 && copy_to_user((char *) arg
, &idv
, sizeof(idv
)))
2284 case LL_IOC_GET_MDTIDX
: {
2287 mdtidx
= ll_get_mdt_idx(inode
);
2291 if (put_user((int)mdtidx
, (int*)arg
))
2296 case OBD_IOC_GETDTNAME
:
2297 case OBD_IOC_GETMDNAME
:
2298 return ll_get_obd_name(inode
, cmd
, arg
);
2299 case LL_IOC_HSM_STATE_GET
: {
2300 struct md_op_data
*op_data
;
2301 struct hsm_user_state
*hus
;
2308 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2309 LUSTRE_OPC_ANY
, hus
);
2310 if (IS_ERR(op_data
)) {
2312 return PTR_ERR(op_data
);
2315 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2318 if (copy_to_user((void *)arg
, hus
, sizeof(*hus
)))
2321 ll_finish_md_op_data(op_data
);
2325 case LL_IOC_HSM_STATE_SET
: {
2326 struct hsm_state_set
*hss
;
2333 if (copy_from_user(hss
, (char *)arg
, sizeof(*hss
))) {
2338 rc
= ll_hsm_state_set(inode
, hss
);
2343 case LL_IOC_HSM_ACTION
: {
2344 struct md_op_data
*op_data
;
2345 struct hsm_current_action
*hca
;
2352 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2353 LUSTRE_OPC_ANY
, hca
);
2354 if (IS_ERR(op_data
)) {
2356 return PTR_ERR(op_data
);
2359 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2362 if (copy_to_user((char *)arg
, hca
, sizeof(*hca
)))
2365 ll_finish_md_op_data(op_data
);
2369 case LL_IOC_SET_LEASE
: {
2370 struct ll_inode_info
*lli
= ll_i2info(inode
);
2371 struct obd_client_handle
*och
= NULL
;
2377 if (!(file
->f_mode
& FMODE_WRITE
))
2382 if (!(file
->f_mode
& FMODE_READ
))
2387 mutex_lock(&lli
->lli_och_mutex
);
2388 if (fd
->fd_lease_och
!= NULL
) {
2389 och
= fd
->fd_lease_och
;
2390 fd
->fd_lease_och
= NULL
;
2392 mutex_unlock(&lli
->lli_och_mutex
);
2395 mode
= och
->och_flags
&
2396 (FMODE_READ
|FMODE_WRITE
);
2397 rc
= ll_lease_close(och
, inode
, &lease_broken
);
2398 if (rc
== 0 && lease_broken
)
2404 /* return the type of lease or error */
2405 return rc
< 0 ? rc
: (int)mode
;
2410 CDEBUG(D_INODE
, "Set lease with mode %d\n", mode
);
2412 /* apply for lease */
2413 och
= ll_lease_open(inode
, file
, mode
, 0);
2415 return PTR_ERR(och
);
2418 mutex_lock(&lli
->lli_och_mutex
);
2419 if (fd
->fd_lease_och
== NULL
) {
2420 fd
->fd_lease_och
= och
;
2423 mutex_unlock(&lli
->lli_och_mutex
);
2425 /* impossible now that only excl is supported for now */
2426 ll_lease_close(och
, inode
, &lease_broken
);
2431 case LL_IOC_GET_LEASE
: {
2432 struct ll_inode_info
*lli
= ll_i2info(inode
);
2433 struct ldlm_lock
*lock
= NULL
;
2436 mutex_lock(&lli
->lli_och_mutex
);
2437 if (fd
->fd_lease_och
!= NULL
) {
2438 struct obd_client_handle
*och
= fd
->fd_lease_och
;
2440 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
2442 lock_res_and_lock(lock
);
2443 if (!ldlm_is_cancel(lock
))
2444 rc
= och
->och_flags
&
2445 (FMODE_READ
| FMODE_WRITE
);
2446 unlock_res_and_lock(lock
);
2447 ldlm_lock_put(lock
);
2450 mutex_unlock(&lli
->lli_och_mutex
);
2453 case LL_IOC_HSM_IMPORT
: {
2454 struct hsm_user_import
*hui
;
2460 if (copy_from_user(hui
, (void *)arg
, sizeof(*hui
))) {
2465 rc
= ll_hsm_import(inode
, file
, hui
);
2474 ll_iocontrol_call(inode
, file
, cmd
, arg
, &err
))
2477 return obd_iocontrol(cmd
, ll_i2dtexp(inode
), 0, NULL
,
/*
 * llseek implementation.
 *
 * The first computation of retval (offset relative to i_size, f_pos or 0
 * depending on @origin) only feeds the debug trace. The call is counted
 * under LPROC_LL_LLSEEK. For SEEK_END, SEEK_HOLE and SEEK_DATA the file
 * size is refreshed with ll_glimpse_size() and read into eof; the actual
 * repositioning is done by generic_file_llseek_size(), bounded by
 * ll_file_maxbytes(inode).
 *
 * NOTE(review): the handling of a ll_glimpse_size() failure and the return
 * are not visible in this listing.
 */
2484 loff_t
ll_file_seek(struct file
*file
, loff_t offset
, int origin
)
2486 struct inode
*inode
= file
->f_dentry
->d_inode
;
2487 loff_t retval
, eof
= 0;
2489 retval
= offset
+ ((origin
== SEEK_END
) ? i_size_read(inode
) :
2490 (origin
== SEEK_CUR
) ? file
->f_pos
: 0);
2491 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2492 inode
->i_ino
, inode
->i_generation
, inode
, retval
, retval
,
2494 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_LLSEEK
, 1);
2496 if (origin
== SEEK_END
|| origin
== SEEK_HOLE
|| origin
== SEEK_DATA
) {
2497 retval
= ll_glimpse_size(inode
);
2500 eof
= i_size_read(inode
);
2503 retval
= generic_file_llseek_size(file
, offset
, origin
,
2504 ll_file_maxbytes(inode
), eof
);
/*
 * flush file operation: report asynchronous write-back errors.
 *
 * Must not be called on a directory (asserted). The error recorded in
 * lli->lli_async_rc is read and reset, and the error kept by the lov
 * layer is fetched with lov_read_and_clear_async_rc(). Any resulting
 * error is reported as -EIO, otherwise 0. fd->fd_write_failed is
 * consulted so that a failure the application already saw from write is
 * not reported a second time.
 *
 * NOTE(review): how err is folded into rc and the statement executed when
 * fd_write_failed is set are not visible in this listing.
 */
2508 int ll_flush(struct file
*file
, fl_owner_t id
)
2510 struct inode
*inode
= file
->f_dentry
->d_inode
;
2511 struct ll_inode_info
*lli
= ll_i2info(inode
);
2512 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2515 LASSERT(!S_ISDIR(inode
->i_mode
));
2517 /* catch async errors that were recorded back when async writeback
2518 * failed for pages in this mapping. */
2519 rc
= lli
->lli_async_rc
;
2520 lli
->lli_async_rc
= 0;
2521 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2525 /* The application has been told write failure already.
2526 * Do not report failure again. */
2527 if (fd
->fd_write_failed
)
2529 return rc
? -EIO
: 0;
/*
 * Run a CIT_FSYNC cl_io over the byte range starting at start for the
 * given inode.
 *
 * Visible steps: the mode is checked against CL_FSYNC_NONE, CL_FSYNC_LOCAL,
 * CL_FSYNC_DISCARD and CL_FSYNC_ALL; a nested cl environment is taken; an
 * OSS write capability is obtained with ll_osscapa_get(); the per-thread
 * io is pointed at the inode cl_object and its ci_fsync parameters
 * (capa, start, fid, mode, nr_written = 0) are filled in.  If cl_io_init()
 * succeeds the io is run with cl_io_loop(), otherwise io->ci_result is
 * taken as the result.  fi_nr_written is then used as the result, the io
 * is finalised and the environment released.
 *
 * NOTE(review): lines are missing from this listing (the reaction to an
 * invalid mode, the IS_ERR test on env, the assignment of the range end,
 * the condition guarding result = fi_nr_written, the capability release
 * and the final return); confirm against the full source.
 */
2533 * Called to make sure a portion of file has been written out.
2534 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2536 * Return how many pages have been written.
2538 int cl_sync_file_range(struct inode
*inode
, loff_t start
, loff_t end
,
2539 enum cl_fsync_mode mode
, int ignore_layout
)
2541 struct cl_env_nest nest
;
2544 struct obd_capa
*capa
= NULL
;
2545 struct cl_fsync_io
*fio
;
2548 if (mode
!= CL_FSYNC_NONE
&& mode
!= CL_FSYNC_LOCAL
&&
2549 mode
!= CL_FSYNC_DISCARD
&& mode
!= CL_FSYNC_ALL
)
2552 env
= cl_env_nested_get(&nest
);
2554 return PTR_ERR(env
);
2556 capa
= ll_osscapa_get(inode
, CAPA_OPC_OSS_WRITE
);
2558 io
= ccc_env_thread_io(env
);
2559 io
->ci_obj
= cl_i2info(inode
)->lli_clob
;
2560 io
->ci_ignore_layout
= ignore_layout
;
2562 /* initialize parameters for sync */
2563 fio
= &io
->u
.ci_fsync
;
2564 fio
->fi_capa
= capa
;
2565 fio
->fi_start
= start
;
2567 fio
->fi_fid
= ll_inode2fid(inode
);
2568 fio
->fi_mode
= mode
;
2569 fio
->fi_nr_written
= 0;
2571 if (cl_io_init(env
, io
, CIT_FSYNC
, io
->ci_obj
) == 0)
2572 result
= cl_io_loop(env
, io
);
2574 result
= io
->ci_result
;
2576 result
= fio
->fi_nr_written
;
2577 cl_io_fini(env
, io
);
2578 cl_env_nested_put(&nest
, env
);
/*
 * fsync handler.
 *
 * Visible steps: write back and wait on the page cache range with
 * filemap_write_and_wait_range(), then, under inode->i_mutex, collect
 * asynchronous writeback errors for non-directories (lli_async_rc is read
 * and cleared, and lov_read_and_clear_async_rc() is called), issue
 * md_sync() to the metadata export with an MDS capability and drop the
 * reply with ptlrpc_req_finished().  When datasync is set and the inode is
 * a regular file, the whole object (0 .. OBD_OBJECT_EOF) is synced through
 * cl_sync_file_range() and fd->fd_write_failed is updated to record the
 * outcome.
 *
 * NOTE(review): the remark below about a dentry argument does not match
 * the visible signature, which takes no dentry.  Lines are also missing
 * from this listing (declarations of rc and err, how err is merged into
 * rc, the tail of the md_sync() and cl_sync_file_range() calls, the
 * condition choosing between fd_write_failed true and false, the return).
 */
2586 * When dentry is provided (the 'else' case), *file->f_dentry may be
2587 * null and dentry must be used directly rather than pulled from
2588 * *file->f_dentry as is done otherwise.
2591 int ll_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
2593 struct dentry
*dentry
= file
->f_dentry
;
2594 struct inode
*inode
= dentry
->d_inode
;
2595 struct ll_inode_info
*lli
= ll_i2info(inode
);
2596 struct ptlrpc_request
*req
;
2597 struct obd_capa
*oc
;
2600 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p)\n", inode
->i_ino
,
2601 inode
->i_generation
, inode
);
2602 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FSYNC
, 1);
2604 rc
= filemap_write_and_wait_range(inode
->i_mapping
, start
, end
);
2605 mutex_lock(&inode
->i_mutex
);
2607 /* catch async errors that were recorded back when async writeback
2608 * failed for pages in this mapping. */
2609 if (!S_ISDIR(inode
->i_mode
)) {
2610 err
= lli
->lli_async_rc
;
2611 lli
->lli_async_rc
= 0;
2614 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2619 oc
= ll_mdscapa_get(inode
);
2620 err
= md_sync(ll_i2sbi(inode
)->ll_md_exp
, ll_inode2fid(inode
), oc
,
2626 ptlrpc_req_finished(req
);
2628 if (datasync
&& S_ISREG(inode
->i_mode
)) {
2629 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2631 err
= cl_sync_file_range(inode
, 0, OBD_OBJECT_EOF
,
2633 if (rc
== 0 && err
< 0)
2636 fd
->fd_write_failed
= true;
2638 fd
->fd_write_failed
= false;
2641 mutex_unlock(&inode
->i_mutex
);
/*
 * flock/lock handler that takes the lock cluster-wide through an
 * LDLM_FLOCK enqueue on the metadata export.
 *
 * Visible steps:
 *  - build the flock policy from file_lock: FL_FLOCK requests cover the
 *    whole file (end = OFFSET_MAX) and are owned by fl_file; FL_POSIX
 *    requests use fl_owner and the fl_start .. fl_end range; the pid is
 *    always copied, and it replaces the owner when fl_lmops provides
 *    lm_compare_owner (lockd workaround described below);
 *  - translate fl_type into an LDLM mode (LCK_PR, LCK_NL for unlock,
 *    LCK_PW) and cmd into enqueue flags (LDLM_FL_BLOCK_NOWAIT,
 *    LDLM_FL_TEST_LOCK);
 *  - enqueue with md_enqueue();
 *  - when the enqueue succeeded, or the request was an unlock, record the
 *    lock locally with flock_lock_file_wait() or posix_lock_file_wait()
 *    (the POSIX call is skipped for LDLM_FL_TEST_LOCK);
 *  - if that local step returns non-zero for a non-unlock request, enqueue
 *    again with LCK_NL, which the comment below defines as an unlock;
 *  - release op_data with ll_finish_md_op_data().
 *
 * NOTE(review): the case labels of both switch statements, several
 * declarations (rc, rc2, flags) and the return statements are missing
 * from this listing; confirm the mapping against the full source.
 */
2645 int ll_file_flock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2647 struct inode
*inode
= file
->f_dentry
->d_inode
;
2648 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
2649 struct ldlm_enqueue_info einfo
= {
2650 .ei_type
= LDLM_FLOCK
,
2651 .ei_cb_cp
= ldlm_flock_completion_ast
,
2652 .ei_cbdata
= file_lock
,
2654 struct md_op_data
*op_data
;
2655 struct lustre_handle lockh
= {0};
2656 ldlm_policy_data_t flock
= {{0}};
2661 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu file_lock=%p\n",
2662 inode
->i_ino
, file_lock
);
2664 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FLOCK
, 1);
2666 if (file_lock
->fl_flags
& FL_FLOCK
) {
2667 LASSERT((cmd
== F_SETLKW
) || (cmd
== F_SETLK
));
2668 /* flocks are whole-file locks */
2669 flock
.l_flock
.end
= OFFSET_MAX
;
2670 /* For flocks owner is determined by the local file descriptor */
2671 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_file
;
2672 } else if (file_lock
->fl_flags
& FL_POSIX
) {
2673 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_owner
;
2674 flock
.l_flock
.start
= file_lock
->fl_start
;
2675 flock
.l_flock
.end
= file_lock
->fl_end
;
2679 flock
.l_flock
.pid
= file_lock
->fl_pid
;
2681 /* Somewhat ugly workaround for svc lockd.
2682 * lockd installs custom fl_lmops->lm_compare_owner that checks
2683 * for the fl_owner to be the same (which it always is on local node
2684 * I guess between lockd processes) and then compares pid.
2685 * As such we assign pid to the owner field to make it all work,
2686 * conflict with normal locks is unlikely since pid space and
2687 * pointer space for current->files are not intersecting */
2688 if (file_lock
->fl_lmops
&& file_lock
->fl_lmops
->lm_compare_owner
)
2689 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_pid
;
2691 switch (file_lock
->fl_type
) {
2693 einfo
.ei_mode
= LCK_PR
;
2696 /* An unlock request may or may not have any relation to
2697 * existing locks so we may not be able to pass a lock handle
2698 * via a normal ldlm_lock_cancel() request. The request may even
2699 * unlock a byte range in the middle of an existing lock. In
2700 * order to process an unlock request we need all of the same
2701 * information that is given with a normal read or write record
2702 * lock request. To avoid creating another ldlm unlock (cancel)
2703 * message we'll treat a LCK_NL flock request as an unlock. */
2704 einfo
.ei_mode
= LCK_NL
;
2707 einfo
.ei_mode
= LCK_PW
;
2710 CDEBUG(D_INFO
, "Unknown fcntl lock type: %d\n",
2711 file_lock
->fl_type
);
2726 flags
= LDLM_FL_BLOCK_NOWAIT
;
2732 flags
= LDLM_FL_TEST_LOCK
;
2733 /* Save the old mode so that if the mode in the lock changes we
2734 * can decrement the appropriate reader or writer refcount. */
2735 file_lock
->fl_type
= einfo
.ei_mode
;
2738 CERROR("unknown fcntl lock command: %d\n", cmd
);
2742 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2743 LUSTRE_OPC_ANY
, NULL
);
2744 if (IS_ERR(op_data
))
2745 return PTR_ERR(op_data
);
2747 CDEBUG(D_DLMTRACE
, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2748 "start="LPU64
", end="LPU64
"\n", inode
->i_ino
, flock
.l_flock
.pid
,
2749 flags
, einfo
.ei_mode
, flock
.l_flock
.start
, flock
.l_flock
.end
);
2751 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
,
2752 op_data
, &lockh
, &flock
, 0, NULL
/* req */, flags
);
2754 if ((file_lock
->fl_flags
& FL_FLOCK
) &&
2755 (rc
== 0 || file_lock
->fl_type
== F_UNLCK
))
2756 rc2
= flock_lock_file_wait(file
, file_lock
);
2757 if ((file_lock
->fl_flags
& FL_POSIX
) &&
2758 (rc
== 0 || file_lock
->fl_type
== F_UNLCK
) &&
2759 !(flags
& LDLM_FL_TEST_LOCK
))
2760 rc2
= posix_lock_file_wait(file
, file_lock
);
2762 if (rc2
&& file_lock
->fl_type
!= F_UNLCK
) {
2763 einfo
.ei_mode
= LCK_NL
;
2764 md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
,
2765 op_data
, &lockh
, &flock
, 0, NULL
/* req */, flags
);
2769 ll_finish_md_op_data(op_data
);
/*
 * Handler installed as both .flock and .lock in
 * ll_file_operations_noflock; the comment on that table says those
 * operations exist to return ENOSYS on flock calls.
 *
 * NOTE(review): the body of this function is not present in this listing.
 */
2774 int ll_file_noflock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2780 * test if some locks matching bits and l_req_mode are acquired
2781 * - bits can be in different locks
2782 * - if found clear the common lock bits in *bits
2783 * - the bits not found, are kept in *bits
2785 * \param bits [IN/OUT] searched lock bits; bits that are found are cleared
2786 * \param l_req_mode [IN] searched lock mode
2787 * \retval boolean, true iff all bits are found
/*
 * Check which of the inodebits in *bits are already covered by cached
 * metadata locks on this inode.
 *
 * LCK_MINMODE as l_req_mode means any of CR, CW, PR or PW is acceptable;
 * otherwise only l_req_mode is matched.  Each bit position from 0 to
 * MDS_INODELOCK_MAXSHIFT that is still set in *bits is tried separately
 * with md_lock_match(), using LDLM_FL_BLOCK_GRANTED, LDLM_FL_CBPENDING and
 * LDLM_FL_TEST_LOCK.  On a match the lock is looked up from its handle and
 * bits are cleared from *bits; the loop stops early once *bits is 0.
 *
 * NOTE(review): 1 << i is an int shift while *bits is __u64; that is fine
 * only while MDS_INODELOCK_MAXSHIFT stays below 31 -- confirm.
 * NOTE(review): the declarations of fid, flags and i, the NULL check on
 * the looked-up lock and the return statement are not in this listing.
 */
2789 int ll_have_md_lock(struct inode
*inode
, __u64
*bits
, ldlm_mode_t l_req_mode
)
2791 struct lustre_handle lockh
;
2792 ldlm_policy_data_t policy
;
2793 ldlm_mode_t mode
= (l_req_mode
== LCK_MINMODE
) ?
2794 (LCK_CR
|LCK_CW
|LCK_PR
|LCK_PW
) : l_req_mode
;
2802 fid
= &ll_i2info(inode
)->lli_fid
;
2803 CDEBUG(D_INFO
, "trying to match res "DFID
" mode %s\n", PFID(fid
),
2804 ldlm_lockname
[mode
]);
2806 flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_CBPENDING
| LDLM_FL_TEST_LOCK
;
2807 for (i
= 0; i
<= MDS_INODELOCK_MAXSHIFT
&& *bits
!= 0; i
++) {
2808 policy
.l_inodebits
.bits
= *bits
& (1 << i
);
2809 if (policy
.l_inodebits
.bits
== 0)
2812 if (md_lock_match(ll_i2mdexp(inode
), flags
, fid
, LDLM_IBITS
,
2813 &policy
, mode
, &lockh
)) {
2814 struct ldlm_lock
*lock
;
2816 lock
= ldlm_handle2lock(&lockh
);
2819 ~(lock
->l_policy_data
.l_inodebits
.bits
);
2820 LDLM_LOCK_PUT(lock
);
2822 *bits
&= ~policy
.l_inodebits
.bits
;
/*
 * Try to match a cached inodebits lock on this inode that covers bits.
 *
 * Builds an inodebits policy from bits and calls md_lock_match() on the
 * metadata export with LDLM_FL_BLOCK_GRANTED or-ed into the caller flags;
 * the matched lock handle is stored through lockh.
 *
 * NOTE(review): the last line of the parameter list (which supplies mode),
 * the declarations of fid and rc, and the return statement are not in
 * this listing.
 */
2829 ldlm_mode_t
ll_take_md_lock(struct inode
*inode
, __u64 bits
,
2830 struct lustre_handle
*lockh
, __u64 flags
,
2833 ldlm_policy_data_t policy
= { .l_inodebits
= {bits
}};
2837 fid
= &ll_i2info(inode
)->lli_fid
;
2838 CDEBUG(D_INFO
, "trying to match res "DFID
"\n", PFID(fid
));
2840 rc
= md_lock_match(ll_i2mdexp(inode
), LDLM_FL_BLOCK_GRANTED
|flags
,
2841 fid
, LDLM_IBITS
, &policy
, mode
, lockh
);
/*
 * Post-process the result code of a revalidation RPC.
 *
 * -ENOENT is treated as an already unlinked file (see the comment in the
 * body); that branch tests whether the inode is neither a regular file
 * nor a directory.  Any other non-zero rc is logged with CERROR together
 * with the filesystem name and the FID.
 *
 * NOTE(review): the statements executed in the -ENOENT branch and the
 * return statement are not present in this listing.
 */
2846 static int ll_inode_revalidate_fini(struct inode
*inode
, int rc
)
2848 /* Already unlinked. Just update nlink and return success */
2849 if (rc
== -ENOENT
) {
2851 /* This path cannot be hit for regular files unless in
2852 * case of obscure races, so no need to validate size.
2854 if (!S_ISREG(inode
->i_mode
) && !S_ISDIR(inode
->i_mode
))
2856 } else if (rc
!= 0) {
2857 CERROR("%s: revalidate FID "DFID
" error: rc = %d\n",
2858 ll_get_fsname(inode
->i_sb
, NULL
, 0),
2859 PFID(ll_inode2fid(inode
)), rc
);
/*
 * Revalidate the attributes of dentry->d_inode against the metadata
 * server.
 *
 * Two paths are visible:
 *  1. If the export advertises OBD_CONNECT_ATTRFID, a getattr-by-FID
 *     intent is sent with md_intent_lock() (IT_GETATTR, or IT_LOOKUP when
 *     ibits is MDS_INODELOCK_LOOKUP), with M_CHECK_STALE set in
 *     it_create_mode for the duration of the call.  The result goes
 *     through ll_inode_revalidate_fini() and ll_revalidate_it_finish(); a
 *     dentry whose inode has i_nlink == 0 is invalidated with
 *     d_lustre_invalidate(), and ll_lookup_finish_locks() is called.
 *  2. Otherwise, if ll_have_md_lock() reports that ibits are not all
 *     covered by cached locks, md_getattr() is issued (for regular files
 *     the maximum EA size is obtained and EA size flags are added to the
 *     valid mask) and the reply is applied with ll_prep_inode().
 * The request is released with ptlrpc_req_finished().
 *
 * NOTE(review): the last line of the parameter list (which supplies
 * ibits), several conditions, goto or return statements and closing
 * braces are missing from this listing; the exact error flow cannot be
 * read from here.
 */
2865 int __ll_inode_revalidate_it(struct dentry
*dentry
, struct lookup_intent
*it
,
2868 struct inode
*inode
= dentry
->d_inode
;
2869 struct ptlrpc_request
*req
= NULL
;
2870 struct obd_export
*exp
;
2873 LASSERT(inode
!= NULL
);
2875 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2876 inode
->i_ino
, inode
->i_generation
, inode
, dentry
->d_name
.name
);
2878 exp
= ll_i2mdexp(inode
);
2880 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2881 * But under CMD case, it caused some lock issues, should be fixed
2882 * with new CMD ibits lock. See bug 12718 */
2883 if (exp_connect_flags(exp
) & OBD_CONNECT_ATTRFID
) {
2884 struct lookup_intent oit
= { .it_op
= IT_GETATTR
};
2885 struct md_op_data
*op_data
;
2887 if (ibits
== MDS_INODELOCK_LOOKUP
)
2888 oit
.it_op
= IT_LOOKUP
;
2890 /* Call getattr by fid, so do not provide name at all. */
2891 op_data
= ll_prep_md_op_data(NULL
, dentry
->d_parent
->d_inode
,
2892 dentry
->d_inode
, NULL
, 0, 0,
2893 LUSTRE_OPC_ANY
, NULL
);
2894 if (IS_ERR(op_data
))
2895 return PTR_ERR(op_data
);
2897 oit
.it_create_mode
|= M_CHECK_STALE
;
2898 rc
= md_intent_lock(exp
, op_data
, NULL
, 0,
2899 /* we are not interested in name
2902 ll_md_blocking_ast
, 0);
2903 ll_finish_md_op_data(op_data
);
2904 oit
.it_create_mode
&= ~M_CHECK_STALE
;
2906 rc
= ll_inode_revalidate_fini(inode
, rc
);
2910 rc
= ll_revalidate_it_finish(req
, &oit
, dentry
);
2912 ll_intent_release(&oit
);
2916 /* Unlinked? Unhash dentry, so it is not picked up later by
2917 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2918 here to preserve get_cwd functionality on 2.6.
2920 if (!dentry
->d_inode
->i_nlink
)
2921 d_lustre_invalidate(dentry
, 0);
2923 ll_lookup_finish_locks(&oit
, dentry
);
2924 } else if (!ll_have_md_lock(dentry
->d_inode
, &ibits
, LCK_MINMODE
)) {
2925 struct ll_sb_info
*sbi
= ll_i2sbi(dentry
->d_inode
);
2926 obd_valid valid
= OBD_MD_FLGETATTR
;
2927 struct md_op_data
*op_data
;
2930 if (S_ISREG(inode
->i_mode
)) {
2931 rc
= ll_get_max_mdsize(sbi
, &ealen
);
2934 valid
|= OBD_MD_FLEASIZE
| OBD_MD_FLMODEASIZE
;
2937 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
,
2938 0, ealen
, LUSTRE_OPC_ANY
,
2940 if (IS_ERR(op_data
))
2941 return PTR_ERR(op_data
);
2943 op_data
->op_valid
= valid
;
2944 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2945 * capa for this inode. Because we only keep capas of dirs
2947 rc
= md_getattr(sbi
->ll_md_exp
, op_data
, &req
);
2948 ll_finish_md_op_data(op_data
);
2950 rc
= ll_inode_revalidate_fini(inode
, rc
);
2954 rc
= ll_prep_inode(&inode
, req
, NULL
, NULL
);
2957 ptlrpc_req_finished(req
);
/*
 * Revalidate an inode and then bring its times or size up to date.
 *
 * Calls __ll_inode_revalidate_it() first.  For anything that is not a
 * regular file, atime, mtime and ctime are copied from the lli_lvb cached
 * in the ll_inode_info.  ll_glimpse_size() is called unless
 * LLIF_FILE_RESTORING is set in lli_flags (rationale in the comment in
 * the body).
 *
 * NOTE(review): the last line of the parameter list, the check of rc
 * after the first call, the else that presumably separates the two
 * branches and the return statement are not in this listing.
 */
2961 int ll_inode_revalidate_it(struct dentry
*dentry
, struct lookup_intent
*it
,
2964 struct inode
*inode
= dentry
->d_inode
;
2967 rc
= __ll_inode_revalidate_it(dentry
, it
, ibits
);
2971 /* if object isn't regular file, don't validate size */
2972 if (!S_ISREG(inode
->i_mode
)) {
2973 LTIME_S(inode
->i_atime
) = ll_i2info(inode
)->lli_lvb
.lvb_atime
;
2974 LTIME_S(inode
->i_mtime
) = ll_i2info(inode
)->lli_lvb
.lvb_mtime
;
2975 LTIME_S(inode
->i_ctime
) = ll_i2info(inode
)->lli_lvb
.lvb_ctime
;
2977 /* In case of restore, the MDT has the right size and has
2978 * already send it back without granting the layout lock,
2979 * inode is up-to-date so glimpse is useless.
2980 * Also to glimpse we need the layout, in case of a running
2981 * restore the MDT holds the layout lock so the glimpse will
2982 * block up to the end of restore (getattr will block)
2984 if (!(ll_i2info(inode
)->lli_flags
& LLIF_FILE_RESTORING
))
2985 rc
= ll_glimpse_size(inode
);
/*
 * Fill a kstat for the dentry after revalidating its inode.
 *
 * The inode is revalidated with ll_inode_revalidate_it() for the
 * MDS_INODELOCK_UPDATE and MDS_INODELOCK_LOOKUP bits and the call is
 * counted under LPROC_LL_GETATTR.  The kstat fields are then copied from
 * the inode.  stat->ino is cl_fid_build_ino(&lli->lli_fid, 1) when
 * ll_need_32bit_api(sbi) is true and inode->i_ino otherwise; blksize is
 * 1 << i_blkbits and size comes from i_size_read().
 *
 * NOTE(review): the declaration of res, the check of res after
 * revalidation and the return statement are not in this listing.
 */
2990 int ll_getattr_it(struct vfsmount
*mnt
, struct dentry
*de
,
2991 struct lookup_intent
*it
, struct kstat
*stat
)
2993 struct inode
*inode
= de
->d_inode
;
2994 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
2995 struct ll_inode_info
*lli
= ll_i2info(inode
);
2998 res
= ll_inode_revalidate_it(de
, it
, MDS_INODELOCK_UPDATE
|
2999 MDS_INODELOCK_LOOKUP
);
3000 ll_stats_ops_tally(sbi
, LPROC_LL_GETATTR
, 1);
3005 stat
->dev
= inode
->i_sb
->s_dev
;
3006 if (ll_need_32bit_api(sbi
))
3007 stat
->ino
= cl_fid_build_ino(&lli
->lli_fid
, 1);
3009 stat
->ino
= inode
->i_ino
;
3010 stat
->mode
= inode
->i_mode
;
3011 stat
->nlink
= inode
->i_nlink
;
3012 stat
->uid
= inode
->i_uid
;
3013 stat
->gid
= inode
->i_gid
;
3014 stat
->rdev
= inode
->i_rdev
;
3015 stat
->atime
= inode
->i_atime
;
3016 stat
->mtime
= inode
->i_mtime
;
3017 stat
->ctime
= inode
->i_ctime
;
3018 stat
->blksize
= 1 << inode
->i_blkbits
;
3020 stat
->size
= i_size_read(inode
);
3021 stat
->blocks
= inode
->i_blocks
;
/*
 * getattr inode operation: builds a local IT_GETATTR lookup intent and
 * returns whatever ll_getattr_it() returns for it.
 */
3025 int ll_getattr(struct vfsmount
*mnt
, struct dentry
*de
, struct kstat
*stat
)
3027 struct lookup_intent it
= { .it_op
= IT_GETATTR
};
3029 return ll_getattr_it(mnt
, de
, &it
, stat
);
/*
 * fiemap inode operation.
 *
 * Allocates a struct ll_user_fiemap with room for fi_extents_max extents,
 * fills in flags, extent count, start and length from the caller, copies
 * one struct ll_fiemap_extent from fieinfo->fi_extents_start into the
 * first slot, and calls ll_do_fiemap().  Afterwards the flags and the
 * mapped extent count are copied back to fieinfo, fm_mapped_extents
 * extents are copied to fi_extents_start, and the buffer is freed.
 *
 * NOTE(review): fi_extents_start is read and written with plain memcpy();
 * if it is a user-space pointer this needs copy_from_user/copy_to_user --
 * confirm.
 * NOTE(review): extent_count * sizeof(struct ll_fiemap_extent) is not
 * checked for overflow in the visible lines.
 * NOTE(review): the declarations of rc and num_bytes, the allocation
 * failure check and the return statement are not in this listing.
 */
3032 int ll_fiemap(struct inode
*inode
, struct fiemap_extent_info
*fieinfo
,
3033 __u64 start
, __u64 len
)
3037 struct ll_user_fiemap
*fiemap
;
3038 unsigned int extent_count
= fieinfo
->fi_extents_max
;
3040 num_bytes
= sizeof(*fiemap
) + (extent_count
*
3041 sizeof(struct ll_fiemap_extent
));
3042 OBD_ALLOC_LARGE(fiemap
, num_bytes
);
3047 fiemap
->fm_flags
= fieinfo
->fi_flags
;
3048 fiemap
->fm_extent_count
= fieinfo
->fi_extents_max
;
3049 fiemap
->fm_start
= start
;
3050 fiemap
->fm_length
= len
;
3051 memcpy(&fiemap
->fm_extents
[0], fieinfo
->fi_extents_start
,
3052 sizeof(struct ll_fiemap_extent
));
3054 rc
= ll_do_fiemap(inode
, fiemap
, num_bytes
);
3056 fieinfo
->fi_flags
= fiemap
->fm_flags
;
3057 fieinfo
->fi_extents_mapped
= fiemap
->fm_mapped_extents
;
3058 memcpy(fieinfo
->fi_extents_start
, &fiemap
->fm_extents
[0],
3059 fiemap
->fm_mapped_extents
* sizeof(struct ll_fiemap_extent
));
3061 OBD_FREE_LARGE(fiemap
, num_bytes
);
/*
 * get_acl inode operation: takes a new reference on the POSIX ACL cached
 * in lli->lli_posix_acl with posix_acl_dup(), under lli->lli_lock.  The
 * type argument is not used in the visible lines.
 *
 * NOTE(review): the return statement is not in this listing.
 */
3065 struct posix_acl
* ll_get_acl(struct inode
*inode
, int type
)
3067 struct ll_inode_info
*lli
= ll_i2info(inode
);
3068 struct posix_acl
*acl
= NULL
;
3070 spin_lock(&lli
->lli_lock
);
3071 /* VFS' acl_permission_check->check_acl will release the refcount */
3072 acl
= posix_acl_dup(lli
->lli_posix_acl
);
3073 spin_unlock(&lli
->lli_lock
);
/*
 * permission inode operation.
 *
 * When MAY_NOT_BLOCK is defined, a request carrying that flag is tested
 * first.  The root inode of the superblock is revalidated here with
 * __ll_inode_revalidate_it() for MDS_INODELOCK_LOOKUP because, per the
 * comment in the body, lookup does not validate it.  Mounts flagged
 * LL_SBI_RMT_CLIENT delegate to lustre_check_remote_perm(); everything
 * else is counted under LPROC_LL_INODE_PERM and checked with
 * generic_permission().
 *
 * NOTE(review): the statement taken for MAY_NOT_BLOCK, the matching
 * endif, the check of rc after revalidation and the return statement are
 * not in this listing.
 */
3079 int ll_inode_permission(struct inode
*inode
, int mask
)
3083 #ifdef MAY_NOT_BLOCK
3084 if (mask
& MAY_NOT_BLOCK
)
3088 /* as root inode are NOT getting validated in lookup operation,
3089 * need to do it before permission check. */
3091 if (inode
== inode
->i_sb
->s_root
->d_inode
) {
3092 struct lookup_intent it
= { .it_op
= IT_LOOKUP
};
3094 rc
= __ll_inode_revalidate_it(inode
->i_sb
->s_root
, &it
,
3095 MDS_INODELOCK_LOOKUP
);
3100 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3101 inode
->i_ino
, inode
->i_generation
, inode
, inode
->i_mode
, mask
);
3103 if (ll_i2sbi(inode
)->ll_flags
& LL_SBI_RMT_CLIENT
)
3104 return lustre_check_remote_perm(inode
, mask
);
3106 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_INODE_PERM
, 1);
3107 rc
= generic_permission(inode
, mask
);
/*
 * File operations table without any .flock or .lock entry in the visible
 * lines; the existing comment ties it to the localflock mount option.
 *
 * NOTE(review): the last entries and the closing brace of the
 * initializer are not in this listing.
 */
3112 /* -o localflock - only provides locally consistent flock locks */
3113 struct file_operations ll_file_operations
= {
3114 .read
= ll_file_read
,
3115 .aio_read
= ll_file_aio_read
,
3116 .write
= ll_file_write
,
3117 .aio_write
= ll_file_aio_write
,
3118 .unlocked_ioctl
= ll_file_ioctl
,
3119 .open
= ll_file_open
,
3120 .release
= ll_file_release
,
3121 .mmap
= ll_file_mmap
,
3122 .llseek
= ll_file_seek
,
3123 .splice_read
= ll_file_splice_read
,
/*
 * File operations table whose .flock and .lock entries both point at
 * ll_file_flock; the other visible entries are the same handlers as in
 * ll_file_operations.
 *
 * NOTE(review): some entries and the closing brace of the initializer are
 * not in this listing.
 */
3128 struct file_operations ll_file_operations_flock
= {
3129 .read
= ll_file_read
,
3130 .aio_read
= ll_file_aio_read
,
3131 .write
= ll_file_write
,
3132 .aio_write
= ll_file_aio_write
,
3133 .unlocked_ioctl
= ll_file_ioctl
,
3134 .open
= ll_file_open
,
3135 .release
= ll_file_release
,
3136 .mmap
= ll_file_mmap
,
3137 .llseek
= ll_file_seek
,
3138 .splice_read
= ll_file_splice_read
,
3141 .flock
= ll_file_flock
,
3142 .lock
= ll_file_flock
/*
 * File operations table whose .flock and .lock entries both point at
 * ll_file_noflock; the other visible entries are the same handlers as in
 * ll_file_operations.
 *
 * NOTE(review): some entries and the closing brace of the initializer are
 * not in this listing.
 */
3145 /* These are for -o noflock - to return ENOSYS on flock calls */
3146 struct file_operations ll_file_operations_noflock
= {
3147 .read
= ll_file_read
,
3148 .aio_read
= ll_file_aio_read
,
3149 .write
= ll_file_write
,
3150 .aio_write
= ll_file_aio_write
,
3151 .unlocked_ioctl
= ll_file_ioctl
,
3152 .open
= ll_file_open
,
3153 .release
= ll_file_release
,
3154 .mmap
= ll_file_mmap
,
3155 .llseek
= ll_file_seek
,
3156 .splice_read
= ll_file_splice_read
,
3159 .flock
= ll_file_noflock
,
3160 .lock
= ll_file_noflock
/*
 * Inode operations table wiring the attribute, permission, extended
 * attribute, fiemap and ACL handlers listed below.
 *
 * NOTE(review): the closing brace of the initializer is not in this
 * listing.
 */
3163 struct inode_operations ll_file_inode_operations
= {
3164 .setattr
= ll_setattr
,
3165 .getattr
= ll_getattr
,
3166 .permission
= ll_inode_permission
,
3167 .setxattr
= ll_setxattr
,
3168 .getxattr
= ll_getxattr
,
3169 .listxattr
= ll_listxattr
,
3170 .removexattr
= ll_removexattr
,
3171 .fiemap
= ll_fiemap
,
3172 .get_acl
= ll_get_acl
,
/*
 * Registry of dynamically registered ioctl handlers: ioc_head is the list
 * of registered blocks and ioc_sem is the rw_semaphore taken around every
 * visible access to that list.  The instance is statically initialised
 * with __RWSEM_INITIALIZER and LIST_HEAD_INIT.
 *
 * NOTE(review): the line naming the instance is not in this listing; the
 * initialisers refer to it as llioc.
 */
3175 /* dynamic ioctl number support routines */
3176 static struct llioc_ctl_data
{
3177 struct rw_semaphore ioc_sem
;
3178 struct list_head ioc_head
;
3180 __RWSEM_INITIALIZER(llioc
.ioc_sem
),
3181 LIST_HEAD_INIT(llioc
.ioc_head
)
/*
 * Fields of one registered ioctl block, as used by the registration
 * routines in this file: iocd_list links the block into llioc.ioc_head,
 * iocd_size is the allocation size passed back to OBD_FREE, iocd_cb is
 * the callback, iocd_count is the number of entries in iocd_cmd, and
 * iocd_cmd is the trailing array of ioctl command numbers.
 *
 * NOTE(review): the line that opens the struct is not in this listing;
 * the name struct llioc_data is taken from the users of these fields.
 */
3186 struct list_head iocd_list
;
3187 unsigned int iocd_size
;
3188 llioc_callback_t iocd_cb
;
3189 unsigned int iocd_count
;
3190 unsigned int iocd_cmd
[0];
/*
 * Register a callback for a set of ioctl command numbers.
 *
 * Arguments are validated first: cb and cmd must be non-NULL and count
 * must lie in 0 .. LLIOC_MAX_CMD.  A struct llioc_data with room for count
 * command numbers is allocated, its header is zeroed, the size, callback
 * and count are recorded and the command numbers are copied in.  The
 * block is appended to llioc.ioc_head under the write side of
 * llioc.ioc_sem.
 *
 * NOTE(review): the declaration of size and all return statements,
 * including the value handed back to the caller, are not in this listing.
 */
3193 void *ll_iocontrol_register(llioc_callback_t cb
, int count
, unsigned int *cmd
)
3196 struct llioc_data
*in_data
= NULL
;
3198 if (cb
== NULL
|| cmd
== NULL
||
3199 count
> LLIOC_MAX_CMD
|| count
< 0)
3202 size
= sizeof(*in_data
) + count
* sizeof(unsigned int);
3203 OBD_ALLOC(in_data
, size
);
3204 if (in_data
== NULL
)
3207 memset(in_data
, 0, sizeof(*in_data
));
3208 in_data
->iocd_size
= size
;
3209 in_data
->iocd_cb
= cb
;
3210 in_data
->iocd_count
= count
;
3211 memcpy(in_data
->iocd_cmd
, cmd
, sizeof(unsigned int) * count
);
3213 down_write(&llioc
.ioc_sem
);
3214 list_add_tail(&in_data
->iocd_list
, &llioc
.ioc_head
);
3215 up_write(&llioc
.ioc_sem
);
/*
 * Remove a previously registered ioctl block identified by magic.
 *
 * The list is walked under the write side of llioc.ioc_sem.  For the
 * entry being removed, the size is saved, the entry is unlinked, the
 * semaphore is released and the entry is freed with OBD_FREE.  If the
 * walk completes, the semaphore is released and a warning with the magic
 * value is logged.  Both registration routines are exported below.
 *
 * NOTE(review): the test that compares an entry against magic, the early
 * checks on magic and the return after freeing are not in this listing.
 */
3220 void ll_iocontrol_unregister(void *magic
)
3222 struct llioc_data
*tmp
;
3227 down_write(&llioc
.ioc_sem
);
3228 list_for_each_entry(tmp
, &llioc
.ioc_head
, iocd_list
) {
3230 unsigned int size
= tmp
->iocd_size
;
3232 list_del(&tmp
->iocd_list
);
3233 up_write(&llioc
.ioc_sem
);
3235 OBD_FREE(tmp
, size
);
3239 up_write(&llioc
.ioc_sem
);
3241 CWARN("didn't find iocontrol register block with magic: %p\n", magic
);
3244 EXPORT_SYMBOL(ll_iocontrol_register
);
3245 EXPORT_SYMBOL(ll_iocontrol_unregister
);
/*
 * Offer an ioctl to the dynamically registered handlers.
 *
 * Under the read side of llioc.ioc_sem, every registered block is
 * scanned; when one of its iocd_cmd entries equals cmd, its callback is
 * invoked with the inode, file, cmd, arg, the block itself and a pointer
 * to the local rc (initially -EINVAL).  The callback result is kept in
 * ret (initially LLIOC_CONT) and is compared against LLIOC_STOP.
 *
 * NOTE(review): the statements that leave the loops, the store through
 * rcp and the return statement are not in this listing.
 */
3247 enum llioc_iter
ll_iocontrol_call(struct inode
*inode
, struct file
*file
,
3248 unsigned int cmd
, unsigned long arg
, int *rcp
)
3250 enum llioc_iter ret
= LLIOC_CONT
;
3251 struct llioc_data
*data
;
3252 int rc
= -EINVAL
, i
;
3254 down_read(&llioc
.ioc_sem
);
3255 list_for_each_entry(data
, &llioc
.ioc_head
, iocd_list
) {
3256 for (i
= 0; i
< data
->iocd_count
; i
++) {
3257 if (cmd
!= data
->iocd_cmd
[i
])
3260 ret
= data
->iocd_cb(inode
, file
, cmd
, arg
, data
, &rc
);
3264 if (ret
== LLIOC_STOP
)
3267 up_read(&llioc
.ioc_sem
);
/*
 * Apply a configuration request to the cl_object of an inode.
 *
 * lli->lli_clob is tested against NULL first.  A nested cl environment is
 * then taken, cl_conf_set() is called with conf and the environment is
 * released.  For OBJECT_CONF_SET the lock in conf->coc_lock is asserted to
 * be non-NULL and to carry a layout, and ldlm_lock_allow_match() is called
 * on it (rationale in the comment in the body).
 *
 * NOTE(review): the action taken when lli_clob is NULL, the IS_ERR test
 * on env, any condition on result around ldlm_lock_allow_match() and the
 * return statement are not in this listing.
 */
3274 int ll_layout_conf(struct inode
*inode
, const struct cl_object_conf
*conf
)
3276 struct ll_inode_info
*lli
= ll_i2info(inode
);
3277 struct cl_env_nest nest
;
3281 if (lli
->lli_clob
== NULL
)
3284 env
= cl_env_nested_get(&nest
);
3286 return PTR_ERR(env
);
3288 result
= cl_conf_set(env
, lli
->lli_clob
, conf
);
3289 cl_env_nested_put(&nest
, env
);
3291 if (conf
->coc_opc
== OBJECT_CONF_SET
) {
3292 struct ldlm_lock
*lock
= conf
->coc_lock
;
3294 LASSERT(lock
!= NULL
);
3295 LASSERT(ldlm_has_layout(lock
));
3297 /* it can only be allowed to match after layout is
3298 * applied to inode otherwise false layout would be
3299 * seen. Applying layout should happen before dropping
3300 * the intent lock. */
3301 ldlm_lock_allow_match(lock
);
/*
 * Make sure the layout lock carries the file layout in its LVB.
 *
 * The lock is tested for l_lvb_data being set together with
 * LDLM_FL_LVB_READY.  Otherwise the layout is fetched from the metadata
 * server: the maximum EA size is obtained with ll_get_max_mdsize() and
 * the XATTR_NAME_LOV extended attribute is requested with md_getxattr().
 * A reply without a body, or with eadatasize larger than the requested
 * maximum, yields -EPROTO.  A non-empty layout is copied into a freshly
 * allocated buffer (-ENOMEM on failure) which, under the resource lock,
 * replaces any previous l_lvb_data; l_lvb_len is updated to match.  The
 * request is released with ptlrpc_req_finished().
 *
 * NOTE(review): several declarations, the checks after
 * ll_get_max_mdsize() and md_getxattr(), the actions for the early exits,
 * the condition yielding -EFAULT, the out label, the capability release
 * and the return statement are not in this listing.
 */
3307 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3308 static int ll_layout_fetch(struct inode
*inode
, struct ldlm_lock
*lock
)
3311 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3312 struct obd_capa
*oc
;
3313 struct ptlrpc_request
*req
;
3314 struct mdt_body
*body
;
3320 CDEBUG(D_INODE
, DFID
" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3321 PFID(ll_inode2fid(inode
)), !!(lock
->l_flags
& LDLM_FL_LVB_READY
),
3322 lock
->l_lvb_data
, lock
->l_lvb_len
);
3324 if ((lock
->l_lvb_data
!= NULL
) && (lock
->l_flags
& LDLM_FL_LVB_READY
))
3327 /* if layout lock was granted right away, the layout is returned
3328 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3329 * blocked and then granted via completion ast, we have to fetch
3330 * layout here. Please note that we can't use the LVB buffer in
3331 * completion AST because it doesn't have a large enough buffer */
3332 oc
= ll_mdscapa_get(inode
);
3333 rc
= ll_get_max_mdsize(sbi
, &lmmsize
);
3335 rc
= md_getxattr(sbi
->ll_md_exp
, ll_inode2fid(inode
), oc
,
3336 OBD_MD_FLXATTR
, XATTR_NAME_LOV
, NULL
, 0,
3342 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
3343 if (body
== NULL
|| body
->eadatasize
> lmmsize
)
3344 GOTO(out
, rc
= -EPROTO
);
3346 lmmsize
= body
->eadatasize
;
3347 if (lmmsize
== 0) /* empty layout */
3350 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_EADATA
, lmmsize
);
3352 GOTO(out
, rc
= -EFAULT
);
3354 OBD_ALLOC_LARGE(lvbdata
, lmmsize
);
3355 if (lvbdata
== NULL
)
3356 GOTO(out
, rc
= -ENOMEM
);
3358 memcpy(lvbdata
, lmm
, lmmsize
);
3359 lock_res_and_lock(lock
);
3360 if (lock
->l_lvb_data
!= NULL
)
3361 OBD_FREE_LARGE(lock
->l_lvb_data
, lock
->l_lvb_len
);
3363 lock
->l_lvb_data
= lvbdata
;
3364 lock
->l_lvb_len
= lmmsize
;
3365 unlock_res_and_lock(lock
);
3368 ptlrpc_req_finished(req
);
/*
 * Apply the layout carried by a held layout lock to the inode and report
 * the layout generation through gen.
 *
 * Visible steps: the lock is looked up from lockh (asserted to be in use,
 * non-NULL and to carry a layout); the inode is attached to the lock with
 * md_set_lock_data(); LDLM_FL_LVB_READY is sampled under the resource
 * lock, and when it is set, or reconf is false, gen is taken from
 * lli->lli_layout_gen.  Otherwise the layout is obtained with
 * ll_layout_fetch(), unpacked from l_lvb_data with obd_unpackmd() (gen
 * becomes LL_LAYOUT_GEN_EMPTY or the lsm_layout_gen of the unpacked
 * layout; unpack errors are logged), and applied with an OBJECT_CONF_SET
 * ll_layout_conf() call, after which the unpacked md is freed.  -EBUSY
 * from that call is remembered in wait_layout.  The lock reference and
 * the lock mode reference are dropped, and an OBJECT_CONF_WAIT
 * ll_layout_conf() call is made to wait for the reconfiguration.
 *
 * NOTE(review): the declarations of rc and lvb_ready, the goto targets
 * and labels, the conditions choosing between the two gen assignments and
 * guarding the wait, and the return statement are not in this listing.
 */
3373 * Apply the layout to the inode. Layout lock is held and will be released
3376 static int ll_layout_lock_set(struct lustre_handle
*lockh
, ldlm_mode_t mode
,
3377 struct inode
*inode
, __u32
*gen
, bool reconf
)
3379 struct ll_inode_info
*lli
= ll_i2info(inode
);
3380 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3381 struct ldlm_lock
*lock
;
3382 struct lustre_md md
= { NULL
};
3383 struct cl_object_conf conf
;
3386 bool wait_layout
= false;
3388 LASSERT(lustre_handle_is_used(lockh
));
3390 lock
= ldlm_handle2lock(lockh
);
3391 LASSERT(lock
!= NULL
);
3392 LASSERT(ldlm_has_layout(lock
));
3394 LDLM_DEBUG(lock
, "File %p/"DFID
" being reconfigured: %d.\n",
3395 inode
, PFID(&lli
->lli_fid
), reconf
);
3397 /* in case this is a caching lock and reinstate with new inode */
3398 md_set_lock_data(sbi
->ll_md_exp
, &lockh
->cookie
, inode
, NULL
);
3400 lock_res_and_lock(lock
);
3401 lvb_ready
= !!(lock
->l_flags
& LDLM_FL_LVB_READY
);
3402 unlock_res_and_lock(lock
);
3403 /* checking lvb_ready is racy but this is okay. The worst case is
3404 * that multi processes may configure the file on the same time. */
3405 if (lvb_ready
|| !reconf
) {
3408 /* layout_gen must be valid if layout lock is not
3409 * cancelled and stripe has already set */
3410 *gen
= lli
->lli_layout_gen
;
3416 rc
= ll_layout_fetch(inode
, lock
);
3420 /* for layout lock, lmm is returned in lock's lvb.
3421 * lvb_data is immutable if the lock is held so it's safe to access it
3422 * without res lock. See the description in ldlm_lock_decref_internal()
3423 * for the condition to free lvb_data of layout lock */
3424 if (lock
->l_lvb_data
!= NULL
) {
3425 rc
= obd_unpackmd(sbi
->ll_dt_exp
, &md
.lsm
,
3426 lock
->l_lvb_data
, lock
->l_lvb_len
);
3428 *gen
= LL_LAYOUT_GEN_EMPTY
;
3430 *gen
= md
.lsm
->lsm_layout_gen
;
3433 CERROR("%s: file "DFID
" unpackmd error: %d\n",
3434 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3435 PFID(&lli
->lli_fid
), rc
);
3441 /* set layout to file. Unlikely this will fail as old layout was
3442 * surely eliminated */
3443 memset(&conf
, 0, sizeof(conf
));
3444 conf
.coc_opc
= OBJECT_CONF_SET
;
3445 conf
.coc_inode
= inode
;
3446 conf
.coc_lock
= lock
;
3447 conf
.u
.coc_md
= &md
;
3448 rc
= ll_layout_conf(inode
, &conf
);
3451 obd_free_memmd(sbi
->ll_dt_exp
, &md
.lsm
);
3453 /* refresh layout failed, need to wait */
3454 wait_layout
= rc
== -EBUSY
;
3457 LDLM_LOCK_PUT(lock
);
3458 ldlm_lock_decref(lockh
, mode
);
3460 /* wait for IO to complete if it's still being used. */
3462 CDEBUG(D_INODE
, "%s: %p/"DFID
" wait for layout reconf.\n",
3463 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3464 inode
, PFID(&lli
->lli_fid
));
3466 memset(&conf
, 0, sizeof(conf
));
3467 conf
.coc_opc
= OBJECT_CONF_WAIT
;
3468 conf
.coc_inode
= inode
;
3469 rc
= ll_layout_conf(inode
, &conf
);
3473 CDEBUG(D_INODE
, "file: "DFID
" waiting layout return: %d.\n",
3474 PFID(&lli
->lli_fid
), rc
);
/*
 * Make sure the client has an up to date layout for the inode and return
 * its generation through gen.
 *
 * Visible steps: gen is preset from lli->lli_layout_gen and the
 * LL_SBI_LAYOUT_LOCK mount flag is tested.  The FID is asserted sane and
 * the inode asserted to be a regular file.  A cached
 * MDS_INODELOCK_LAYOUT lock in any of CR, CW, PR or PW is looked up with
 * ll_take_md_lock(); on a hit ll_layout_lock_set() is called with reconf
 * false.  Then, under lli->lli_layout_mutex, the lookup is repeated (a hit
 * calls ll_layout_lock_set() with reconf true and drops the mutex).  When
 * no lock is cached, an IT_LAYOUT intent is enqueued with md_enqueue();
 * the request held in the intent is released, the granted mode is taken
 * out of the intent before ll_intent_drop_lock(), the inode is attached
 * to the lock with ll_set_lock_data() and ll_layout_lock_set() is called
 * with reconf true.  The mutex is released at the end.
 *
 * NOTE(review): the declarations of mode and rc, the checks of rc after
 * each step, the goto and label lines, part of the einfo initializer and
 * the return statements are not in this listing.
 */
3480 * This function checks if there exists a LAYOUT lock on the client side,
3481 * or enqueues it if it doesn't have one in cache.
3483 * This function will not hold layout lock so it may be revoked any time after
3484 * this function returns. Any operations depend on layout should be redone
3487 * This function should be called before lov_io_init() to get an uptodate
3488 * layout version, the caller should save the version number and after IO
3489 * is finished, this function should be called again to verify that layout
3490 * is not changed during IO time.
3492 int ll_layout_refresh(struct inode
*inode
, __u32
*gen
)
3494 struct ll_inode_info
*lli
= ll_i2info(inode
);
3495 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3496 struct md_op_data
*op_data
;
3497 struct lookup_intent it
;
3498 struct lustre_handle lockh
;
3500 struct ldlm_enqueue_info einfo
= {
3501 .ei_type
= LDLM_IBITS
,
3503 .ei_cb_bl
= ll_md_blocking_ast
,
3504 .ei_cb_cp
= ldlm_completion_ast
,
3508 *gen
= lli
->lli_layout_gen
;
3509 if (!(sbi
->ll_flags
& LL_SBI_LAYOUT_LOCK
))
3513 LASSERT(fid_is_sane(ll_inode2fid(inode
)));
3514 LASSERT(S_ISREG(inode
->i_mode
));
3516 /* mostly layout lock is caching on the local side, so try to match
3517 * it before grabbing layout lock mutex. */
3518 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_LAYOUT
, &lockh
, 0,
3519 LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
);
3520 if (mode
!= 0) { /* hit cached lock */
3521 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, false);
3525 /* better hold lli_layout_mutex to try again otherwise
3526 * it will have starvation problem. */
3529 /* take layout lock mutex to enqueue layout lock exclusively. */
3530 mutex_lock(&lli
->lli_layout_mutex
);
3533 /* try again. Maybe somebody else has done this. */
3534 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_LAYOUT
, &lockh
, 0,
3535 LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
);
3536 if (mode
!= 0) { /* hit cached lock */
3537 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3541 mutex_unlock(&lli
->lli_layout_mutex
);
3545 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
,
3546 0, 0, LUSTRE_OPC_ANY
, NULL
);
3547 if (IS_ERR(op_data
)) {
3548 mutex_unlock(&lli
->lli_layout_mutex
);
3549 return PTR_ERR(op_data
);
3552 /* have to enqueue one */
3553 memset(&it
, 0, sizeof(it
));
3554 it
.it_op
= IT_LAYOUT
;
3555 lockh
.cookie
= 0ULL;
3557 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID
".\n",
3558 ll_get_fsname(inode
->i_sb
, NULL
, 0), inode
,
3559 PFID(&lli
->lli_fid
));
3561 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, &it
, op_data
, &lockh
,
3563 if (it
.d
.lustre
.it_data
!= NULL
)
3564 ptlrpc_req_finished(it
.d
.lustre
.it_data
);
3565 it
.d
.lustre
.it_data
= NULL
;
3567 ll_finish_md_op_data(op_data
);
3569 mode
= it
.d
.lustre
.it_lock_mode
;
3570 it
.d
.lustre
.it_lock_mode
= 0;
3571 ll_intent_drop_lock(&it
);
3574 /* set lock data in case this is a new lock */
3575 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
3576 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3580 mutex_unlock(&lli
->lli_layout_mutex
);
3586 * This function sends a restore request to the MDT
3588 int ll_layout_restore(struct inode
*inode
)
3590 struct hsm_user_request
*hur
;
3593 len
= sizeof(struct hsm_user_request
) +
3594 sizeof(struct hsm_user_item
);
3595 OBD_ALLOC(hur
, len
);
3599 hur
->hur_request
.hr_action
= HUA_RESTORE
;
3600 hur
->hur_request
.hr_archive_id
= 0;
3601 hur
->hur_request
.hr_flags
= 0;
3602 memcpy(&hur
->hur_user_item
[0].hui_fid
, &ll_i2info(inode
)->lli_fid
,
3603 sizeof(hur
->hur_user_item
[0].hui_fid
));
3604 hur
->hur_user_item
[0].hui_extent
.length
= -1;
3605 hur
->hur_request
.hr_itemcount
= 1;
3606 rc
= obd_iocontrol(LL_IOC_HSM_REQUEST
, cl_i2sbi(inode
)->ll_md_exp
,