4c28f39e8b18b583c28edf8aa0b313c7d19045eb
[deliverable/linux.git] / drivers / staging / lustre / lustre / llite / file.c
1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/llite/file.c
37 *
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
41 */
42
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
50
51 #include "cl_object.h"
52
53 struct ll_file_data *ll_file_data_get(void)
54 {
55 struct ll_file_data *fd;
56
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
58 if (fd == NULL)
59 return NULL;
60 fd->fd_write_failed = false;
61 return fd;
62 }
63
64 static void ll_file_data_put(struct ll_file_data *fd)
65 {
66 if (fd != NULL)
67 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
68 }
69
/**
 * Pack the current attributes of @inode into @op_data for an MD RPC,
 * optionally recording the open handle @fh.
 *
 * Copies fid, mode, times, size, blocks, flags and the current IO epoch
 * from the inode; takes an MDS capability reference into op_capa1.
 */
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
			  struct lustre_handle *fh)
{
	op_data->op_fid1 = ll_i2info(inode)->lli_fid;
	op_data->op_attr.ia_mode = inode->i_mode;
	op_data->op_attr.ia_atime = inode->i_atime;
	op_data->op_attr.ia_mtime = inode->i_mtime;
	op_data->op_attr.ia_ctime = inode->i_ctime;
	op_data->op_attr.ia_size = i_size_read(inode);
	op_data->op_attr_blocks = inode->i_blocks;
	/* ia_attr_flags lives in the Lustre-extended iattr; translate VFS
	 * inode flags to the on-wire ext flag encoding. */
	((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
				ll_inode_to_ext_flags(inode->i_flags);
	op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
	if (fh)
		op_data->op_handle = *fh;
	op_data->op_capa1 = ll_mdscapa_get(inode);

	/* Propagate the client-side "data modified" state to the server. */
	if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
		op_data->op_bias |= MDS_DATA_MODIFIED;
}
90
/**
 * Closes the IO epoch and packs all the attributes into @op_data for
 * the CLOSE rpc.
 */
static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
			     struct obd_client_handle *och)
{
	/* Timestamps are always reported back on close. */
	op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
				    ATTR_MTIME | ATTR_MTIME_SET |
				    ATTR_CTIME | ATTR_CTIME_SET;

	/* Size/blocks are only relevant for handles that could write. */
	if (!(och->och_flags & FMODE_WRITE))
		goto out;

	if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
	else
		/* Size-on-MDS is in effect for this regular file: close the
		 * IO epoch rather than sending size from the client.
		 * NOTE(review): &och is passed by reference, so
		 * ll_ioepoch_close() may modify the handle pointer — confirm
		 * it cannot NULL it before the dereference below. */
		ll_ioepoch_close(inode, op_data, &och, 0);

out:
	ll_pack_inode2opdata(inode, op_data, &och->och_fh);
	ll_prep_md_op_data(op_data, inode, NULL, NULL,
			   0, 0, LUSTRE_OPC_ANY, NULL);
}
115
/**
 * Send a CLOSE RPC to the MDS for the open handle @och of @inode.
 *
 * If @data_version is non-NULL the close is part of an HSM release:
 * MDS_HSM_RELEASE bias and the data version are sent, and -EBUSY is
 * returned if the server did not actually release the file.
 *
 * Consumes @och: it is either freed here or (under Size-on-MDS with an
 * open epoch) queued to wait for DONE_WRITING.
 */
static int ll_close_inode_openhandle(struct obd_export *md_exp,
				     struct inode *inode,
				     struct obd_client_handle *och,
				     const __u64 *data_version)
{
	struct obd_export *exp = ll_i2mdexp(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	struct obd_device *obd = class_exp2obd(exp);
	int epoch_close = 1;
	int rc;

	if (obd == NULL) {
		/*
		 * XXX: in case of LMV, is this correct to access
		 * ->exp_handle?
		 */
		CERROR("Invalid MDC connection handle "LPX64"\n",
		       ll_i2mdexp(inode)->exp_handle.h_cookie);
		GOTO(out, rc = 0);
	}

	OBD_ALLOC_PTR(op_data);
	if (op_data == NULL)
		GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.

	ll_prepare_close(inode, op_data, och);
	if (data_version != NULL) {
		/* Pass in data_version implies release. */
		op_data->op_bias |= MDS_HSM_RELEASE;
		op_data->op_data_version = *data_version;
		op_data->op_lease_handle = och->och_lease_handle;
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
	}
	epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
	rc = md_close(md_exp, op_data, och->och_mod, &req);
	if (rc == -EAGAIN) {
		/* This close must have the epoch closed. */
		LASSERT(epoch_close);
		/* MDS has instructed us to obtain Size-on-MDS attribute from
		 * OSTs and send setattr to back to MDS. */
		rc = ll_som_update(inode, op_data);
		if (rc) {
			CERROR("inode %lu mdc Size-on-MDS update failed: "
			       "rc = %d\n", inode->i_ino, rc);
			/* The close itself is considered done; swallow the
			 * SOM update failure after logging it. */
			rc = 0;
		}
	} else if (rc) {
		CERROR("inode %lu mdc close failed: rc = %d\n",
		       inode->i_ino, rc);
	}

	/* DATA_MODIFIED flag was successfully sent on close, cancel data
	 * modification flag. */
	if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
		struct ll_inode_info *lli = ll_i2info(inode);

		spin_lock(&lli->lli_lock);
		lli->lli_flags &= ~LLIF_DATA_MODIFIED;
		spin_unlock(&lli->lli_lock);
	}

	if (rc == 0) {
		rc = ll_objects_destroy(req, inode);
		if (rc)
			CERROR("inode %lu ll_objects destroy: rc = %d\n",
			       inode->i_ino, rc);
	}
	if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
		struct mdt_body *body;
		/* NOTE(review): assumes the reply body is always present on a
		 * successful close — confirm req_capsule_server_get() cannot
		 * return NULL here. */
		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		if (!(body->valid & OBD_MD_FLRELEASED))
			rc = -EBUSY;
	}

	ll_finish_md_op_data(op_data);

out:
	/* A SoM write handle whose epoch is still open must wait for
	 * DONE_WRITING before the handle may be freed. */
	if (exp_connect_som(exp) && !epoch_close &&
	    S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
		ll_queue_done_writing(inode, LLIF_DONE_WRITING);
	} else {
		md_clear_open_replay_data(md_exp, och);
		/* Free @och if it is not waiting for DONE_WRITING. */
		och->och_fh.cookie = DEAD_HANDLE_MAGIC;
		OBD_FREE_PTR(och);
	}
	if (req) /* This is close request */
		ptlrpc_req_finished(req);
	return rc;
}
207
208 int ll_md_real_close(struct inode *inode, int flags)
209 {
210 struct ll_inode_info *lli = ll_i2info(inode);
211 struct obd_client_handle **och_p;
212 struct obd_client_handle *och;
213 __u64 *och_usecount;
214 int rc = 0;
215
216 if (flags & FMODE_WRITE) {
217 och_p = &lli->lli_mds_write_och;
218 och_usecount = &lli->lli_open_fd_write_count;
219 } else if (flags & FMODE_EXEC) {
220 och_p = &lli->lli_mds_exec_och;
221 och_usecount = &lli->lli_open_fd_exec_count;
222 } else {
223 LASSERT(flags & FMODE_READ);
224 och_p = &lli->lli_mds_read_och;
225 och_usecount = &lli->lli_open_fd_read_count;
226 }
227
228 mutex_lock(&lli->lli_och_mutex);
229 if (*och_usecount) { /* There are still users of this handle, so
230 skip freeing it. */
231 mutex_unlock(&lli->lli_och_mutex);
232 return 0;
233 }
234 och=*och_p;
235 *och_p = NULL;
236 mutex_unlock(&lli->lli_och_mutex);
237
238 if (och) { /* There might be a race and somebody have freed this och
239 already */
240 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
241 inode, och, NULL);
242 }
243
244 return rc;
245 }
246
/**
 * Per-descriptor close of @file on @inode: drop any group lock and
 * lingering lease, then decide whether the MDS open handle itself
 * should be closed or can be kept cached under an OPEN DLM lock.
 *
 * Always releases the file's private ll_file_data.
 */
int ll_md_close(struct obd_export *md_exp, struct inode *inode,
		struct file *file)
{
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ll_inode_info *lli = ll_i2info(inode);
	int rc = 0;

	/* clear group lock, if present */
	if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
		ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);

	if (fd->fd_lease_och != NULL) {
		bool lease_broken;

		/* Usually the lease is not released when the
		 * application crashed, we need to release here. */
		rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
		CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
		       PFID(&lli->lli_fid), rc, lease_broken);

		fd->fd_lease_och = NULL;
	}

	if (fd->fd_och != NULL) {
		/* This descriptor privatized the open handle (lease path);
		 * close it directly, skipping the shared-handle accounting. */
		rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
		fd->fd_och = NULL;
		GOTO(out, rc);
	}

	/* Let's see if we have good enough OPEN lock on the file and if
	   we can skip talking to MDS */
	if (file->f_dentry->d_inode) { /* Can this ever be false? */
		int lockmode;
		int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
		struct lustre_handle lockh;
		/* NOTE: shadows the @inode parameter (same dentry, so it is
		 * the same object). */
		struct inode *inode = file->f_dentry->d_inode;
		ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};

		/* Drop this descriptor's reference on the mode-specific
		 * open handle under the och mutex. */
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_omode & FMODE_WRITE) {
			lockmode = LCK_CW;
			LASSERT(lli->lli_open_fd_write_count);
			lli->lli_open_fd_write_count--;
		} else if (fd->fd_omode & FMODE_EXEC) {
			lockmode = LCK_PR;
			LASSERT(lli->lli_open_fd_exec_count);
			lli->lli_open_fd_exec_count--;
		} else {
			lockmode = LCK_CR;
			LASSERT(lli->lli_open_fd_read_count);
			lli->lli_open_fd_read_count--;
		}
		mutex_unlock(&lli->lli_och_mutex);

		/* Only go to the MDS when no matching OPEN lock is cached. */
		if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
				   LDLM_IBITS, &policy, lockmode,
				   &lockh)) {
			rc = ll_md_real_close(file->f_dentry->d_inode,
					      fd->fd_omode);
		}
	} else {
		CERROR("Releasing a file %p with negative dentry %p. Name %s",
		       file, file->f_dentry, file->f_dentry->d_name.name);
	}

out:
	LUSTRE_FPRIVATE(file) = NULL;
	ll_file_data_put(fd);
	ll_capa_close(inode);

	return rc;
}
319
/* While this returns an error code, fput() the caller does not, so we need
 * to make every effort to clean up all of our state here. Also, applications
 * rarely check close errors and even if an error is returned they will not
 * re-try the close call.
 */
int ll_file_release(struct inode *inode, struct file *file)
{
	struct ll_file_data *fd;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ll_inode_info *lli = ll_i2info(inode);
	int rc;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
	       inode->i_generation, inode);

#ifdef CONFIG_FS_POSIX_ACL
	/* Remote-client ACL state is tracked per pid on the root inode;
	 * tear it down for this process on release of the root. */
	if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
	    inode == inode->i_sb->s_root->d_inode) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(fd != NULL);
		if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
			fd->fd_flags &= ~LL_FILE_RMTACL;
			rct_del(&sbi->ll_rct, current_pid());
			et_search_free(&sbi->ll_et, current_pid());
		}
	}
#endif

	if (inode->i_sb->s_root != file->f_dentry)
		ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
	fd = LUSTRE_FPRIVATE(file);
	LASSERT(fd != NULL);

	/* The last ref on @file, maybe not the the owner pid of statahead.
	 * Different processes can open the same dir, "ll_opendir_key" means:
	 * it is me that should stop the statahead thread. */
	if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
	    lli->lli_opendir_pid != 0)
		ll_stop_statahead(inode, lli->lli_opendir_key);

	/* The root inode never had an MDS open handle; just drop fd. */
	if (inode->i_sb->s_root == file->f_dentry) {
		LUSTRE_FPRIVATE(file) = NULL;
		ll_file_data_put(fd);
		return 0;
	}

	/* Surface any deferred async write error from the cl object. */
	if (!S_ISDIR(inode->i_mode)) {
		lov_read_and_clear_async_rc(lli->lli_clob);
		lli->lli_async_rc = 0;
	}

	rc = ll_md_close(sbi->ll_md_exp, inode, file);

	if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
		libcfs_debug_dumplog();

	return rc;
}
379
/**
 * Perform an intent OPEN against the MDS for @file, attaching the
 * resulting lock/disposition to @itp.
 *
 * @lmm/@lmmsize: optional striping parameters; when both are zero the
 * open also requests an OPEN DLM lock (MDS_OPEN_LOCK).
 *
 * Returns 0 on success or a negative errno; in all cases the intent's
 * request reference and lock are dropped before returning.
 */
static int ll_intent_file_open(struct file *file, void *lmm,
			       int lmmsize, struct lookup_intent *itp)
{
	struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
	struct dentry *parent = file->f_dentry->d_parent;
	const char *name = file->f_dentry->d_name.name;
	const int len = file->f_dentry->d_name.len;
	struct md_op_data *op_data;
	struct ptlrpc_request *req;
	__u32 opc = LUSTRE_OPC_ANY;
	int rc;

	if (!parent)
		return -ENOENT;

	/* Usually we come here only for NFSD, and we want open lock.
	   But we can also get here with pre 2.6.15 patchless kernels, and in
	   that case that lock is also ok */
	/* We can also get here if there was cached open handle in revalidate_it
	 * but it disappeared while we were getting from there to ll_file_open.
	 * But this means this file was closed and immediately opened which
	 * makes a good candidate for using OPEN lock */
	/* If lmmsize & lmm are not 0, we are just setting stripe info
	 * parameters. No need for the open lock */
	if (lmm == NULL && lmmsize == 0) {
		itp->it_flags |= MDS_OPEN_LOCK;
		if (itp->it_flags & FMODE_WRITE)
			opc = LUSTRE_OPC_CREATE;
	}

	op_data = ll_prep_md_op_data(NULL, parent->d_inode,
				     file->f_dentry->d_inode, name, len,
				     O_RDWR, opc, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	itp->it_flags |= MDS_OPEN_BY_FID;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
			    0 /*unused */, &req, ll_md_blocking_ast, 0);
	ll_finish_md_op_data(op_data);
	if (rc == -ESTALE) {
		/* reason for keep own exit path - don`t flood log
		 * with messages with -ESTALE errors.
		 */
		if (!it_disposition(itp, DISP_OPEN_OPEN) ||
		    it_open_error(DISP_OPEN_OPEN, itp))
			GOTO(out, rc);
		ll_release_openhandle(file->f_dentry, itp);
		GOTO(out, rc);
	}

	if (it_disposition(itp, DISP_LOOKUP_NEG))
		GOTO(out, rc = -ENOENT);

	if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
		rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
		CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
		GOTO(out, rc);
	}

	/* Refresh the inode from the reply and attach the granted lock. */
	rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
	if (!rc && itp->d.lustre.it_lock_mode)
		ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
				 itp, NULL);

out:
	ptlrpc_req_finished(itp->d.lustre.it_data);
	it_clear_disposition(itp, DISP_ENQ_COMPLETE);
	ll_intent_drop_lock(itp);

	return rc;
}
452
453 /**
454 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
455 * not believe attributes if a few ioepoch holders exist. Attributes for
456 * previous ioepoch if new one is opened are also skipped by MDS.
457 */
458 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
459 {
460 if (ioepoch && lli->lli_ioepoch != ioepoch) {
461 lli->lli_ioepoch = ioepoch;
462 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
463 ioepoch, PFID(&lli->lli_fid));
464 }
465 }
466
467 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
468 struct obd_client_handle *och)
469 {
470 struct ptlrpc_request *req = it->d.lustre.it_data;
471 struct mdt_body *body;
472
473 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
474 och->och_fh = body->handle;
475 och->och_fid = body->fid1;
476 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
477 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
478 och->och_flags = it->it_flags;
479
480 return md_set_open_replay_data(md_exp, och, req);
481 }
482
483 int ll_local_open(struct file *file, struct lookup_intent *it,
484 struct ll_file_data *fd, struct obd_client_handle *och)
485 {
486 struct inode *inode = file->f_dentry->d_inode;
487 struct ll_inode_info *lli = ll_i2info(inode);
488
489 LASSERT(!LUSTRE_FPRIVATE(file));
490
491 LASSERT(fd != NULL);
492
493 if (och) {
494 struct ptlrpc_request *req = it->d.lustre.it_data;
495 struct mdt_body *body;
496 int rc;
497
498 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
499 if (rc != 0)
500 return rc;
501
502 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
503 ll_ioepoch_open(lli, body->ioepoch);
504 }
505
506 LUSTRE_FPRIVATE(file) = fd;
507 ll_readahead_init(inode, &fd->fd_ras);
508 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
509 return 0;
510 }
511
/* Open a file, and (for the very first open) create objects on the OSTs at
 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
 * creation or open until ll_lov_setstripe() ioctl is called.
 *
 * If we already have the stripe MD locally then we don't request it in
 * md_open(), by passing a lmm_size = 0.
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used. We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 */
int ll_file_open(struct inode *inode, struct file *file)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct lookup_intent *it, oit = { .it_op = IT_OPEN,
					  .it_flags = file->f_flags };
	struct obd_client_handle **och_p = NULL;
	__u64 *och_usecount = NULL;
	struct ll_file_data *fd;
	int rc = 0, opendir_set = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
	       inode->i_generation, inode, file->f_flags);

	it = file->private_data; /* XXX: compat macro */
	file->private_data = NULL; /* prevent ll_local_open assertion */

	fd = ll_file_data_get();
	if (fd == NULL)
		GOTO(out_openerr, rc = -ENOMEM);

	fd->fd_file = file;
	/* For directories: claim statahead ownership for this process if
	 * nobody owns it yet; the owner stops the statahead thread on
	 * release. */
	if (S_ISDIR(inode->i_mode)) {
		spin_lock(&lli->lli_sa_lock);
		if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
		    lli->lli_opendir_pid == 0) {
			lli->lli_opendir_key = fd;
			lli->lli_opendir_pid = current_pid();
			opendir_set = 1;
		}
		spin_unlock(&lli->lli_sa_lock);
	}

	/* The root dentry needs no MDS open handle. */
	if (inode->i_sb->s_root == file->f_dentry) {
		LUSTRE_FPRIVATE(file) = fd;
		return 0;
	}

	if (!it || !it->d.lustre.it_disposition) {
		/* Convert f_flags into access mode. We cannot use file->f_mode,
		 * because everything but O_ACCMODE mask was stripped from
		 * there */
		if ((oit.it_flags + 1) & O_ACCMODE)
			oit.it_flags++;
		if (file->f_flags & O_TRUNC)
			oit.it_flags |= FMODE_WRITE;

		/* kernel only call f_op->open in dentry_open. filp_open calls
		 * dentry_open after call to open_namei that checks permissions.
		 * Only nfsd_open call dentry_open directly without checking
		 * permissions and because of that this code below is safe. */
		if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
			oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;

		/* We do not want O_EXCL here, presumably we opened the file
		 * already? XXX - NFS implications? */
		oit.it_flags &= ~O_EXCL;

		/* bug20584, if "it_flags" contains O_CREAT, the file will be
		 * created if necessary, then "IT_CREAT" should be set to keep
		 * consistent with it */
		if (oit.it_flags & O_CREAT)
			oit.it_op |= IT_CREAT;

		it = &oit;
	}

restart:
	/* Let's see if we have file open on MDS already. */
	if (it->it_flags & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (it->it_flags & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
	} else {
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;
	}

	mutex_lock(&lli->lli_och_mutex);
	if (*och_p) { /* Open handle is present */
		if (it_disposition(it, DISP_OPEN_OPEN)) {
			/* Well, there's extra open request that we do not need,
			   let's close it somehow. This will decref request. */
			rc = it_open_error(DISP_OPEN_OPEN, it);
			if (rc) {
				mutex_unlock(&lli->lli_och_mutex);
				GOTO(out_openerr, rc);
			}

			ll_release_openhandle(file->f_dentry, it);
		}
		(*och_usecount)++;

		rc = ll_local_open(file, it, fd, NULL);
		if (rc) {
			(*och_usecount)--;
			mutex_unlock(&lli->lli_och_mutex);
			GOTO(out_openerr, rc);
		}
	} else {
		LASSERT(*och_usecount == 0);
		if (!it->d.lustre.it_disposition) {
			/* We cannot just request lock handle now, new ELC code
			   means that one of other OPEN locks for this file
			   could be cancelled, and since blocking ast handler
			   would attempt to grab och_mutex as well, that would
			   result in a deadlock */
			mutex_unlock(&lli->lli_och_mutex);
			it->it_create_mode |= M_CHECK_STALE;
			rc = ll_intent_file_open(file, NULL, 0, it);
			it->it_create_mode &= ~M_CHECK_STALE;
			if (rc)
				GOTO(out_openerr, rc);

			/* Re-evaluate with the freshly-enqueued intent. */
			goto restart;
		}
		OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
		if (!*och_p)
			GOTO(out_och_free, rc = -ENOMEM);

		(*och_usecount)++;

		/* md_intent_lock() didn't get a request ref if there was an
		 * open error, so don't do cleanup on the request here
		 * (bug 3430) */
		/* XXX (green): Should not we bail out on any error here, not
		 * just open error? */
		rc = it_open_error(DISP_OPEN_OPEN, it);
		if (rc)
			GOTO(out_och_free, rc);

		LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));

		rc = ll_local_open(file, it, fd, *och_p);
		if (rc)
			GOTO(out_och_free, rc);
	}
	mutex_unlock(&lli->lli_och_mutex);
	/* fd is now owned by the file's private data; prevent the error
	 * path below from freeing it. */
	fd = NULL;

	/* Must do this outside lli_och_mutex lock to prevent deadlock where
	   different kind of OPEN lock for this same inode gets cancelled
	   by ldlm_cancel_lru */
	if (!S_ISREG(inode->i_mode))
		GOTO(out_och_free, rc);

	ll_capa_open(inode);

	if (!lli->lli_has_smd &&
	    (cl_is_lov_delay_create(file->f_flags) ||
	     (file->f_mode & FMODE_WRITE) == 0)) {
		CDEBUG(D_INODE, "object creation was delayed\n");
		GOTO(out_och_free, rc);
	}
	cl_lov_delay_create_clear(&file->f_flags);
	GOTO(out_och_free, rc);

out_och_free:
	/* NOTE: out_och_free with rc != 0 is only reached while holding
	 * lli_och_mutex; the success path arrives here after unlocking. */
	if (rc) {
		if (och_p && *och_p) {
			OBD_FREE(*och_p, sizeof (struct obd_client_handle));
			*och_p = NULL; /* OBD_FREE writes some magic there */
			(*och_usecount)--;
		}
		mutex_unlock(&lli->lli_och_mutex);

out_openerr:
		if (opendir_set != 0)
			ll_stop_statahead(inode, lli->lli_opendir_key);
		if (fd != NULL)
			ll_file_data_put(fd);
	} else {
		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
	}

	if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
	}

	return rc;
}
708
709 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
710 struct ldlm_lock_desc *desc, void *data, int flag)
711 {
712 int rc;
713 struct lustre_handle lockh;
714
715 switch (flag) {
716 case LDLM_CB_BLOCKING:
717 ldlm_lock2handle(lock, &lockh);
718 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
719 if (rc < 0) {
720 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
721 return rc;
722 }
723 break;
724 case LDLM_CB_CANCELING:
725 /* do nothing */
726 break;
727 }
728 return 0;
729 }
730
/**
 * Acquire a lease and open the file.
 *
 * @fmode must be exactly FMODE_READ or FMODE_WRITE. If @file is given,
 * its existing sole open handle is privatized into fd_och and reused as
 * the "same owner" handle for the lease request; -EBUSY is returned if
 * there are other openers or a lease is already held on the descriptor.
 *
 * Returns the new open handle on success or an ERR_PTR.
 */
struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
					fmode_t fmode, __u64 open_flags)
{
	struct lookup_intent it = { .it_op = IT_OPEN };
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req;
	struct lustre_handle old_handle = { 0 };
	struct obd_client_handle *och = NULL;
	int rc;
	int rc2;

	if (fmode != FMODE_WRITE && fmode != FMODE_READ)
		return ERR_PTR(-EINVAL);

	if (file != NULL) {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
		struct obd_client_handle **och_p;
		__u64 *och_usecount;

		if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
			return ERR_PTR(-EPERM);

		/* Get the openhandle of the file */
		rc = -EBUSY;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och != NULL) {
			mutex_unlock(&lli->lli_och_mutex);
			return ERR_PTR(rc);
		}

		if (fd->fd_och == NULL) {
			if (file->f_mode & FMODE_WRITE) {
				LASSERT(lli->lli_mds_write_och != NULL);
				och_p = &lli->lli_mds_write_och;
				och_usecount = &lli->lli_open_fd_write_count;
			} else {
				LASSERT(lli->lli_mds_read_och != NULL);
				och_p = &lli->lli_mds_read_och;
				och_usecount = &lli->lli_open_fd_read_count;
			}
			/* Privatize the shared handle only if we are its
			 * sole user. */
			if (*och_usecount == 1) {
				fd->fd_och = *och_p;
				*och_p = NULL;
				*och_usecount = 0;
				rc = 0;
			}
		}
		mutex_unlock(&lli->lli_och_mutex);
		if (rc < 0) /* more than 1 opener */
			return ERR_PTR(rc);

		LASSERT(fd->fd_och != NULL);
		old_handle = fd->fd_och->och_fh;
	}

	OBD_ALLOC_PTR(och);
	if (och == NULL)
		return ERR_PTR(-ENOMEM);

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		GOTO(out, rc = PTR_ERR(op_data));

	/* To tell the MDT this openhandle is from the same owner */
	op_data->op_handle = old_handle;

	it.it_flags = fmode | open_flags;
	it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
			    ll_md_blocking_lease_ast,
	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
	 * it can be cancelled which may mislead applications that the lease is
	 * broken;
	 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
	 * doesn't deal with openhandle, so normal openhandle will be leaked. */
			    LDLM_FL_NO_LRU | LDLM_FL_EXCL);
	ll_finish_md_op_data(op_data);
	if (req != NULL) {
		ptlrpc_req_finished(req);
		it_clear_disposition(&it, DISP_ENQ_COMPLETE);
	}
	if (rc < 0)
		GOTO(out_release_it, rc);

	if (it_disposition(&it, DISP_LOOKUP_NEG))
		GOTO(out_release_it, rc = -ENOENT);

	rc = it_open_error(DISP_OPEN_OPEN, &it);
	if (rc)
		GOTO(out_release_it, rc);

	LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
	ll_och_fill(sbi->ll_md_exp, &it, och);

	if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
		GOTO(out_close, rc = -EOPNOTSUPP);

	/* already get lease, handle lease lock */
	ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
	if (it.d.lustre.it_lock_mode == 0 ||
	    it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
		/* open lock must return for lease */
		CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
		       PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
		       it.d.lustre.it_lock_bits);
		GOTO(out_close, rc = -EPROTO);
	}

	ll_intent_release(&it);
	return och;

out_close:
	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
	if (rc2)
		CERROR("Close openhandle returned %d\n", rc2);

	/* cancel open lock */
	if (it.d.lustre.it_lock_mode != 0) {
		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
					    it.d.lustre.it_lock_mode);
		it.d.lustre.it_lock_mode = 0;
	}
out_release_it:
	ll_intent_release(&it);
out:
	OBD_FREE_PTR(och);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL(ll_lease_open);
867
868 /**
869 * Release lease and close the file.
870 * It will check if the lease has ever broken.
871 */
872 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
873 bool *lease_broken)
874 {
875 struct ldlm_lock *lock;
876 bool cancelled = true;
877 int rc;
878
879 lock = ldlm_handle2lock(&och->och_lease_handle);
880 if (lock != NULL) {
881 lock_res_and_lock(lock);
882 cancelled = ldlm_is_cancel(lock);
883 unlock_res_and_lock(lock);
884 ldlm_lock_put(lock);
885 }
886
887 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
888 PFID(&ll_i2info(inode)->lli_fid), cancelled);
889
890 if (!cancelled)
891 ldlm_cli_cancel(&och->och_lease_handle, 0);
892 if (lease_broken != NULL)
893 *lease_broken = cancelled;
894
895 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
896 NULL);
897 return rc;
898 }
899 EXPORT_SYMBOL(ll_lease_close);
900
/* Fills the obdo with the attributes for the lsm */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
			  struct obd_capa *capa, struct obdo *obdo,
			  __u64 ioepoch, int sync)
{
	struct ptlrpc_request_set *set;
	struct obd_info oinfo = { { { 0 } } };
	int rc;

	LASSERT(lsm != NULL);

	oinfo.oi_md = lsm;
	oinfo.oi_oa = obdo;
	oinfo.oi_oa->o_oi = lsm->lsm_oi;
	oinfo.oi_oa->o_mode = S_IFREG;
	oinfo.oi_oa->o_ioepoch = ioepoch;
	/* Ask for everything needed to refresh size/blocks/timestamps. */
	oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
			       OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
			       OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
			       OBD_MD_FLMTIME | OBD_MD_FLCTIME |
			       OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
			       OBD_MD_FLDATAVERSION;
	oinfo.oi_capa = capa;
	if (sync) {
		/* Perform the getattr under a server-side lock. */
		oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
		oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
	}

	set = ptlrpc_prep_set();
	if (set == NULL) {
		CERROR("can't allocate ptlrpc set\n");
		rc = -ENOMEM;
	} else {
		rc = obd_getattr_async(exp, &oinfo, set);
		if (rc == 0)
			rc = ptlrpc_set_wait(set);
		ptlrpc_set_destroy(set);
	}
	/* Mask o_valid down to the fields the caller may actually trust. */
	if (rc == 0)
		oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
					 OBD_MD_FLATIME | OBD_MD_FLMTIME |
					 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
					 OBD_MD_FLDATAVERSION);
	return rc;
}
946
/**
 * Performs the getattr on the inode and updates its fields.
 * If @sync != 0, perform the getattr under the server-side lock.
 */
int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
		     __u64 ioepoch, int sync)
{
	struct obd_capa *capa = ll_mdscapa_get(inode);
	struct lov_stripe_md *lsm;
	int rc;

	lsm = ccc_inode_lsm_get(inode);
	rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
			    capa, obdo, ioepoch, sync);
	capa_put(capa);
	if (rc == 0) {
		/* With no stripe MD, report the ids straight from the obdo. */
		struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;

		obdo_refresh_inode(inode, obdo, obdo->o_valid);
		CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
		       " blksize %lu\n", POSTID(oi), i_size_read(inode),
		       (unsigned long long)inode->i_blocks,
		       (unsigned long)ll_inode_blksize(inode));
	}
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}
974
/**
 * Merge OST-side attributes (size, blocks, timestamps) into @inode under
 * the inode size lock, taking the most recent of MDS- and OST-provided
 * timestamps. Returns 0 or the error from cl_object_attr_get().
 */
int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct cl_object *obj = lli->lli_clob;
	struct cl_attr *attr = ccc_env_thread_attr(env);
	struct ost_lvb lvb;
	int rc = 0;

	ll_inode_size_lock(inode);
	/* merge timestamps the most recently obtained from mds with
	   timestamps obtained from osts */
	LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
	LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
	LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
	inode_init_lvb(inode, &lvb);

	cl_object_attr_lock(obj);
	rc = cl_object_attr_get(env, obj, attr);
	cl_object_attr_unlock(obj);

	if (rc == 0) {
		/* Take the later of the cached and freshly fetched times. */
		if (lvb.lvb_atime < attr->cat_atime)
			lvb.lvb_atime = attr->cat_atime;
		if (lvb.lvb_ctime < attr->cat_ctime)
			lvb.lvb_ctime = attr->cat_ctime;
		if (lvb.lvb_mtime < attr->cat_mtime)
			lvb.lvb_mtime = attr->cat_mtime;

		CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
		       PFID(&lli->lli_fid), attr->cat_size);
		cl_isize_write_nolock(inode, attr->cat_size);

		inode->i_blocks = attr->cat_blocks;

		LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
		LTIME_S(inode->i_atime) = lvb.lvb_atime;
		LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
	}
	ll_inode_size_unlock(inode);

	return rc;
}
1017
1018 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1019 lstat_t *st)
1020 {
1021 struct obdo obdo = { 0 };
1022 int rc;
1023
1024 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1025 if (rc == 0) {
1026 st->st_size = obdo.o_size;
1027 st->st_blocks = obdo.o_blocks;
1028 st->st_mtime = obdo.o_mtime;
1029 st->st_atime = obdo.o_atime;
1030 st->st_ctime = obdo.o_ctime;
1031 }
1032 return rc;
1033 }
1034
1035 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1036 {
1037 struct inode *inode = file->f_dentry->d_inode;
1038
1039 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1040 if (write) {
1041 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1042 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1043 file->f_flags & O_DIRECT ||
1044 IS_SYNC(inode);
1045 }
1046 io->ci_obj = ll_i2info(inode)->lli_clob;
1047 io->ci_lockreq = CILR_MAYBE;
1048 if (ll_file_nolock(file)) {
1049 io->ci_lockreq = CILR_NEVER;
1050 io->ci_no_srvlock = 1;
1051 } else if (file->f_flags & O_APPEND) {
1052 io->ci_lockreq = CILR_MANDATORY;
1053 }
1054 }
1055
1056 static ssize_t
1057 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1058 struct file *file, enum cl_io_type iot,
1059 loff_t *ppos, size_t count)
1060 {
1061 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1062 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1063 struct cl_io *io;
1064 ssize_t result;
1065
1066 restart:
1067 io = ccc_env_thread_io(env);
1068 ll_io_init(io, file, iot == CIT_WRITE);
1069
1070 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1071 struct vvp_io *vio = vvp_env_io(env);
1072 struct ccc_io *cio = ccc_env_io(env);
1073 int write_mutex_locked = 0;
1074
1075 cio->cui_fd = LUSTRE_FPRIVATE(file);
1076 vio->cui_io_subtype = args->via_io_subtype;
1077
1078 switch (vio->cui_io_subtype) {
1079 case IO_NORMAL:
1080 cio->cui_iov = args->u.normal.via_iov;
1081 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1082 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1083 cio->cui_iocb = args->u.normal.via_iocb;
1084 if ((iot == CIT_WRITE) &&
1085 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1086 if (mutex_lock_interruptible(&lli->
1087 lli_write_mutex))
1088 GOTO(out, result = -ERESTARTSYS);
1089 write_mutex_locked = 1;
1090 } else if (iot == CIT_READ) {
1091 down_read(&lli->lli_trunc_sem);
1092 }
1093 break;
1094 case IO_SPLICE:
1095 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1096 vio->u.splice.cui_flags = args->u.splice.via_flags;
1097 break;
1098 default:
1099 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1100 LBUG();
1101 }
1102 result = cl_io_loop(env, io);
1103 if (write_mutex_locked)
1104 mutex_unlock(&lli->lli_write_mutex);
1105 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1106 up_read(&lli->lli_trunc_sem);
1107 } else {
1108 /* cl_io_rw_init() handled IO */
1109 result = io->ci_result;
1110 }
1111
1112 if (io->ci_nob > 0) {
1113 result = io->ci_nob;
1114 *ppos = io->u.ci_wr.wr.crw_pos;
1115 }
1116 GOTO(out, result);
1117 out:
1118 cl_io_fini(env, io);
1119 /* If any bit been read/written (result != 0), we just return
1120 * short read/write instead of restart io. */
1121 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1122 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1123 iot == CIT_READ ? "read" : "write",
1124 file->f_dentry->d_name.name, *ppos, count);
1125 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1126 goto restart;
1127 }
1128
1129 if (iot == CIT_READ) {
1130 if (result >= 0)
1131 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1132 LPROC_LL_READ_BYTES, result);
1133 } else if (iot == CIT_WRITE) {
1134 if (result >= 0) {
1135 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1136 LPROC_LL_WRITE_BYTES, result);
1137 fd->fd_write_failed = false;
1138 } else if (result != -ERESTARTSYS) {
1139 fd->fd_write_failed = true;
1140 }
1141 }
1142
1143 return result;
1144 }
1145
1146 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1147 unsigned long nr_segs, loff_t pos)
1148 {
1149 struct lu_env *env;
1150 struct vvp_io_args *args;
1151 size_t count = 0;
1152 ssize_t result;
1153 int refcheck;
1154
1155 result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1156 if (result)
1157 return result;
1158
1159 env = cl_env_get(&refcheck);
1160 if (IS_ERR(env))
1161 return PTR_ERR(env);
1162
1163 args = vvp_env_args(env, IO_NORMAL);
1164 args->u.normal.via_iov = (struct iovec *)iov;
1165 args->u.normal.via_nrsegs = nr_segs;
1166 args->u.normal.via_iocb = iocb;
1167
1168 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1169 &iocb->ki_pos, count);
1170 cl_env_put(env, &refcheck);
1171 return result;
1172 }
1173
1174 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1175 loff_t *ppos)
1176 {
1177 struct lu_env *env;
1178 struct iovec *local_iov;
1179 struct kiocb *kiocb;
1180 ssize_t result;
1181 int refcheck;
1182
1183 env = cl_env_get(&refcheck);
1184 if (IS_ERR(env))
1185 return PTR_ERR(env);
1186
1187 local_iov = &vvp_env_info(env)->vti_local_iov;
1188 kiocb = &vvp_env_info(env)->vti_kiocb;
1189 local_iov->iov_base = (void __user *)buf;
1190 local_iov->iov_len = count;
1191 init_sync_kiocb(kiocb, file);
1192 kiocb->ki_pos = *ppos;
1193 kiocb->ki_nbytes = count;
1194
1195 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1196 *ppos = kiocb->ki_pos;
1197
1198 cl_env_put(env, &refcheck);
1199 return result;
1200 }
1201
1202 /*
1203 * Write to a file (through the page cache).
1204 */
1205 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1206 unsigned long nr_segs, loff_t pos)
1207 {
1208 struct lu_env *env;
1209 struct vvp_io_args *args;
1210 size_t count = 0;
1211 ssize_t result;
1212 int refcheck;
1213
1214 result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
1215 if (result)
1216 return result;
1217
1218 env = cl_env_get(&refcheck);
1219 if (IS_ERR(env))
1220 return PTR_ERR(env);
1221
1222 args = vvp_env_args(env, IO_NORMAL);
1223 args->u.normal.via_iov = (struct iovec *)iov;
1224 args->u.normal.via_nrsegs = nr_segs;
1225 args->u.normal.via_iocb = iocb;
1226
1227 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1228 &iocb->ki_pos, count);
1229 cl_env_put(env, &refcheck);
1230 return result;
1231 }
1232
1233 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1234 loff_t *ppos)
1235 {
1236 struct lu_env *env;
1237 struct iovec *local_iov;
1238 struct kiocb *kiocb;
1239 ssize_t result;
1240 int refcheck;
1241
1242 env = cl_env_get(&refcheck);
1243 if (IS_ERR(env))
1244 return PTR_ERR(env);
1245
1246 local_iov = &vvp_env_info(env)->vti_local_iov;
1247 kiocb = &vvp_env_info(env)->vti_kiocb;
1248 local_iov->iov_base = (void __user *)buf;
1249 local_iov->iov_len = count;
1250 init_sync_kiocb(kiocb, file);
1251 kiocb->ki_pos = *ppos;
1252 kiocb->ki_nbytes = count;
1253
1254 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1255 *ppos = kiocb->ki_pos;
1256
1257 cl_env_put(env, &refcheck);
1258 return result;
1259 }
1260
1261
1262
1263 /*
1264 * Send file content (through pagecache) somewhere with helper
1265 */
1266 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1267 struct pipe_inode_info *pipe, size_t count,
1268 unsigned int flags)
1269 {
1270 struct lu_env *env;
1271 struct vvp_io_args *args;
1272 ssize_t result;
1273 int refcheck;
1274
1275 env = cl_env_get(&refcheck);
1276 if (IS_ERR(env))
1277 return PTR_ERR(env);
1278
1279 args = vvp_env_args(env, IO_SPLICE);
1280 args->u.splice.via_pipe = pipe;
1281 args->u.splice.via_flags = flags;
1282
1283 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1284 cl_env_put(env, &refcheck);
1285 return result;
1286 }
1287
/*
 * Ask the data stack to recreate the OST object named by \a oi on stripe
 * \a ost_idx.  Backend for the LL_IOC_RECREATE_{OBJ,FID} ioctls below.
 * Requires the inode to already have a striping with objects.
 */
static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
			   obd_count ost_idx)
{
	struct obd_export *exp = ll_i2dtexp(inode);
	struct obd_trans_info oti = { 0 };
	struct obdo *oa = NULL;
	int lsm_size;
	int rc = 0;
	struct lov_stripe_md *lsm = NULL, *lsm2;

	OBDO_ALLOC(oa);
	if (oa == NULL)
		return -ENOMEM;

	lsm = ccc_inode_lsm_get(inode);
	if (!lsm_has_objects(lsm))
		GOTO(out, rc = -ENOENT);

	/* the lsm header plus one lov_oinfo per stripe */
	lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
		   (lsm->lsm_stripe_count));

	OBD_ALLOC_LARGE(lsm2, lsm_size);
	if (lsm2 == NULL)
		GOTO(out, rc = -ENOMEM);

	/* NOTE(review): o_nlink carries the target OST index here --
	 * presumably a field reused as an ioctl parameter; confirm against
	 * the OBD_FL_RECREATE_OBJS handling on the lov/osc side. */
	oa->o_oi = *oi;
	oa->o_nlink = ost_idx;
	oa->o_flags |= OBD_FL_RECREATE_OBJS;
	oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
	obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
				   OBD_MD_FLMTIME | OBD_MD_FLCTIME);
	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
	/* recreate against a scratch copy of the striping, with the size
	 * lock held so the layout cannot change mid-create */
	memcpy(lsm2, lsm, lsm_size);
	ll_inode_size_lock(inode);
	rc = obd_create(NULL, exp, oa, &lsm2, &oti);
	ll_inode_size_unlock(inode);

	OBD_FREE_LARGE(lsm2, lsm_size);
	GOTO(out, rc);	/* GOTO() logs the result and falls through */
out:
	ccc_inode_lsm_put(inode, lsm);
	OBDO_FREE(oa);
	return rc;
}
1332
1333 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1334 {
1335 struct ll_recreate_obj ucreat;
1336 struct ost_id oi;
1337
1338 if (!capable(CFS_CAP_SYS_ADMIN))
1339 return -EPERM;
1340
1341 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1342 sizeof(ucreat)))
1343 return -EFAULT;
1344
1345 ostid_set_seq_mdt0(&oi);
1346 ostid_set_id(&oi, ucreat.lrc_id);
1347 return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
1348 }
1349
1350 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1351 {
1352 struct lu_fid fid;
1353 struct ost_id oi;
1354 obd_count ost_idx;
1355
1356 if (!capable(CFS_CAP_SYS_ADMIN))
1357 return -EPERM;
1358
1359 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1360 return -EFAULT;
1361
1362 fid_to_ostid(&fid, &oi);
1363 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1364 return ll_lov_recreate(inode, &oi, ost_idx);
1365 }
1366
/*
 * Set the striping EA for \a inode by replaying an intent open with the
 * user-supplied lov_user_md.  Fails with -EEXIST if the file already has
 * a layout.  The transient open handle created by the intent is closed
 * again before returning.
 *
 * \param flags    open flags used for the intent open
 * \param lum      striping descriptor (already copied in from userspace)
 * \param lum_size size of \a lum in bytes
 */
int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
			     int flags, struct lov_user_md *lum, int lum_size)
{
	struct lov_stripe_md *lsm = NULL;
	struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
	int rc = 0;

	lsm = ccc_inode_lsm_get(inode);
	if (lsm != NULL) {
		/* a layout already exists: refuse to overwrite it */
		ccc_inode_lsm_put(inode, lsm);
		CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
		       inode->i_ino);
		GOTO(out, rc = -EEXIST);
	}

	ll_inode_size_lock(inode);
	rc = ll_intent_file_open(file, lum, lum_size, &oit);
	if (rc)
		GOTO(out_unlock, rc);
	rc = oit.d.lustre.it_status;
	if (rc < 0)
		GOTO(out_req_free, rc);

	/* the open handle is only a vehicle for setting the EA; close it */
	ll_release_openhandle(file->f_dentry, &oit);

 out_unlock:
	ll_inode_size_unlock(inode);
	ll_intent_release(&oit);
	/* NOTE(review): lsm is NULL on every path reaching this put (the
	 * non-NULL case exits through "out" above) -- presumably a
	 * tolerated no-op; confirm ccc_inode_lsm_put() accepts NULL, as
	 * other callers in this file also pass it. */
	ccc_inode_lsm_put(inode, lsm);
out:
	cl_lov_delay_create_clear(&file->f_flags);
	return rc;
out_req_free:
	/* intent open succeeded at the RPC level but reported an error:
	 * drop the reply before the common cleanup */
	ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
	goto out;
}
1403
/*
 * Fetch the LOV EA (striping information) of \a filename, a child of
 * \a inode, from the MDS.
 *
 * On big-endian hosts the EA is byte-swapped to host order before being
 * handed back.  Note that \a *lmmp, \a *lmm_size and \a *request are
 * stored unconditionally, even on error: the caller owns \a *request and
 * must release it with ptlrpc_req_finished() when non-NULL.
 *
 * \retval 0 on success, negative errno otherwise
 */
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
			     struct lov_mds_md **lmmp, int *lmm_size,
			     struct ptlrpc_request **request)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct mdt_body *body;
	struct lov_mds_md *lmm = NULL;
	struct ptlrpc_request *req = NULL;
	struct md_op_data *op_data;
	int rc, lmmsize;

	/* size the getattr reply buffer for the largest possible EA */
	rc = ll_get_max_mdsize(sbi, &lmmsize);
	if (rc)
		return rc;

	op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
				     strlen(filename), lmmsize,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
	if (rc < 0) {
		CDEBUG(D_INFO, "md_getattr_name failed "
		       "on %s: rc %d\n", filename, rc);
		GOTO(out, rc);
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	LASSERT(body != NULL); /* checked by mdc_getattr_name */

	lmmsize = body->eadatasize;

	if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
	    lmmsize == 0) {
		/* no striping EA present */
		GOTO(out, rc = -ENODATA);
	}

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
	LASSERT(lmm != NULL);

	/* only plain v1/v3 layouts are understood here */
	if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
	    (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
		GOTO(out, rc = -EPROTO);
	}

	/*
	 * This is coming from the MDS, so is probably in
	 * little endian. We convert it to host endian before
	 * passing it to userspace.
	 */
	/* the condition below is only true on big-endian hosts */
	if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
		int stripe_count;

		stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
		if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
			stripe_count = 0;

		/* if function called for directory - we should
		 * avoid swab not existent lsm objects */
		if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
			lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
				 stripe_count);
		} else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
				 stripe_count);
		}
	}

out:
	/* outputs are stored on error paths too; req ownership moves to
	 * the caller */
	*lmmp = lmm;
	*lmm_size = lmmsize;
	*request = req;
	return rc;
}
1487
1488 static int ll_lov_setea(struct inode *inode, struct file *file,
1489 unsigned long arg)
1490 {
1491 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1492 struct lov_user_md *lump;
1493 int lum_size = sizeof(struct lov_user_md) +
1494 sizeof(struct lov_user_ost_data);
1495 int rc;
1496
1497 if (!capable(CFS_CAP_SYS_ADMIN))
1498 return -EPERM;
1499
1500 OBD_ALLOC_LARGE(lump, lum_size);
1501 if (lump == NULL)
1502 return -ENOMEM;
1503
1504 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1505 OBD_FREE_LARGE(lump, lum_size);
1506 return -EFAULT;
1507 }
1508
1509 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1510
1511 OBD_FREE_LARGE(lump, lum_size);
1512 return rc;
1513 }
1514
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy a lov_user_md (v1 or v3) from
 * userspace, set the striping on \a inode, and on success echo the
 * instantiated layout back into the caller's buffer.
 */
static int ll_lov_setstripe(struct inode *inode, struct file *file,
			    unsigned long arg)
{
	struct lov_user_md_v3 lumv3;
	struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
	struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
	struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
	int lum_size, rc;
	int flags = FMODE_WRITE;

	/* first try with v1 which is smaller than v3 */
	lum_size = sizeof(struct lov_user_md_v1);
	if (copy_from_user(lumv1, lumv1p, lum_size))
		return -EFAULT;

	/* v3 magic: re-copy the whole, larger v3 descriptor */
	if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
		lum_size = sizeof(struct lov_user_md_v3);
		if (copy_from_user(&lumv3, lumv3p, lum_size))
			return -EFAULT;
	}

	rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
	if (rc == 0) {
		struct lov_stripe_md *lsm;
		__u32 gen;

		/* NOTE(review): put_user() return value is ignored here,
		 * so a fault on the user buffer goes unreported */
		put_user(0, &lumv1p->lmm_stripe_count);

		/* pick up the new layout, then copy it back to userspace */
		ll_layout_refresh(inode, &gen);
		lsm = ccc_inode_lsm_get(inode);
		rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
				   0, lsm, (void *)arg);
		ccc_inode_lsm_put(inode, lsm);
	}
	return rc;
}
1551
1552 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1553 {
1554 struct lov_stripe_md *lsm;
1555 int rc = -ENODATA;
1556
1557 lsm = ccc_inode_lsm_get(inode);
1558 if (lsm != NULL)
1559 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1560 lsm, (void *)arg);
1561 ccc_inode_lsm_put(inode, lsm);
1562 return rc;
1563 }
1564
/*
 * Take a group lock with id \a arg on behalf of this file descriptor.
 *
 * Two-phase: the "already locked" check is made under lli_lock, the lock
 * itself is acquired with lli_lock dropped (cl_get_grouplock() may
 * block), and the check is repeated before publishing the result so a
 * concurrent winner is detected.
 *
 * \retval 0 on success, -EINVAL if a group lock is already held,
 *	   -EOPNOTSUPP for no-lock files
 */
int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;
	int rc;

	if (ll_file_nolock(file))
		return -EOPNOTSUPP;

	/* first pass: refuse if this fd already holds a group lock */
	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		CWARN("group lock already existed with gid %lu\n",
		      fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock == NULL);
	spin_unlock(&lli->lli_lock);

	/* potentially blocking; must not be called under lli_lock */
	rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
			      arg, (file->f_flags & O_NONBLOCK), &grouplock);
	if (rc)
		return rc;

	/* second pass: another thread may have won while we were blocked */
	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		spin_unlock(&lli->lli_lock);
		CERROR("another thread just won the race\n");
		cl_put_grouplock(&grouplock);
		return -EINVAL;
	}

	fd->fd_flags |= LL_FILE_GROUP_LOCKED;
	fd->fd_grouplock = grouplock;
	spin_unlock(&lli->lli_lock);

	CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
	return 0;
}
1605
/*
 * Release the group lock with id \a arg held by this file descriptor.
 * The fd state is cleared under lli_lock; the (potentially blocking)
 * cl_put_grouplock() is called on a local copy after the lock is dropped.
 *
 * \retval 0 on success, -EINVAL if no group lock is held or the gid
 *	   does not match
 */
int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;

	spin_lock(&lli->lli_lock);
	if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
		spin_unlock(&lli->lli_lock);
		CWARN("no group lock held\n");
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock != NULL);

	/* the caller must release with the same gid it locked with */
	if (fd->fd_grouplock.cg_gid != arg) {
		CWARN("group lock %lu doesn't match current id %lu\n",
		      arg, fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}

	/* detach the lock from the fd before dropping lli_lock */
	grouplock = fd->fd_grouplock;
	memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
	fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
	spin_unlock(&lli->lli_lock);

	cl_put_grouplock(&grouplock);
	CDEBUG(D_INFO, "group lock %lu released\n", arg);
	return 0;
}
1636
/**
 * Close inode open handle
 *
 * \param dentry [in] dentry which contains the inode
 * \param it [in,out] intent which contains open info and result
 *
 * \retval 0 success
 * \retval <0 failure
 */
int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
{
	struct inode *inode = dentry->d_inode;
	struct obd_client_handle *och;
	int rc;

	LASSERT(inode);

	/* Root ? Do nothing. */
	if (dentry->d_inode->i_sb->s_root == dentry)
		return 0;

	/* No open handle to close? Move away */
	if (!it_disposition(it, DISP_OPEN_OPEN))
		return 0;

	LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);

	OBD_ALLOC(och, sizeof(*och));
	if (!och)
		GOTO(out, rc = -ENOMEM);

	/* extract the MDS open handle from the intent's open reply */
	ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);

	/* NOTE(review): och ownership appears to pass to
	 * ll_close_inode_openhandle() -- confirm it frees och on all
	 * paths; nothing here releases it */
	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
				       inode, och, NULL);
out:
	/* this one is in place of ll_file_open */
	if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
	}
	return rc;
}
1680
/**
 * Get size for inode for which FIEMAP mapping is requested.
 * Make the FIEMAP get_info call and returns the result.
 *
 * \param fiemap    user-supplied fiemap request/reply buffer
 * \param num_bytes total size of \a fiemap including extent array
 *
 * \retval 0 on success, negative errno otherwise
 */
int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
		 int num_bytes)
{
	struct obd_export *exp = ll_i2dtexp(inode);
	struct lov_stripe_md *lsm = NULL;
	struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
	int vallen = num_bytes;
	int rc;

	/* Checks for fiemap flags */
	if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
		/* report the supported flag set back to the caller */
		fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
		return -EBADR;
	}

	/* Check for FIEMAP_FLAG_SYNC: flush dirty pages first so the
	 * OST-side mapping is up to date */
	if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
		rc = filemap_fdatawrite(inode->i_mapping);
		if (rc)
			return rc;
	}

	lsm = ccc_inode_lsm_get(inode);
	if (lsm == NULL)
		return -ENOENT;

	/* If the stripe_count > 1 and the application does not understand
	 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
	 */
	if (lsm->lsm_stripe_count > 1 &&
	    !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
		GOTO(out, rc = -EOPNOTSUPP);

	fm_key.oa.o_oi = lsm->lsm_oi;
	fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;

	obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
	obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
	/* If filesize is 0, then there would be no objects for mapping */
	if (fm_key.oa.o_size == 0) {
		fiemap->fm_mapped_extents = 0;
		GOTO(out, rc = 0);
	}

	/* the request part of the user buffer is sent as part of the key */
	memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));

	rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
			  fiemap, lsm);
	if (rc)
		CERROR("obd_get_info failed: rc = %d\n", rc);

out:
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}
1740
1741 int ll_fid2path(struct inode *inode, void *arg)
1742 {
1743 struct obd_export *exp = ll_i2mdexp(inode);
1744 struct getinfo_fid2path *gfout, *gfin;
1745 int outsize, rc;
1746
1747 if (!capable(CFS_CAP_DAC_READ_SEARCH) &&
1748 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1749 return -EPERM;
1750
1751 /* Need to get the buflen */
1752 OBD_ALLOC_PTR(gfin);
1753 if (gfin == NULL)
1754 return -ENOMEM;
1755 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1756 OBD_FREE_PTR(gfin);
1757 return -EFAULT;
1758 }
1759
1760 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1761 OBD_ALLOC(gfout, outsize);
1762 if (gfout == NULL) {
1763 OBD_FREE_PTR(gfin);
1764 return -ENOMEM;
1765 }
1766 memcpy(gfout, gfin, sizeof(*gfout));
1767 OBD_FREE_PTR(gfin);
1768
1769 /* Call mdc_iocontrol */
1770 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1771 if (rc)
1772 GOTO(gf_free, rc);
1773
1774 if (copy_to_user(arg, gfout, outsize))
1775 rc = -EFAULT;
1776
1777 gf_free:
1778 OBD_FREE(gfout, outsize);
1779 return rc;
1780 }
1781
1782 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1783 {
1784 struct ll_user_fiemap *fiemap_s;
1785 size_t num_bytes, ret_bytes;
1786 unsigned int extent_count;
1787 int rc = 0;
1788
1789 /* Get the extent count so we can calculate the size of
1790 * required fiemap buffer */
1791 if (get_user(extent_count,
1792 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1793 return -EFAULT;
1794 num_bytes = sizeof(*fiemap_s) + (extent_count *
1795 sizeof(struct ll_fiemap_extent));
1796
1797 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1798 if (fiemap_s == NULL)
1799 return -ENOMEM;
1800
1801 /* get the fiemap value */
1802 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1803 sizeof(*fiemap_s)))
1804 GOTO(error, rc = -EFAULT);
1805
1806 /* If fm_extent_count is non-zero, read the first extent since
1807 * it is used to calculate end_offset and device from previous
1808 * fiemap call. */
1809 if (extent_count) {
1810 if (copy_from_user(&fiemap_s->fm_extents[0],
1811 (char __user *)arg + sizeof(*fiemap_s),
1812 sizeof(struct ll_fiemap_extent)))
1813 GOTO(error, rc = -EFAULT);
1814 }
1815
1816 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1817 if (rc)
1818 GOTO(error, rc);
1819
1820 ret_bytes = sizeof(struct ll_user_fiemap);
1821
1822 if (extent_count != 0)
1823 ret_bytes += (fiemap_s->fm_mapped_extents *
1824 sizeof(struct ll_fiemap_extent));
1825
1826 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1827 rc = -EFAULT;
1828
1829 error:
1830 OBD_FREE_LARGE(fiemap_s, num_bytes);
1831 return rc;
1832 }
1833
/*
 * Read the data_version for inode.
 *
 * This value is computed using stripe object version on OST.
 * Version is computed using server side locking.
 *
 * A file with no striping objects is reported as version 0.
 *
 * \param data_version [out] receives the version on success
 * \param extent_lock  Take extent lock. Not needed if a process is already
 *		       holding the OST object group locks.
 *
 * \retval 0 on success, -EOPNOTSUPP if the servers do not report a data
 *	   version, other negative errno on failure
 */
int ll_data_version(struct inode *inode, __u64 *data_version,
		    int extent_lock)
{
	struct lov_stripe_md *lsm = NULL;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obdo *obdo = NULL;
	int rc;

	/* If no stripe, we consider version is 0. */
	lsm = ccc_inode_lsm_get(inode);
	if (!lsm_has_objects(lsm)) {
		*data_version = 0;
		CDEBUG(D_INODE, "No object for inode\n");
		GOTO(out, rc = 0);
	}

	OBD_ALLOC_PTR(obdo);
	if (obdo == NULL)
		GOTO(out, rc = -ENOMEM);

	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
	if (rc == 0) {
		/* the version is only usable if the servers filled it in */
		if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
			rc = -EOPNOTSUPP;
		else
			*data_version = obdo->o_data_version;
	}

	OBD_FREE_PTR(obdo);
out:
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}
1876
/*
 * Trigger a HSM release request for the provided inode.
 *
 * Opens a write lease on the file, snapshots its data version and merged
 * attributes, then closes the handle with MDS_OPEN_RELEASE semantics so
 * the MDT can free the OST objects.
 */
int ll_hsm_release(struct inode *inode)
{
	struct cl_env_nest nest;
	struct lu_env *env;
	struct obd_client_handle *och = NULL;
	__u64 data_version = 0;
	int rc;


	CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
	       ll_get_fsname(inode->i_sb, NULL, 0),
	       PFID(&ll_i2info(inode)->lli_fid));

	och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
	if (IS_ERR(och))
		GOTO(out, rc = PTR_ERR(och));

	/* Grab latest data_version and [am]time values */
	rc = ll_data_version(inode, &data_version, 1);
	if (rc != 0)
		GOTO(out, rc);

	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		GOTO(out, rc = PTR_ERR(env));

	/* pull the final size/times into the inode before releasing */
	ll_merge_lvb(env, inode);
	cl_env_nested_put(&nest, env);

	/* Release the file.
	 * NB: lease lock handle is released in mdc_hsm_release_pack() because
	 * we still need it to pack l_remote_handle to MDT. */
	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
				       &data_version);
	/* och consumed by the close above; NULL it so the error path
	 * below does not close the lease a second time */
	och = NULL;


out:
	if (och != NULL && !IS_ERR(och)) /* close the file */
		ll_lease_close(och, inode, NULL);

	return rc;
}
1923
/* Scratch state for ll_swap_layouts(); the pairs may be swapped so that
 * inode1 always sorts first by FID. */
struct ll_swap_stack {
	struct iattr ia1, ia2;		/* saved [am]times to restore */
	__u64 dv1, dv2;			/* expected data versions */
	struct inode *inode1, *inode2;
	bool check_dv1, check_dv2;	/* verify dv before swapping? */
};
1930
/*
 * LL_IOC_LOV_SWAP_LAYOUTS handler: atomically exchange the layouts of two
 * files on the MDT, optionally verifying their data versions first and
 * optionally restoring their original mtime/atime afterwards.
 *
 * The pair is ordered by FID before locking so that concurrent swaps
 * always acquire resources in the same order.
 */
static int ll_swap_layouts(struct file *file1, struct file *file2,
			   struct lustre_swap_layouts *lsl)
{
	struct mdc_swap_layouts msl;
	struct md_op_data *op_data;
	__u32 gid;
	__u64 dv;
	struct ll_swap_stack *llss = NULL;
	int rc;

	OBD_ALLOC_PTR(llss);
	if (llss == NULL)
		return -ENOMEM;

	llss->inode1 = file1->f_dentry->d_inode;
	llss->inode2 = file2->f_dentry->d_inode;

	if (!S_ISREG(llss->inode2->i_mode))
		GOTO(free, rc = -EINVAL);

	/* both files must be writable by the caller */
	if (inode_permission(llss->inode1, MAY_WRITE) ||
	    inode_permission(llss->inode2, MAY_WRITE))
		GOTO(free, rc = -EPERM);

	/* layouts can only be swapped within one filesystem */
	if (llss->inode2->i_sb != llss->inode1->i_sb)
		GOTO(free, rc = -EXDEV);

	/* we use 2 bool because it is easier to swap than 2 bits */
	if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
		llss->check_dv1 = true;

	if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
		llss->check_dv2 = true;

	/* we cannot use lsl->sl_dvX directly because we may swap them */
	llss->dv1 = lsl->sl_dv1;
	llss->dv2 = lsl->sl_dv2;

	rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
	if (rc == 0) /* same file, done! */
		GOTO(free, rc = 0);

	if (rc < 0) { /* sequentialize it */
		swap(llss->inode1, llss->inode2);
		swap(file1, file2);
		swap(llss->dv1, llss->dv2);
		swap(llss->check_dv1, llss->check_dv2);
	}

	gid = lsl->sl_gid;
	if (gid != 0) { /* application asks to flush dirty cache */
		rc = ll_get_grouplock(llss->inode1, file1, gid);
		if (rc < 0)
			GOTO(free, rc);

		rc = ll_get_grouplock(llss->inode2, file2, gid);
		if (rc < 0) {
			ll_put_grouplock(llss->inode1, file1, gid);
			GOTO(free, rc);
		}
	}

	/* to be able to restore mtime and atime after swap
	 * we need to first save them */
	if (lsl->sl_flags &
	    (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
		llss->ia1.ia_mtime = llss->inode1->i_mtime;
		llss->ia1.ia_atime = llss->inode1->i_atime;
		llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
		llss->ia2.ia_mtime = llss->inode2->i_mtime;
		llss->ia2.ia_atime = llss->inode2->i_atime;
		llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
	}

	/* ultimate check, before swapping the layouts we check if
	 * dataversion has changed (if requested) */
	if (llss->check_dv1) {
		rc = ll_data_version(llss->inode1, &dv, 0);
		if (rc)
			GOTO(putgl, rc);
		if (dv != llss->dv1)
			GOTO(putgl, rc = -EAGAIN);
	}

	if (llss->check_dv2) {
		rc = ll_data_version(llss->inode2, &dv, 0);
		if (rc)
			GOTO(putgl, rc);
		if (dv != llss->dv2)
			GOTO(putgl, rc = -EAGAIN);
	}

	/* struct md_op_data is used to send the swap args to the mdt
	 * only flags is missing, so we use struct mdc_swap_layouts
	 * through the md_op_data->op_data */
	/* flags from user space have to be converted before they are sent to
	 * the server; no flag is sent today, they are only used on the
	 * client */
	msl.msl_flags = 0;
	/* NOTE(review): this assignment is dead -- rc is overwritten on
	 * both branches below */
	rc = -ENOMEM;
	op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
				     0, LUSTRE_OPC_ANY, &msl);
	if (IS_ERR(op_data))
		GOTO(free, rc = PTR_ERR(op_data));

	rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
			   sizeof(*op_data), op_data, NULL);
	ll_finish_md_op_data(op_data);

putgl:
	if (gid != 0) {
		ll_put_grouplock(llss->inode2, file2, gid);
		ll_put_grouplock(llss->inode1, file1, gid);
	}

	/* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
	if (rc != 0)
		GOTO(free, rc);

	/* clear useless flags */
	if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
		llss->ia1.ia_valid &= ~ATTR_MTIME;
		llss->ia2.ia_valid &= ~ATTR_MTIME;
	}

	if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
		llss->ia1.ia_valid &= ~ATTR_ATIME;
		llss->ia2.ia_valid &= ~ATTR_ATIME;
	}

	/* update time if requested; ia1/ia2 were swapped together with
	 * the inodes, so each set of saved times follows its file */
	rc = 0;
	if (llss->ia2.ia_valid != 0) {
		mutex_lock(&llss->inode1->i_mutex);
		rc = ll_setattr(file1->f_dentry, &llss->ia2);
		mutex_unlock(&llss->inode1->i_mutex);
	}

	if (llss->ia1.ia_valid != 0) {
		int rc1;

		mutex_lock(&llss->inode2->i_mutex);
		rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
		mutex_unlock(&llss->inode2->i_mutex);
		if (rc == 0)
			rc = rc1;
	}

free:
	if (llss != NULL)
		OBD_FREE_PTR(llss);

	return rc;
}
2084
2085 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2086 {
2087 struct md_op_data *op_data;
2088 int rc;
2089
2090 /* Non-root users are forbidden to set or clear flags which are
2091 * NOT defined in HSM_USER_MASK. */
2092 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2093 !capable(CFS_CAP_SYS_ADMIN))
2094 return -EPERM;
2095
2096 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2097 LUSTRE_OPC_ANY, hss);
2098 if (IS_ERR(op_data))
2099 return PTR_ERR(op_data);
2100
2101 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2102 sizeof(*op_data), op_data, NULL);
2103
2104 ll_finish_md_op_data(op_data);
2105
2106 return rc;
2107 }
2108
/*
 * LL_IOC_HSM_IMPORT handler: mark a regular file as archived, existing
 * and released in HSM, then restore the size, mode, ownership and
 * timestamps recorded in the hsm_user_import descriptor.
 *
 * \retval 0 on success, negative errno otherwise
 */
static int ll_hsm_import(struct inode *inode, struct file *file,
			 struct hsm_user_import *hui)
{
	struct hsm_state_set *hss = NULL;
	struct iattr *attr = NULL;
	int rc;


	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	/* set HSM flags */
	OBD_ALLOC_PTR(hss);
	if (hss == NULL)
		GOTO(out, rc = -ENOMEM);

	hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
	hss->hss_archive_id = hui->hui_archive_id;
	hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
	rc = ll_hsm_state_set(inode, hss);
	if (rc != 0)
		GOTO(out, rc);

	OBD_ALLOC_PTR(attr);
	if (attr == NULL)
		GOTO(out, rc = -ENOMEM);

	/* restore the attributes recorded at archive time; force a
	 * regular-file mode */
	attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
	attr->ia_mode |= S_IFREG;
	attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
	attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
	attr->ia_size = hui->hui_size;
	attr->ia_mtime.tv_sec = hui->hui_mtime;
	attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
	attr->ia_atime.tv_sec = hui->hui_atime;
	attr->ia_atime.tv_nsec = hui->hui_atime_ns;

	attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
			 ATTR_UID | ATTR_GID |
			 ATTR_MTIME | ATTR_MTIME_SET |
			 ATTR_ATIME | ATTR_ATIME_SET;

	rc = ll_setattr_raw(file->f_dentry, attr, true);
	/* a released file has no data, so -ENODATA here is expected */
	if (rc == -ENODATA)
		rc = 0;

out:
	if (hss != NULL)
		OBD_FREE_PTR(hss);

	if (attr != NULL)
		OBD_FREE_PTR(attr);

	return rc;
}
2164
/*
 * Main ioctl entry point for regular Lustre files.
 *
 * Dispatches Lustre-specific ioctls (striping, layout swap, HSM state,
 * leases, grouplocks, FID/path translation, ...).  Anything unrecognized
 * is first offered to dynamically registered handlers via
 * ll_iocontrol_call() and finally forwarded to the data stack through
 * obd_iocontrol().
 */
long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	int flags, rc;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
	       inode->i_generation, inode, cmd);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);

	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
	if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
		return -ENOTTY;

	switch(cmd) {
	case LL_IOC_GETFLAGS:
		/* Get the current value of the file flags */
		return put_user(fd->fd_flags, (int *)arg);
	case LL_IOC_SETFLAGS:
	case LL_IOC_CLRFLAGS:
		/* Set or clear specific file flags */
		/* XXX This probably needs checks to ensure the flags are
		 * not abused, and to handle any flag side effects.
		 */
		if (get_user(flags, (int *) arg))
			return -EFAULT;

		if (cmd == LL_IOC_SETFLAGS) {
			/* Locking may only be disabled on O_DIRECT files. */
			if ((flags & LL_FILE_IGNORE_LOCK) &&
			    !(file->f_flags & O_DIRECT)) {
				CERROR("%s: unable to disable locking on "
				       "non-O_DIRECT file\n", current->comm);
				return -EINVAL;
			}

			fd->fd_flags |= flags;
		} else {
			fd->fd_flags &= ~flags;
		}
		return 0;
	case LL_IOC_LOV_SETSTRIPE:
		return ll_lov_setstripe(inode, file, arg);
	case LL_IOC_LOV_SETEA:
		return ll_lov_setea(inode, file, arg);
	case LL_IOC_LOV_SWAP_LAYOUTS: {
		struct file *file2;
		struct lustre_swap_layouts lsl;

		if (copy_from_user(&lsl, (char *)arg,
				   sizeof(struct lustre_swap_layouts)))
			return -EFAULT;

		/* Both files must be writable for a layout swap. */
		if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
			return -EPERM;

		file2 = fget(lsl.sl_fd);
		if (file2 == NULL)
			return -EBADF;

		rc = -EPERM;
		if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
			rc = ll_swap_layouts(file, file2, &lsl);
		fput(file2);
		return rc;
	}
	case LL_IOC_LOV_GETSTRIPE:
		return ll_lov_getstripe(inode, arg);
	case LL_IOC_RECREATE_OBJ:
		return ll_lov_recreate_obj(inode, arg);
	case LL_IOC_RECREATE_FID:
		return ll_lov_recreate_fid(inode, arg);
	case FSFILT_IOC_FIEMAP:
		return ll_ioctl_fiemap(inode, arg);
	case FSFILT_IOC_GETFLAGS:
	case FSFILT_IOC_SETFLAGS:
		return ll_iocontrol(inode, file, cmd, arg);
	case FSFILT_IOC_GETVERSION_OLD:
	case FSFILT_IOC_GETVERSION:
		return put_user(inode->i_generation, (int *)arg);
	case LL_IOC_GROUP_LOCK:
		return ll_get_grouplock(inode, file, arg);
	case LL_IOC_GROUP_UNLOCK:
		return ll_put_grouplock(inode, file, arg);
	case IOC_OBD_STATFS:
		return ll_obd_statfs(inode, (void *)arg);

	/* We need to special case any other ioctls we want to handle,
	 * to send them to the MDS/OST as appropriate and to properly
	 * network encode the arg field.
	case FSFILT_IOC_SETVERSION_OLD:
	case FSFILT_IOC_SETVERSION:
	*/
	case LL_IOC_FLUSHCTX:
		return ll_flush_ctx(inode);
	case LL_IOC_PATH2FID: {
		if (copy_to_user((void *)arg, ll_inode2fid(inode),
				 sizeof(struct lu_fid)))
			return -EFAULT;

		return 0;
	}
	case OBD_IOC_FID2PATH:
		return ll_fid2path(inode, (void *)arg);
	case LL_IOC_DATA_VERSION: {
		struct ioc_data_version idv;
		int rc;

		if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
			return -EFAULT;

		rc = ll_data_version(inode, &idv.idv_version,
				     !(idv.idv_flags & LL_DV_NOFLUSH));

		if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
			return -EFAULT;

		return rc;
	}

	case LL_IOC_GET_MDTIDX: {
		int mdtidx;

		mdtidx = ll_get_mdt_idx(inode);
		if (mdtidx < 0)
			return mdtidx;

		if (put_user((int)mdtidx, (int*)arg))
			return -EFAULT;

		return 0;
	}
	case OBD_IOC_GETDTNAME:
	case OBD_IOC_GETMDNAME:
		return ll_get_obd_name(inode, cmd, arg);
	case LL_IOC_HSM_STATE_GET: {
		struct md_op_data *op_data;
		struct hsm_user_state *hus;
		int rc;

		OBD_ALLOC_PTR(hus);
		if (hus == NULL)
			return -ENOMEM;

		/* hus is carried inside op_data and filled by the MDC. */
		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
					     LUSTRE_OPC_ANY, hus);
		if (IS_ERR(op_data)) {
			OBD_FREE_PTR(hus);
			return PTR_ERR(op_data);
		}

		rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
				   op_data, NULL);

		if (copy_to_user((void *)arg, hus, sizeof(*hus)))
			rc = -EFAULT;

		ll_finish_md_op_data(op_data);
		OBD_FREE_PTR(hus);
		return rc;
	}
	case LL_IOC_HSM_STATE_SET: {
		struct hsm_state_set *hss;
		int rc;

		OBD_ALLOC_PTR(hss);
		if (hss == NULL)
			return -ENOMEM;

		if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
			OBD_FREE_PTR(hss);
			return -EFAULT;
		}

		rc = ll_hsm_state_set(inode, hss);

		OBD_FREE_PTR(hss);
		return rc;
	}
	case LL_IOC_HSM_ACTION: {
		struct md_op_data *op_data;
		struct hsm_current_action *hca;
		int rc;

		OBD_ALLOC_PTR(hca);
		if (hca == NULL)
			return -ENOMEM;

		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
					     LUSTRE_OPC_ANY, hca);
		if (IS_ERR(op_data)) {
			OBD_FREE_PTR(hca);
			return PTR_ERR(op_data);
		}

		rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
				   op_data, NULL);

		if (copy_to_user((char *)arg, hca, sizeof(*hca)))
			rc = -EFAULT;

		ll_finish_md_op_data(op_data);
		OBD_FREE_PTR(hca);
		return rc;
	}
	case LL_IOC_SET_LEASE: {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct obd_client_handle *och = NULL;
		bool lease_broken;
		fmode_t mode = 0;

		switch (arg) {
		case F_WRLCK:
			/* A lease may not exceed the open mode of the file. */
			if (!(file->f_mode & FMODE_WRITE))
				return -EPERM;
			mode = FMODE_WRITE;
			break;
		case F_RDLCK:
			if (!(file->f_mode & FMODE_READ))
				return -EPERM;
			mode = FMODE_READ;
			break;
		case F_UNLCK:
			/* Detach any held lease under the mutex, close it
			 * outside. */
			mutex_lock(&lli->lli_och_mutex);
			if (fd->fd_lease_och != NULL) {
				och = fd->fd_lease_och;
				fd->fd_lease_och = NULL;
			}
			mutex_unlock(&lli->lli_och_mutex);

			if (och != NULL) {
				mode = och->och_flags &
				       (FMODE_READ|FMODE_WRITE);
				rc = ll_lease_close(och, inode, &lease_broken);
				if (rc == 0 && lease_broken)
					mode = 0;
			} else {
				rc = -ENOLCK;
			}

			/* return the type of lease or error */
			return rc < 0 ? rc : (int)mode;
		default:
			return -EINVAL;
		}

		CDEBUG(D_INODE, "Set lease with mode %d\n", mode);

		/* apply for lease */
		och = ll_lease_open(inode, file, mode, 0);
		if (IS_ERR(och))
			return PTR_ERR(och);

		/* Install the new lease unless one was raced in; in that
		 * case close ours and report -EBUSY. */
		rc = 0;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och == NULL) {
			fd->fd_lease_och = och;
			och = NULL;
		}
		mutex_unlock(&lli->lli_och_mutex);
		if (och != NULL) {
			/* impossible now that only excl is supported for now */
			ll_lease_close(och, inode, &lease_broken);
			rc = -EBUSY;
		}
		return rc;
	}
	case LL_IOC_GET_LEASE: {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct ldlm_lock *lock = NULL;

		/* Report the lease mode only while its DLM lock is still
		 * granted (not being cancelled). */
		rc = 0;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och != NULL) {
			struct obd_client_handle *och = fd->fd_lease_och;

			lock = ldlm_handle2lock(&och->och_lease_handle);
			if (lock != NULL) {
				lock_res_and_lock(lock);
				if (!ldlm_is_cancel(lock))
					rc = och->och_flags &
					     (FMODE_READ | FMODE_WRITE);
				unlock_res_and_lock(lock);
				ldlm_lock_put(lock);
			}
		}
		mutex_unlock(&lli->lli_och_mutex);
		return rc;
	}
	case LL_IOC_HSM_IMPORT: {
		struct hsm_user_import *hui;

		OBD_ALLOC_PTR(hui);
		if (hui == NULL)
			return -ENOMEM;

		if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
			OBD_FREE_PTR(hui);
			return -EFAULT;
		}

		rc = ll_hsm_import(inode, file, hui);

		OBD_FREE_PTR(hui);
		return rc;
	}
	default: {
		int err;

		/* Dynamically registered handlers get first refusal ... */
		if (LLIOC_STOP ==
		    ll_iocontrol_call(inode, file, cmd, arg, &err))
			return err;

		/* ... and anything left over goes to the data stack. */
		return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
				     (void *)arg);
	}
	}
}
2482
2483
2484 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2485 {
2486 struct inode *inode = file->f_dentry->d_inode;
2487 loff_t retval, eof = 0;
2488
2489 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2490 (origin == SEEK_CUR) ? file->f_pos : 0);
2491 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2492 inode->i_ino, inode->i_generation, inode, retval, retval,
2493 origin);
2494 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2495
2496 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2497 retval = ll_glimpse_size(inode);
2498 if (retval != 0)
2499 return retval;
2500 eof = i_size_read(inode);
2501 }
2502
2503 retval = generic_file_llseek_size(file, offset, origin,
2504 ll_file_maxbytes(inode), eof);
2505 return retval;
2506 }
2507
2508 int ll_flush(struct file *file, fl_owner_t id)
2509 {
2510 struct inode *inode = file->f_dentry->d_inode;
2511 struct ll_inode_info *lli = ll_i2info(inode);
2512 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2513 int rc, err;
2514
2515 LASSERT(!S_ISDIR(inode->i_mode));
2516
2517 /* catch async errors that were recorded back when async writeback
2518 * failed for pages in this mapping. */
2519 rc = lli->lli_async_rc;
2520 lli->lli_async_rc = 0;
2521 err = lov_read_and_clear_async_rc(lli->lli_clob);
2522 if (rc == 0)
2523 rc = err;
2524
2525 /* The application has been told write failure already.
2526 * Do not report failure again. */
2527 if (fd->fd_write_failed)
2528 return 0;
2529 return rc ? -EIO : 0;
2530 }
2531
2532 /**
2533 * Called to make sure a portion of file has been written out.
2534 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2535 *
2536 * Return how many pages have been written.
2537 */
2538 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2539 enum cl_fsync_mode mode, int ignore_layout)
2540 {
2541 struct cl_env_nest nest;
2542 struct lu_env *env;
2543 struct cl_io *io;
2544 struct obd_capa *capa = NULL;
2545 struct cl_fsync_io *fio;
2546 int result;
2547
2548 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2549 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2550 return -EINVAL;
2551
2552 env = cl_env_nested_get(&nest);
2553 if (IS_ERR(env))
2554 return PTR_ERR(env);
2555
2556 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2557
2558 io = ccc_env_thread_io(env);
2559 io->ci_obj = cl_i2info(inode)->lli_clob;
2560 io->ci_ignore_layout = ignore_layout;
2561
2562 /* initialize parameters for sync */
2563 fio = &io->u.ci_fsync;
2564 fio->fi_capa = capa;
2565 fio->fi_start = start;
2566 fio->fi_end = end;
2567 fio->fi_fid = ll_inode2fid(inode);
2568 fio->fi_mode = mode;
2569 fio->fi_nr_written = 0;
2570
2571 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2572 result = cl_io_loop(env, io);
2573 else
2574 result = io->ci_result;
2575 if (result == 0)
2576 result = fio->fi_nr_written;
2577 cl_io_fini(env, io);
2578 cl_env_nested_put(&nest, env);
2579
2580 capa_put(capa);
2581
2582 return result;
2583 }
2584
/*
 * ->fsync() for Lustre files: flush dirty pages locally, pick up any
 * stashed async writeback errors, sync metadata to the MDS via md_sync()
 * and, for regular files, force data out to the OSTs with CL_FSYNC_ALL.
 *
 * NOTE(review): the comment that used to sit here described an obsolete
 * variant taking an explicit dentry; this prototype always uses
 * file->f_dentry.
 */

int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct dentry *dentry = file->f_dentry;
	struct inode *inode = dentry->d_inode;
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ptlrpc_request *req;
	struct obd_capa *oc;
	int rc, err;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
	       inode->i_generation, inode);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);

	/* Write out the requested range before syncing anything else. */
	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	mutex_lock(&inode->i_mutex);

	/* catch async errors that were recorded back when async writeback
	 * failed for pages in this mapping. */
	if (!S_ISDIR(inode->i_mode)) {
		err = lli->lli_async_rc;
		lli->lli_async_rc = 0;
		if (rc == 0)
			rc = err;
		err = lov_read_and_clear_async_rc(lli->lli_clob);
		if (rc == 0)
			rc = err;
	}

	/* Sync the metadata on the MDS. */
	oc = ll_mdscapa_get(inode);
	err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
		      &req);
	capa_put(oc);
	if (!rc)
		rc = err;
	/* req is only valid when md_sync() succeeded. */
	if (!err)
		ptlrpc_req_finished(req);

	/* NOTE(review): the OST-side sync runs only when datasync is set --
	 * confirm that is intentional (usually !datasync implies more work,
	 * not less). */
	if (datasync && S_ISREG(inode->i_mode)) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
					 CL_FSYNC_ALL, 0);
		if (rc == 0 && err < 0)
			rc = err;
		/* Remember the outcome so ll_flush() does not double-report. */
		if (rc < 0)
			fd->fd_write_failed = true;
		else
			fd->fd_write_failed = false;
	}

	mutex_unlock(&inode->i_mutex);
	return rc;
}
2644
/*
 * ->flock()/->lock() for cluster-coherent locking: translate the kernel
 * file_lock into an LDLM flock enqueue sent to the MDT, then mirror the
 * outcome into the local VFS lock lists (flock_lock_file_wait /
 * posix_lock_file_wait) so local bookkeeping stays consistent.  If the
 * local step fails, the DLM lock is dropped again with an LCK_NL enqueue.
 */
int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ldlm_enqueue_info einfo = {
		.ei_type	= LDLM_FLOCK,
		.ei_cb_cp	= ldlm_flock_completion_ast,
		.ei_cbdata	= file_lock,
	};
	struct md_op_data *op_data;
	struct lustre_handle lockh = {0};
	ldlm_policy_data_t flock = {{0}};
	int flags = 0;
	int rc;
	int rc2 = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
	       inode->i_ino, file_lock);

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);

	if (file_lock->fl_flags & FL_FLOCK) {
		LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
		/* flocks are whole-file locks */
		flock.l_flock.end = OFFSET_MAX;
		/* For flocks owner is determined by the local file desctiptor*/
		flock.l_flock.owner = (unsigned long)file_lock->fl_file;
	} else if (file_lock->fl_flags & FL_POSIX) {
		flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
		flock.l_flock.start = file_lock->fl_start;
		flock.l_flock.end = file_lock->fl_end;
	} else {
		return -EINVAL;
	}
	flock.l_flock.pid = file_lock->fl_pid;

	/* Somewhat ugly workaround for svc lockd.
	 * lockd installs custom fl_lmops->lm_compare_owner that checks
	 * for the fl_owner to be the same (which it always is on local node
	 * I guess between lockd processes) and then compares pid.
	 * As such we assign pid to the owner field to make it all work,
	 * conflict with normal locks is unlikely since pid space and
	 * pointer space for current->files are not intersecting */
	if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
		flock.l_flock.owner = (unsigned long)file_lock->fl_pid;

	/* Map the fcntl lock type onto an LDLM lock mode. */
	switch (file_lock->fl_type) {
	case F_RDLCK:
		einfo.ei_mode = LCK_PR;
		break;
	case F_UNLCK:
		/* An unlock request may or may not have any relation to
		 * existing locks so we may not be able to pass a lock handle
		 * via a normal ldlm_lock_cancel() request. The request may even
		 * unlock a byte range in the middle of an existing lock. In
		 * order to process an unlock request we need all of the same
		 * information that is given with a normal read or write record
		 * lock request. To avoid creating another ldlm unlock (cancel)
		 * message we'll treat a LCK_NL flock request as an unlock. */
		einfo.ei_mode = LCK_NL;
		break;
	case F_WRLCK:
		einfo.ei_mode = LCK_PW;
		break;
	default:
		CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
		       file_lock->fl_type);
		return -ENOTSUPP;
	}

	/* Map the fcntl command onto enqueue flags. */
	switch (cmd) {
	case F_SETLKW:
#ifdef F_SETLKW64
	case F_SETLKW64:
#endif
		flags = 0;
		break;
	case F_SETLK:
#ifdef F_SETLK64
	case F_SETLK64:
#endif
		flags = LDLM_FL_BLOCK_NOWAIT;
		break;
	case F_GETLK:
#ifdef F_GETLK64
	case F_GETLK64:
#endif
		flags = LDLM_FL_TEST_LOCK;
		/* Save the old mode so that if the mode in the lock changes we
		 * can decrement the appropriate reader or writer refcount. */
		file_lock->fl_type = einfo.ei_mode;
		break;
	default:
		CERROR("unknown fcntl lock command: %d\n", cmd);
		return -EINVAL;
	}

	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
	       "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
	       flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);

	rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
			op_data, &lockh, &flock, 0, NULL /* req */, flags);

	/* Mirror the server-side result into the local lock lists. */
	if ((file_lock->fl_flags & FL_FLOCK) &&
	    (rc == 0 || file_lock->fl_type == F_UNLCK))
		rc2  = flock_lock_file_wait(file, file_lock);
	if ((file_lock->fl_flags & FL_POSIX) &&
	    (rc == 0 || file_lock->fl_type == F_UNLCK) &&
	    !(flags & LDLM_FL_TEST_LOCK))
		rc2  = posix_lock_file_wait(file, file_lock);

	if (rc2 && file_lock->fl_type != F_UNLCK) {
		/* Local bookkeeping failed: release the DLM lock again. */
		einfo.ei_mode = LCK_NL;
		md_enqueue(sbi->ll_md_exp, &einfo, NULL,
			   op_data, &lockh, &flock, 0, NULL /* req */, flags);
		rc = rc2;
	}

	ll_finish_md_op_data(op_data);

	return rc;
}
2773
/*
 * ->flock()/->lock() stub for "-o noflock" mounts: file locking is
 * disabled, so every request fails with -ENOSYS.
 */
int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
{
	return -ENOSYS;
}
2778
2779 /**
2780 * test if some locks matching bits and l_req_mode are acquired
2781 * - bits can be in different locks
2782 * - if found clear the common lock bits in *bits
2783 * - the bits not found, are kept in *bits
2784 * \param inode [IN]
2785 * \param bits [IN] searched lock bits [IN]
2786 * \param l_req_mode [IN] searched lock mode
2787 * \retval boolean, true iff all bits are found
2788 */
2789 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2790 {
2791 struct lustre_handle lockh;
2792 ldlm_policy_data_t policy;
2793 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2794 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2795 struct lu_fid *fid;
2796 __u64 flags;
2797 int i;
2798
2799 if (!inode)
2800 return 0;
2801
2802 fid = &ll_i2info(inode)->lli_fid;
2803 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2804 ldlm_lockname[mode]);
2805
2806 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2807 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2808 policy.l_inodebits.bits = *bits & (1 << i);
2809 if (policy.l_inodebits.bits == 0)
2810 continue;
2811
2812 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2813 &policy, mode, &lockh)) {
2814 struct ldlm_lock *lock;
2815
2816 lock = ldlm_handle2lock(&lockh);
2817 if (lock) {
2818 *bits &=
2819 ~(lock->l_policy_data.l_inodebits.bits);
2820 LDLM_LOCK_PUT(lock);
2821 } else {
2822 *bits &= ~policy.l_inodebits.bits;
2823 }
2824 }
2825 }
2826 return *bits == 0;
2827 }
2828
2829 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2830 struct lustre_handle *lockh, __u64 flags,
2831 ldlm_mode_t mode)
2832 {
2833 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2834 struct lu_fid *fid;
2835 ldlm_mode_t rc;
2836
2837 fid = &ll_i2info(inode)->lli_fid;
2838 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2839
2840 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
2841 fid, LDLM_IBITS, &policy, mode, lockh);
2842
2843 return rc;
2844 }
2845
2846 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
2847 {
2848 /* Already unlinked. Just update nlink and return success */
2849 if (rc == -ENOENT) {
2850 clear_nlink(inode);
2851 /* This path cannot be hit for regular files unless in
2852 * case of obscure races, so no need to validate size.
2853 */
2854 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
2855 return 0;
2856 } else if (rc != 0) {
2857 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
2858 ll_get_fsname(inode->i_sb, NULL, 0),
2859 PFID(ll_inode2fid(inode)), rc);
2860 }
2861
2862 return rc;
2863 }
2864
/*
 * Revalidate @dentry's inode against the MDS.
 *
 * If the server supports getattr-by-FID (OBD_CONNECT_ATTRFID) an intent
 * lock request (IT_GETATTR, or IT_LOOKUP when only the LOOKUP ibit is
 * wanted) is issued.  Otherwise, when no matching ibits lock is already
 * cached locally, a plain md_getattr() refreshes the inode from the
 * reply.  @ibits selects which inodebits must be covered.
 */
int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
			     __u64 ibits)
{
	struct inode *inode = dentry->d_inode;
	struct ptlrpc_request *req = NULL;
	struct obd_export *exp;
	int rc = 0;

	LASSERT(inode != NULL);

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
	       inode->i_ino, inode->i_generation, inode, dentry->d_name.name);

	exp = ll_i2mdexp(inode);

	/* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
	 * But under CMD case, it caused some lock issues, should be fixed
	 * with new CMD ibits lock. See bug 12718 */
	if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
		struct lookup_intent oit = { .it_op = IT_GETATTR };
		struct md_op_data *op_data;

		if (ibits == MDS_INODELOCK_LOOKUP)
			oit.it_op = IT_LOOKUP;

		/* Call getattr by fid, so do not provide name at all. */
		op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
					     dentry->d_inode, NULL, 0, 0,
					     LUSTRE_OPC_ANY, NULL);
		if (IS_ERR(op_data))
			return PTR_ERR(op_data);

		oit.it_create_mode |= M_CHECK_STALE;
		rc = md_intent_lock(exp, op_data, NULL, 0,
				    /* we are not interested in name
				       based lookup */
				    &oit, 0, &req,
				    ll_md_blocking_ast, 0);
		ll_finish_md_op_data(op_data);
		oit.it_create_mode &= ~M_CHECK_STALE;
		if (rc < 0) {
			rc = ll_inode_revalidate_fini(inode, rc);
			GOTO (out, rc);
		}

		rc = ll_revalidate_it_finish(req, &oit, dentry);
		if (rc != 0) {
			ll_intent_release(&oit);
			GOTO(out, rc);
		}

		/* Unlinked? Unhash dentry, so it is not picked up later by
		   do_lookup() -> ll_revalidate_it(). We cannot use d_drop
		   here to preserve get_cwd functionality on 2.6.
		   Bug 10503 */
		if (!dentry->d_inode->i_nlink)
			d_lustre_invalidate(dentry, 0);

		ll_lookup_finish_locks(&oit, dentry);
	} else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
		/* No cached lock covers the wanted ibits: do a plain
		 * getattr RPC and rebuild the inode from the reply. */
		struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
		obd_valid valid = OBD_MD_FLGETATTR;
		struct md_op_data *op_data;
		int ealen = 0;

		/* Regular files also need the striping EA in the reply. */
		if (S_ISREG(inode->i_mode)) {
			rc = ll_get_max_mdsize(sbi, &ealen);
			if (rc)
				return rc;
			valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
		}

		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
					     0, ealen, LUSTRE_OPC_ANY,
					     NULL);
		if (IS_ERR(op_data))
			return PTR_ERR(op_data);

		op_data->op_valid = valid;
		/* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
		 * capa for this inode. Because we only keep capas of dirs
		 * fresh. */
		rc = md_getattr(sbi->ll_md_exp, op_data, &req);
		ll_finish_md_op_data(op_data);
		if (rc) {
			rc = ll_inode_revalidate_fini(inode, rc);
			return rc;
		}

		rc = ll_prep_inode(&inode, req, NULL, NULL);
	}
out:
	ptlrpc_req_finished(req);
	return rc;
}
2960
2961 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2962 __u64 ibits)
2963 {
2964 struct inode *inode = dentry->d_inode;
2965 int rc;
2966
2967 rc = __ll_inode_revalidate_it(dentry, it, ibits);
2968 if (rc != 0)
2969 return rc;
2970
2971 /* if object isn't regular file, don't validate size */
2972 if (!S_ISREG(inode->i_mode)) {
2973 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2974 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2975 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2976 } else {
2977 /* In case of restore, the MDT has the right size and has
2978 * already send it back without granting the layout lock,
2979 * inode is up-to-date so glimpse is useless.
2980 * Also to glimpse we need the layout, in case of a running
2981 * restore the MDT holds the layout lock so the glimpse will
2982 * block up to the end of restore (getattr will block)
2983 */
2984 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
2985 rc = ll_glimpse_size(inode);
2986 }
2987 return rc;
2988 }
2989
2990 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2991 struct lookup_intent *it, struct kstat *stat)
2992 {
2993 struct inode *inode = de->d_inode;
2994 struct ll_sb_info *sbi = ll_i2sbi(inode);
2995 struct ll_inode_info *lli = ll_i2info(inode);
2996 int res = 0;
2997
2998 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
2999 MDS_INODELOCK_LOOKUP);
3000 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3001
3002 if (res)
3003 return res;
3004
3005 stat->dev = inode->i_sb->s_dev;
3006 if (ll_need_32bit_api(sbi))
3007 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3008 else
3009 stat->ino = inode->i_ino;
3010 stat->mode = inode->i_mode;
3011 stat->nlink = inode->i_nlink;
3012 stat->uid = inode->i_uid;
3013 stat->gid = inode->i_gid;
3014 stat->rdev = inode->i_rdev;
3015 stat->atime = inode->i_atime;
3016 stat->mtime = inode->i_mtime;
3017 stat->ctime = inode->i_ctime;
3018 stat->blksize = 1 << inode->i_blkbits;
3019
3020 stat->size = i_size_read(inode);
3021 stat->blocks = inode->i_blocks;
3022
3023 return 0;
3024 }
3025 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3026 {
3027 struct lookup_intent it = { .it_op = IT_GETATTR };
3028
3029 return ll_getattr_it(mnt, de, &it, stat);
3030 }
3031
3032 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3033 __u64 start, __u64 len)
3034 {
3035 int rc;
3036 size_t num_bytes;
3037 struct ll_user_fiemap *fiemap;
3038 unsigned int extent_count = fieinfo->fi_extents_max;
3039
3040 num_bytes = sizeof(*fiemap) + (extent_count *
3041 sizeof(struct ll_fiemap_extent));
3042 OBD_ALLOC_LARGE(fiemap, num_bytes);
3043
3044 if (fiemap == NULL)
3045 return -ENOMEM;
3046
3047 fiemap->fm_flags = fieinfo->fi_flags;
3048 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3049 fiemap->fm_start = start;
3050 fiemap->fm_length = len;
3051 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3052 sizeof(struct ll_fiemap_extent));
3053
3054 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3055
3056 fieinfo->fi_flags = fiemap->fm_flags;
3057 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3058 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3059 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3060
3061 OBD_FREE_LARGE(fiemap, num_bytes);
3062 return rc;
3063 }
3064
3065 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3066 {
3067 struct ll_inode_info *lli = ll_i2info(inode);
3068 struct posix_acl *acl = NULL;
3069
3070 spin_lock(&lli->lli_lock);
3071 /* VFS' acl_permission_check->check_acl will release the refcount */
3072 acl = posix_acl_dup(lli->lli_posix_acl);
3073 spin_unlock(&lli->lli_lock);
3074
3075 return acl;
3076 }
3077
3078
/*
 * ->permission() for Lustre inodes.
 *
 * The root inode is not validated during lookup, so it is revalidated
 * here before the permission check.  Remote-client mounts
 * (LL_SBI_RMT_CLIENT) delegate the decision to the server-side check;
 * everything else uses the kernel's generic_permission().
 */
int ll_inode_permission(struct inode *inode, int mask)
{
	int rc = 0;

#ifdef MAY_NOT_BLOCK
	/* Revalidation below may block, so bail out of RCU-walk mode. */
	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;
#endif

	/* as root inode are NOT getting validated in lookup operation,
	 * need to do it before permission check. */

	if (inode == inode->i_sb->s_root->d_inode) {
		struct lookup_intent it = { .it_op = IT_LOOKUP };

		rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
					      MDS_INODELOCK_LOOKUP);
		if (rc)
			return rc;
	}

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
	       inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);

	if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
		return lustre_check_remote_perm(inode, mask);

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
	rc = generic_permission(inode, mask);

	return rc;
}
3111
/* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no .flock/.lock methods, so flock()/fcntl()
 * locking stays purely node-local. */
struct file_operations ll_file_operations = {
	.read = ll_file_read,
	.aio_read = ll_file_aio_read,
	.write = ll_file_write,
	.aio_write = ll_file_aio_write,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush
};
3127
/* file_operations variant with cluster-wide locking: flock()/fcntl()
 * requests go through ll_file_flock() and thus the MDT. */
struct file_operations ll_file_operations_flock = {
	.read = ll_file_read,
	.aio_read = ll_file_aio_read,
	.write = ll_file_write,
	.aio_write = ll_file_aio_write,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_flock,
	.lock = ll_file_flock
};
3144
/* These are for -o noflock - to return ENOSYS on flock calls */
struct file_operations ll_file_operations_noflock = {
	.read = ll_file_read,
	.aio_read = ll_file_aio_read,
	.write = ll_file_write,
	.aio_write = ll_file_aio_write,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_noflock,
	.lock = ll_file_noflock
};
3162
/* inode_operations shared by all regular-file inodes. */
struct inode_operations ll_file_inode_operations = {
	.setattr = ll_setattr,
	.getattr = ll_getattr,
	.permission = ll_inode_permission,
	.setxattr = ll_setxattr,
	.getxattr = ll_getxattr,
	.listxattr = ll_listxattr,
	.removexattr = ll_removexattr,
	.fiemap = ll_fiemap,
	.get_acl = ll_get_acl,
};
3174
/* dynamic ioctl number support routines */
/*
 * Registry of dynamically registered ioctl handlers (see
 * ll_iocontrol_register()/ll_iocontrol_unregister()).  Readers walk
 * ioc_head with ioc_sem held for read; updaters take it for write.
 */
static struct llioc_ctl_data {
	struct rw_semaphore ioc_sem;	/* protects ioc_head */
	struct list_head ioc_head;	/* list of struct llioc_data */
} llioc = {
	__RWSEM_INITIALIZER(llioc.ioc_sem),
	LIST_HEAD_INIT(llioc.ioc_head)
};
3183
3184
/* One dynamically registered ioctl handler and the commands it serves. */
struct llioc_data {
	struct list_head iocd_list;	/* link into llioc.ioc_head */
	unsigned int iocd_size;		/* total allocation size, in bytes */
	llioc_callback_t iocd_cb;	/* handler for all iocd_cmd[] numbers */
	unsigned int iocd_count;	/* number of entries in iocd_cmd[] */
	unsigned int iocd_cmd[0];	/* ioctl numbers handled by iocd_cb */
};
3192
3193 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3194 {
3195 unsigned int size;
3196 struct llioc_data *in_data = NULL;
3197
3198 if (cb == NULL || cmd == NULL ||
3199 count > LLIOC_MAX_CMD || count < 0)
3200 return NULL;
3201
3202 size = sizeof(*in_data) + count * sizeof(unsigned int);
3203 OBD_ALLOC(in_data, size);
3204 if (in_data == NULL)
3205 return NULL;
3206
3207 memset(in_data, 0, sizeof(*in_data));
3208 in_data->iocd_size = size;
3209 in_data->iocd_cb = cb;
3210 in_data->iocd_count = count;
3211 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3212
3213 down_write(&llioc.ioc_sem);
3214 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3215 up_write(&llioc.ioc_sem);
3216
3217 return in_data;
3218 }
3219
3220 void ll_iocontrol_unregister(void *magic)
3221 {
3222 struct llioc_data *tmp;
3223
3224 if (magic == NULL)
3225 return;
3226
3227 down_write(&llioc.ioc_sem);
3228 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3229 if (tmp == magic) {
3230 unsigned int size = tmp->iocd_size;
3231
3232 list_del(&tmp->iocd_list);
3233 up_write(&llioc.ioc_sem);
3234
3235 OBD_FREE(tmp, size);
3236 return;
3237 }
3238 }
3239 up_write(&llioc.ioc_sem);
3240
3241 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3242 }
3243
3244 EXPORT_SYMBOL(ll_iocontrol_register);
3245 EXPORT_SYMBOL(ll_iocontrol_unregister);
3246
3247 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3248 unsigned int cmd, unsigned long arg, int *rcp)
3249 {
3250 enum llioc_iter ret = LLIOC_CONT;
3251 struct llioc_data *data;
3252 int rc = -EINVAL, i;
3253
3254 down_read(&llioc.ioc_sem);
3255 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3256 for (i = 0; i < data->iocd_count; i++) {
3257 if (cmd != data->iocd_cmd[i])
3258 continue;
3259
3260 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3261 break;
3262 }
3263
3264 if (ret == LLIOC_STOP)
3265 break;
3266 }
3267 up_read(&llioc.ioc_sem);
3268
3269 if (rcp)
3270 *rcp = rc;
3271 return ret;
3272 }
3273
3274 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3275 {
3276 struct ll_inode_info *lli = ll_i2info(inode);
3277 struct cl_env_nest nest;
3278 struct lu_env *env;
3279 int result;
3280
3281 if (lli->lli_clob == NULL)
3282 return 0;
3283
3284 env = cl_env_nested_get(&nest);
3285 if (IS_ERR(env))
3286 return PTR_ERR(env);
3287
3288 result = cl_conf_set(env, lli->lli_clob, conf);
3289 cl_env_nested_put(&nest, env);
3290
3291 if (conf->coc_opc == OBJECT_CONF_SET) {
3292 struct ldlm_lock *lock = conf->coc_lock;
3293
3294 LASSERT(lock != NULL);
3295 LASSERT(ldlm_has_layout(lock));
3296 if (result == 0) {
3297 /* it can only be allowed to match after layout is
3298 * applied to inode otherwise false layout would be
3299 * seen. Applying layout shoud happen before dropping
3300 * the intent lock. */
3301 ldlm_lock_allow_match(lock);
3302 }
3303 }
3304 return result;
3305 }
3306
/* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * If the lock already carries a ready LVB (layout value block) this is a
 * no-op. Otherwise the layout is fetched from the MDT as the LOV xattr
 * and installed as the lock's LVB under the resource lock.
 *
 * \retval 0 on success (including an empty layout), negative errno
 *	   on failure.
 */
static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)

{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obd_capa *oc;
	struct ptlrpc_request *req;
	struct mdt_body *body;
	void *lvbdata;
	void *lmm;
	int lmmsize;
	int rc;

	CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
	       PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
	       lock->l_lvb_data, lock->l_lvb_len);

	/* layout already attached and ready - nothing to fetch */
	if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
		return 0;

	/* if layout lock was granted right away, the layout is returned
	 * within DLM_LVB of dlm reply; otherwise if the lock was ever
	 * blocked and then granted via completion ast, we have to fetch
	 * layout here. Please note that we can't use the LVB buffer in
	 * completion AST because it doesn't have a large enough buffer */
	oc = ll_mdscapa_get(inode);
	rc = ll_get_max_mdsize(sbi, &lmmsize);
	if (rc == 0)
		rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
				 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
				 lmmsize, 0, &req);
	capa_put(oc);
	/* early return: req was never set up, so nothing to release */
	if (rc < 0)
		return rc;

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	if (body == NULL || body->eadatasize > lmmsize)
		GOTO(out, rc = -EPROTO);

	lmmsize = body->eadatasize;
	if (lmmsize == 0) /* empty layout */
		GOTO(out, rc = 0);

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
	if (lmm == NULL)
		GOTO(out, rc = -EFAULT);

	/* copy out of the RPC reply buffer before the request is freed */
	OBD_ALLOC_LARGE(lvbdata, lmmsize);
	if (lvbdata == NULL)
		GOTO(out, rc = -ENOMEM);

	memcpy(lvbdata, lmm, lmmsize);
	/* swap in the new LVB under the resource lock; any previous
	 * buffer is freed here */
	lock_res_and_lock(lock);
	if (lock->l_lvb_data != NULL)
		OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);

	lock->l_lvb_data = lvbdata;
	lock->l_lvb_len = lmmsize;
	unlock_res_and_lock(lock);

out:
	ptlrpc_req_finished(req);
	return rc;
}
3371
/**
 * Apply the layout to the inode. Layout lock is held and will be released
 * in this function.
 *
 * \param lockh		handle of the held layout lock; decref'd on exit
 * \param mode		mode the lock is held in
 * \param gen		out: layout generation on success
 * \param reconf	if false, only report a ready layout (-ENODATA
 *			otherwise); if true, (re)configure the layout
 *
 * \retval 0 on success, -EAGAIN if the caller should retry after a
 *	   layout wait, other negative errno on failure.
 */
static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
			      struct inode *inode, __u32 *gen, bool reconf)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ldlm_lock *lock;
	struct lustre_md md = { NULL };
	struct cl_object_conf conf;
	int rc = 0;
	bool lvb_ready;
	bool wait_layout = false;

	LASSERT(lustre_handle_is_used(lockh));

	/* takes a reference on the lock; released at "out:" */
	lock = ldlm_handle2lock(lockh);
	LASSERT(lock != NULL);
	LASSERT(ldlm_has_layout(lock));

	LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
		   inode, PFID(&lli->lli_fid), reconf);

	/* in case this is a caching lock and reinstate with new inode */
	md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);

	lock_res_and_lock(lock);
	lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
	unlock_res_and_lock(lock);
	/* checking lvb_ready is racy but this is okay. The worst case is
	 * that multi processes may configure the file on the same time. */
	if (lvb_ready || !reconf) {
		rc = -ENODATA;
		if (lvb_ready) {
			/* layout_gen must be valid if layout lock is not
			 * cancelled and stripe has already set */
			*gen = lli->lli_layout_gen;
			rc = 0;
		}
		GOTO(out, rc);
	}

	rc = ll_layout_fetch(inode, lock);
	if (rc < 0)
		GOTO(out, rc);

	/* for layout lock, lmm is returned in lock's lvb.
	 * lvb_data is immutable if the lock is held so it's safe to access it
	 * without res lock. See the description in ldlm_lock_decref_internal()
	 * for the condition to free lvb_data of layout lock */
	if (lock->l_lvb_data != NULL) {
		rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
				  lock->l_lvb_data, lock->l_lvb_len);
		if (rc >= 0) {
			/* a NULL lsm after unpack means an empty layout */
			*gen = LL_LAYOUT_GEN_EMPTY;
			if (md.lsm != NULL)
				*gen = md.lsm->lsm_layout_gen;
			rc = 0;
		} else {
			CERROR("%s: file "DFID" unpackmd error: %d\n",
			       ll_get_fsname(inode->i_sb, NULL, 0),
			       PFID(&lli->lli_fid), rc);
		}
	}
	if (rc < 0)
		GOTO(out, rc);

	/* set layout to file. Unlikely this will fail as old layout was
	 * surely eliminated */
	memset(&conf, 0, sizeof(conf));
	conf.coc_opc = OBJECT_CONF_SET;
	conf.coc_inode = inode;
	conf.coc_lock = lock;
	conf.u.coc_md = &md;
	rc = ll_layout_conf(inode, &conf);

	if (md.lsm != NULL)
		obd_free_memmd(sbi->ll_dt_exp, &md.lsm);

	/* refresh layout failed, need to wait */
	wait_layout = rc == -EBUSY;

out:
	/* drop both the reference taken above and the caller's lock */
	LDLM_LOCK_PUT(lock);
	ldlm_lock_decref(lockh, mode);

	/* wait for IO to complete if it's still being used. */
	if (wait_layout) {
		CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
		       ll_get_fsname(inode->i_sb, NULL, 0),
		       inode, PFID(&lli->lli_fid));

		memset(&conf, 0, sizeof(conf));
		conf.coc_opc = OBJECT_CONF_WAIT;
		conf.coc_inode = inode;
		rc = ll_layout_conf(inode, &conf);
		if (rc == 0)
			rc = -EAGAIN;

		CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
		       PFID(&lli->lli_fid), rc);
	}
	return rc;
}
3478
/**
 * This function checks if there exists a LAYOUT lock on the client side,
 * or enqueues it if it doesn't have one in cache.
 *
 * This function will not hold layout lock so it may be revoked any time after
 * this function returns. Any operations depend on layout should be redone
 * in that case.
 *
 * This function should be called before lov_io_init() to get an uptodate
 * layout version, the caller should save the version number and after IO
 * is finished, this function should be called again to verify that layout
 * is not changed during IO time.
 *
 * \param gen	out: the current layout generation
 * \retval 0 on success, negative errno on failure.
 */
int ll_layout_refresh(struct inode *inode, __u32 *gen)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct lookup_intent it;
	struct lustre_handle lockh;
	ldlm_mode_t mode;
	struct ldlm_enqueue_info einfo = {
		.ei_type = LDLM_IBITS,
		.ei_mode = LCK_CR,
		.ei_cb_bl = ll_md_blocking_ast,
		.ei_cb_cp = ldlm_completion_ast,
	};
	int rc;

	*gen = lli->lli_layout_gen;
	/* layout locks disabled for this mount - current gen is all we have */
	if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
		return 0;

	/* sanity checks */
	LASSERT(fid_is_sane(ll_inode2fid(inode)));
	LASSERT(S_ISREG(inode->i_mode));

	/* mostly layout lock is caching on the local side, so try to match
	 * it before grabbing layout lock mutex. */
	mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
			       LCK_CR | LCK_CW | LCK_PR | LCK_PW);
	if (mode != 0) { /* hit cached lock */
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
		if (rc == 0)
			return 0;

		/* better hold lli_layout_mutex to try again otherwise
		 * it will have starvation problem. */
	}

	/* take layout lock mutex to enqueue layout lock exclusively. */
	mutex_lock(&lli->lli_layout_mutex);

again:
	/* try again. Maybe somebody else has done this. */
	mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
			       LCK_CR | LCK_CW | LCK_PR | LCK_PW);
	if (mode != 0) { /* hit cached lock */
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;

		mutex_unlock(&lli->lli_layout_mutex);
		return rc;
	}

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
				     0, 0, LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		mutex_unlock(&lli->lli_layout_mutex);
		return PTR_ERR(op_data);
	}

	/* have to enqueue one */
	memset(&it, 0, sizeof(it));
	it.it_op = IT_LAYOUT;
	lockh.cookie = 0ULL;

	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
			  ll_get_fsname(inode->i_sb, NULL, 0), inode,
			  PFID(&lli->lli_fid));

	rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
			NULL, 0, NULL, 0);
	/* release the enqueue reply request regardless of rc */
	if (it.d.lustre.it_data != NULL)
		ptlrpc_req_finished(it.d.lustre.it_data);
	it.d.lustre.it_data = NULL;

	ll_finish_md_op_data(op_data);

	/* take ownership of the lock mode before dropping the intent */
	mode = it.d.lustre.it_lock_mode;
	it.d.lustre.it_lock_mode = 0;
	ll_intent_drop_lock(&it);

	if (rc == 0) {
		/* set lock data in case this is a new lock */
		ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;
	}
	mutex_unlock(&lli->lli_layout_mutex);

	return rc;
}
3584
3585 /**
3586 * This function send a restore request to the MDT
3587 */
3588 int ll_layout_restore(struct inode *inode)
3589 {
3590 struct hsm_user_request *hur;
3591 int len, rc;
3592
3593 len = sizeof(struct hsm_user_request) +
3594 sizeof(struct hsm_user_item);
3595 OBD_ALLOC(hur, len);
3596 if (hur == NULL)
3597 return -ENOMEM;
3598
3599 hur->hur_request.hr_action = HUA_RESTORE;
3600 hur->hur_request.hr_archive_id = 0;
3601 hur->hur_request.hr_flags = 0;
3602 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3603 sizeof(hur->hur_user_item[0].hui_fid));
3604 hur->hur_user_item[0].hui_extent.length = -1;
3605 hur->hur_request.hr_itemcount = 1;
3606 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
3607 len, hur, NULL);
3608 OBD_FREE(hur, len);
3609 return rc;
3610 }
This page took 0.10572 seconds and 4 git commands to generate.