staging/lustre/llite: fix O_TMPFILE/O_LOV_DELAY_CREATE conflict
[deliverable/linux.git] / drivers/staging/lustre/lustre/llite/file.c
1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/llite/file.c
37 *
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
41 */
42
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
50
51 #include "cl_object.h"
52
53 struct ll_file_data *ll_file_data_get(void)
54 {
55 struct ll_file_data *fd;
56
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
58 if (fd == NULL)
59 return NULL;
60 fd->fd_write_failed = false;
61 return fd;
62 }
63
64 static void ll_file_data_put(struct ll_file_data *fd)
65 {
66 if (fd != NULL)
67 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
68 }
69
70 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
71 struct lustre_handle *fh)
72 {
73 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
74 op_data->op_attr.ia_mode = inode->i_mode;
75 op_data->op_attr.ia_atime = inode->i_atime;
76 op_data->op_attr.ia_mtime = inode->i_mtime;
77 op_data->op_attr.ia_ctime = inode->i_ctime;
78 op_data->op_attr.ia_size = i_size_read(inode);
79 op_data->op_attr_blocks = inode->i_blocks;
80 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
81 ll_inode_to_ext_flags(inode->i_flags);
82 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
83 if (fh)
84 op_data->op_handle = *fh;
85 op_data->op_capa1 = ll_mdscapa_get(inode);
86
87 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
88 op_data->op_bias |= MDS_DATA_MODIFIED;
89 }
90
91 /**
92 * Closes the IO epoch and packs all the attributes into @op_data for
93 * the CLOSE rpc.
94 */
95 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
96 struct obd_client_handle *och)
97 {
98 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
99 ATTR_MTIME | ATTR_MTIME_SET |
100 ATTR_CTIME | ATTR_CTIME_SET;
101
102 if (!(och->och_flags & FMODE_WRITE))
103 goto out;
104
105 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
106 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
107 else
108 ll_ioepoch_close(inode, op_data, &och, 0);
109
110 out:
111 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
112 ll_prep_md_op_data(op_data, inode, NULL, NULL,
113 0, 0, LUSTRE_OPC_ANY, NULL);
114 }
115
116 static int ll_close_inode_openhandle(struct obd_export *md_exp,
117 struct inode *inode,
118 struct obd_client_handle *och,
119 const __u64 *data_version)
120 {
121 struct obd_export *exp = ll_i2mdexp(inode);
122 struct md_op_data *op_data;
123 struct ptlrpc_request *req = NULL;
124 struct obd_device *obd = class_exp2obd(exp);
125 int epoch_close = 1;
126 int rc;
127
128 if (obd == NULL) {
129 /*
130 * XXX: in the case of LMV, is it correct to access
131 * ->exp_handle?
132 */
133 CERROR("Invalid MDC connection handle "LPX64"\n",
134 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 GOTO(out, rc = 0);
136 }
137
138 OBD_ALLOC_PTR(op_data);
139 if (op_data == NULL)
140 GOTO(out, rc = -ENOMEM); /* XXX We leak openhandle and request here. */
141
142 ll_prepare_close(inode, op_data, och);
143 if (data_version != NULL) {
144 /* Passing in data_version implies release. */
145 op_data->op_bias |= MDS_HSM_RELEASE;
146 op_data->op_data_version = *data_version;
147 op_data->op_lease_handle = och->och_lease_handle;
148 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
149 }
150 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
151 rc = md_close(md_exp, op_data, och->och_mod, &req);
152 if (rc == -EAGAIN) {
153 /* This close must have the epoch closed. */
154 LASSERT(epoch_close);
155 /* MDS has instructed us to obtain the Size-on-MDS attribute from
156 * the OSTs and send a setattr back to the MDS. */
157 rc = ll_som_update(inode, op_data);
158 if (rc) {
159 CERROR("inode %lu mdc Size-on-MDS update failed: "
160 "rc = %d\n", inode->i_ino, rc);
161 rc = 0;
162 }
163 } else if (rc) {
164 CERROR("inode %lu mdc close failed: rc = %d\n",
165 inode->i_ino, rc);
166 }
167
168 /* The DATA_MODIFIED flag was successfully sent on close, so clear
169 * the data-modified flag. */
170 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
171 struct ll_inode_info *lli = ll_i2info(inode);
172
173 spin_lock(&lli->lli_lock);
174 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
175 spin_unlock(&lli->lli_lock);
176 }
177
178 if (rc == 0) {
179 rc = ll_objects_destroy(req, inode);
180 if (rc)
181 CERROR("inode %lu ll_objects destroy: rc = %d\n",
182 inode->i_ino, rc);
183 }
184 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
185 struct mdt_body *body;
186 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
187 if (!(body->valid & OBD_MD_FLRELEASED))
188 rc = -EBUSY;
189 }
190
191 ll_finish_md_op_data(op_data);
192
193 out:
194 if (exp_connect_som(exp) && !epoch_close &&
195 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
196 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
197 } else {
198 md_clear_open_replay_data(md_exp, och);
199 /* Free @och if it is not waiting for DONE_WRITING. */
200 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
201 OBD_FREE_PTR(och);
202 }
203 if (req) /* This is close request */
204 ptlrpc_req_finished(req);
205 return rc;
206 }
207
208 int ll_md_real_close(struct inode *inode, int flags)
209 {
210 struct ll_inode_info *lli = ll_i2info(inode);
211 struct obd_client_handle **och_p;
212 struct obd_client_handle *och;
213 __u64 *och_usecount;
214 int rc = 0;
215
216 if (flags & FMODE_WRITE) {
217 och_p = &lli->lli_mds_write_och;
218 och_usecount = &lli->lli_open_fd_write_count;
219 } else if (flags & FMODE_EXEC) {
220 och_p = &lli->lli_mds_exec_och;
221 och_usecount = &lli->lli_open_fd_exec_count;
222 } else {
223 LASSERT(flags & FMODE_READ);
224 och_p = &lli->lli_mds_read_och;
225 och_usecount = &lli->lli_open_fd_read_count;
226 }
227
228 mutex_lock(&lli->lli_och_mutex);
229 if (*och_usecount) { /* There are still users of this handle, so
230 skip freeing it. */
231 mutex_unlock(&lli->lli_och_mutex);
232 return 0;
233 }
234 och = *och_p;
235 *och_p = NULL;
236 mutex_unlock(&lli->lli_och_mutex);
237
238 if (och) { /* There might be a race, and somebody might have freed
239 this och already */
240 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
241 inode, och, NULL);
242 }
243
244 return rc;
245 }
246
247 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
248 struct file *file)
249 {
250 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
251 struct ll_inode_info *lli = ll_i2info(inode);
252 int rc = 0;
253
254 /* clear group lock, if present */
255 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
256 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
257
258 if (fd->fd_lease_och != NULL) {
259 bool lease_broken;
260
261 /* Usually the lease is not released when the
262 * application crashes, so we need to release it here. */
263 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
264 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
265 PFID(&lli->lli_fid), rc, lease_broken);
266
267 fd->fd_lease_och = NULL;
268 }
269
270 if (fd->fd_och != NULL) {
271 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
272 fd->fd_och = NULL;
273 GOTO(out, rc);
274 }
275
276 /* Let's see if we have a good enough OPEN lock on the file and if
277 we can skip talking to the MDS */
278 if (file->f_dentry->d_inode) { /* Can this ever be false? */
279 int lockmode;
280 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
281 struct lustre_handle lockh;
282 struct inode *inode = file->f_dentry->d_inode;
283 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
284
285 mutex_lock(&lli->lli_och_mutex);
286 if (fd->fd_omode & FMODE_WRITE) {
287 lockmode = LCK_CW;
288 LASSERT(lli->lli_open_fd_write_count);
289 lli->lli_open_fd_write_count--;
290 } else if (fd->fd_omode & FMODE_EXEC) {
291 lockmode = LCK_PR;
292 LASSERT(lli->lli_open_fd_exec_count);
293 lli->lli_open_fd_exec_count--;
294 } else {
295 lockmode = LCK_CR;
296 LASSERT(lli->lli_open_fd_read_count);
297 lli->lli_open_fd_read_count--;
298 }
299 mutex_unlock(&lli->lli_och_mutex);
300
301 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
302 LDLM_IBITS, &policy, lockmode,
303 &lockh)) {
304 rc = ll_md_real_close(file->f_dentry->d_inode,
305 fd->fd_omode);
306 }
307 } else {
308 CERROR("Releasing a file %p with negative dentry %p. Name %s\n",
309 file, file->f_dentry, file->f_dentry->d_name.name);
310 }
311
312 out:
313 LUSTRE_FPRIVATE(file) = NULL;
314 ll_file_data_put(fd);
315 ll_capa_close(inode);
316
317 return rc;
318 }
319
320 /* While this returns an error code, the caller (fput()) does not
321 * propagate it, so we need to make every effort to clean up all of our
322 * state here. Also, applications rarely check close errors, and even if
323 * an error is returned they will not retry the close call.
324 */
325 int ll_file_release(struct inode *inode, struct file *file)
326 {
327 struct ll_file_data *fd;
328 struct ll_sb_info *sbi = ll_i2sbi(inode);
329 struct ll_inode_info *lli = ll_i2info(inode);
330 int rc;
331
332 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
333 inode->i_generation, inode);
334
335 #ifdef CONFIG_FS_POSIX_ACL
336 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
337 inode == inode->i_sb->s_root->d_inode) {
338 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
339
340 LASSERT(fd != NULL);
341 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
342 fd->fd_flags &= ~LL_FILE_RMTACL;
343 rct_del(&sbi->ll_rct, current_pid());
344 et_search_free(&sbi->ll_et, current_pid());
345 }
346 }
347 #endif
348
349 if (inode->i_sb->s_root != file->f_dentry)
350 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
351 fd = LUSTRE_FPRIVATE(file);
352 LASSERT(fd != NULL);
353
354 /* The last ref on @file may not be put by the owner pid of the statahead.
355 * Different processes can open the same dir; "ll_opendir_key" identifies
356 * the one that should stop the statahead thread. */
357 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
358 lli->lli_opendir_pid != 0)
359 ll_stop_statahead(inode, lli->lli_opendir_key);
360
361 if (inode->i_sb->s_root == file->f_dentry) {
362 LUSTRE_FPRIVATE(file) = NULL;
363 ll_file_data_put(fd);
364 return 0;
365 }
366
367 if (!S_ISDIR(inode->i_mode)) {
368 lov_read_and_clear_async_rc(lli->lli_clob);
369 lli->lli_async_rc = 0;
370 }
371
372 rc = ll_md_close(sbi->ll_md_exp, inode, file);
373
374 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
375 libcfs_debug_dumplog();
376
377 return rc;
378 }
379
380 static int ll_intent_file_open(struct file *file, void *lmm,
381 int lmmsize, struct lookup_intent *itp)
382 {
383 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
384 struct dentry *parent = file->f_dentry->d_parent;
385 const char *name = file->f_dentry->d_name.name;
386 const int len = file->f_dentry->d_name.len;
387 struct md_op_data *op_data;
388 struct ptlrpc_request *req;
389 __u32 opc = LUSTRE_OPC_ANY;
390 int rc;
391
392 if (!parent)
393 return -ENOENT;
394
395 /* Usually we come here only for NFSD, and we want the open lock.
396 But we can also get here with pre-2.6.15 patchless kernels, and in
397 that case that lock is also OK */
398 /* We can also get here if there was a cached open handle in revalidate_it
399 * but it disappeared while we were getting from there to ll_file_open.
400 * But this means this file was closed and immediately opened, which
401 * makes it a good candidate for using the OPEN lock */
402 /* If lmmsize & lmm are not 0, we are just setting stripe info
403 * parameters. No need for the open lock */
404 if (lmm == NULL && lmmsize == 0) {
405 itp->it_flags |= MDS_OPEN_LOCK;
406 if (itp->it_flags & FMODE_WRITE)
407 opc = LUSTRE_OPC_CREATE;
408 }
409
410 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
411 file->f_dentry->d_inode, name, len,
412 O_RDWR, opc, NULL);
413 if (IS_ERR(op_data))
414 return PTR_ERR(op_data);
415
416 itp->it_flags |= MDS_OPEN_BY_FID;
417 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
418 0 /*unused */, &req, ll_md_blocking_ast, 0);
419 ll_finish_md_op_data(op_data);
420 if (rc == -ESTALE) {
421 /* reason for keeping our own exit path - don't flood the log
422 * with messages with -ESTALE errors.
423 */
424 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
425 it_open_error(DISP_OPEN_OPEN, itp))
426 GOTO(out, rc);
427 ll_release_openhandle(file->f_dentry, itp);
428 GOTO(out, rc);
429 }
430
431 if (it_disposition(itp, DISP_LOOKUP_NEG))
432 GOTO(out, rc = -ENOENT);
433
434 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
435 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
436 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
437 GOTO(out, rc);
438 }
439
440 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
441 if (!rc && itp->d.lustre.it_lock_mode)
442 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
443 itp, NULL);
444
445 out:
446 ptlrpc_req_finished(itp->d.lustre.it_data);
447 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
448 ll_intent_drop_lock(itp);
449
450 return rc;
451 }
452
453 /**
454 * Assign an obtained @ioepoch to the client's inode. No lock is needed; the
455 * MDS does not trust attributes if several ioepoch holders exist, and it also
456 * skips the attributes for a previous ioepoch once a new one is opened.
457 */
458 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
459 {
460 if (ioepoch && lli->lli_ioepoch != ioepoch) {
461 lli->lli_ioepoch = ioepoch;
462 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
463 ioepoch, PFID(&lli->lli_fid));
464 }
465 }
466
467 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
468 struct obd_client_handle *och)
469 {
470 struct ptlrpc_request *req = it->d.lustre.it_data;
471 struct mdt_body *body;
472
473 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
474 och->och_fh = body->handle;
475 och->och_fid = body->fid1;
476 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
477 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
478 och->och_flags = it->it_flags;
479
480 return md_set_open_replay_data(md_exp, och, req);
481 }
482
483 int ll_local_open(struct file *file, struct lookup_intent *it,
484 struct ll_file_data *fd, struct obd_client_handle *och)
485 {
486 struct inode *inode = file->f_dentry->d_inode;
487 struct ll_inode_info *lli = ll_i2info(inode);
488
489 LASSERT(!LUSTRE_FPRIVATE(file));
490
491 LASSERT(fd != NULL);
492
493 if (och) {
494 struct ptlrpc_request *req = it->d.lustre.it_data;
495 struct mdt_body *body;
496 int rc;
497
498 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
499 if (rc != 0)
500 return rc;
501
502 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
503 ll_ioepoch_open(lli, body->ioepoch);
504 }
505
506 LUSTRE_FPRIVATE(file) = fd;
507 ll_readahead_init(inode, &fd->fd_ras);
508 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
509 return 0;
510 }
511
512 /* Open a file, and (for the very first open) create objects on the OSTs at
513 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
514 * creation or open until ll_lov_setstripe() ioctl is called.
515 *
516 * If we already have the stripe MD locally then we don't request it in
517 * md_open(), by passing lmm_size = 0.
518 *
519 * It is up to the application to ensure no other processes open this file
520 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
521 * used. We might be able to avoid races of that sort by getting lli_open_sem
522 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
523 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
524 */
525 int ll_file_open(struct inode *inode, struct file *file)
526 {
527 struct ll_inode_info *lli = ll_i2info(inode);
528 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
529 .it_flags = file->f_flags };
530 struct obd_client_handle **och_p = NULL;
531 __u64 *och_usecount = NULL;
532 struct ll_file_data *fd;
533 int rc = 0, opendir_set = 0;
534
535 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
536 inode->i_generation, inode, file->f_flags);
537
538 it = file->private_data; /* XXX: compat macro */
539 file->private_data = NULL; /* prevent ll_local_open assertion */
540
541 fd = ll_file_data_get();
542 if (fd == NULL)
543 GOTO(out_openerr, rc = -ENOMEM);
544
545 fd->fd_file = file;
546 if (S_ISDIR(inode->i_mode)) {
547 spin_lock(&lli->lli_sa_lock);
548 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
549 lli->lli_opendir_pid == 0) {
550 lli->lli_opendir_key = fd;
551 lli->lli_opendir_pid = current_pid();
552 opendir_set = 1;
553 }
554 spin_unlock(&lli->lli_sa_lock);
555 }
556
557 if (inode->i_sb->s_root == file->f_dentry) {
558 LUSTRE_FPRIVATE(file) = fd;
559 return 0;
560 }
561
562 if (!it || !it->d.lustre.it_disposition) {
563 /* Convert f_flags into access mode. We cannot use file->f_mode,
564 * because everything but O_ACCMODE mask was stripped from
565 * there */
566 if ((oit.it_flags + 1) & O_ACCMODE)
567 oit.it_flags++;
568 if (file->f_flags & O_TRUNC)
569 oit.it_flags |= FMODE_WRITE;
570
571 /* The kernel only calls f_op->open in dentry_open. filp_open calls
572 * dentry_open after a call to open_namei that checks permissions.
573 * Only nfsd_open calls dentry_open directly without checking
574 * permissions, and because of that the code below is safe. */
575 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
576 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
577
578 /* We do not want O_EXCL here, presumably we opened the file
579 * already? XXX - NFS implications? */
580 oit.it_flags &= ~O_EXCL;
581
582 /* bug 20584: if "it_flags" contains O_CREAT, the file will be
583 * created if necessary, so "IT_CREAT" should be set to stay
584 * consistent with it */
585 if (oit.it_flags & O_CREAT)
586 oit.it_op |= IT_CREAT;
587
588 it = &oit;
589 }
590
591 restart:
592 /* Let's see if we have file open on MDS already. */
593 if (it->it_flags & FMODE_WRITE) {
594 och_p = &lli->lli_mds_write_och;
595 och_usecount = &lli->lli_open_fd_write_count;
596 } else if (it->it_flags & FMODE_EXEC) {
597 och_p = &lli->lli_mds_exec_och;
598 och_usecount = &lli->lli_open_fd_exec_count;
599 } else {
600 och_p = &lli->lli_mds_read_och;
601 och_usecount = &lli->lli_open_fd_read_count;
602 }
603
604 mutex_lock(&lli->lli_och_mutex);
605 if (*och_p) { /* Open handle is present */
606 if (it_disposition(it, DISP_OPEN_OPEN)) {
607 /* Well, there's an extra open request that we do not need;
608 let's close it somehow. This will decref the request. */
609 rc = it_open_error(DISP_OPEN_OPEN, it);
610 if (rc) {
611 mutex_unlock(&lli->lli_och_mutex);
612 GOTO(out_openerr, rc);
613 }
614
615 ll_release_openhandle(file->f_dentry, it);
616 }
617 (*och_usecount)++;
618
619 rc = ll_local_open(file, it, fd, NULL);
620 if (rc) {
621 (*och_usecount)--;
622 mutex_unlock(&lli->lli_och_mutex);
623 GOTO(out_openerr, rc);
624 }
625 } else {
626 LASSERT(*och_usecount == 0);
627 if (!it->d.lustre.it_disposition) {
628 /* We cannot just request a lock handle now; the new ELC code
629 means that one of the other OPEN locks for this file
630 could be cancelled, and since the blocking AST handler
631 would attempt to grab och_mutex as well, that would
632 result in a deadlock */
633 mutex_unlock(&lli->lli_och_mutex);
634 it->it_create_mode |= M_CHECK_STALE;
635 rc = ll_intent_file_open(file, NULL, 0, it);
636 it->it_create_mode &= ~M_CHECK_STALE;
637 if (rc)
638 GOTO(out_openerr, rc);
639
640 goto restart;
641 }
642 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
643 if (!*och_p)
644 GOTO(out_och_free, rc = -ENOMEM);
645
646 (*och_usecount)++;
647
648 /* md_intent_lock() didn't get a request ref if there was an
649 * open error, so don't do cleanup on the request here
650 * (bug 3430) */
651 /* XXX (green): Shouldn't we bail out on any error here, not
652 * just an open error? */
653 rc = it_open_error(DISP_OPEN_OPEN, it);
654 if (rc)
655 GOTO(out_och_free, rc);
656
657 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
658
659 rc = ll_local_open(file, it, fd, *och_p);
660 if (rc)
661 GOTO(out_och_free, rc);
662 }
663 mutex_unlock(&lli->lli_och_mutex);
664 fd = NULL;
665
666 /* Must do this outside the lli_och_mutex lock to prevent a deadlock where
667 a different kind of OPEN lock for this same inode gets cancelled
668 by ldlm_cancel_lru */
669 if (!S_ISREG(inode->i_mode))
670 GOTO(out_och_free, rc);
671
672 ll_capa_open(inode);
673
674 if (!lli->lli_has_smd &&
675 (cl_is_lov_delay_create(file->f_flags) ||
676 (file->f_mode & FMODE_WRITE) == 0)) {
677 CDEBUG(D_INODE, "object creation was delayed\n");
678 GOTO(out_och_free, rc);
679 }
680 cl_lov_delay_create_clear(&file->f_flags);
681 GOTO(out_och_free, rc);
682
683 out_och_free:
684 if (rc) {
685 if (och_p && *och_p) {
686 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
687 *och_p = NULL; /* OBD_FREE writes some magic there */
688 (*och_usecount)--;
689 }
690 mutex_unlock(&lli->lli_och_mutex);
691
692 out_openerr:
693 if (opendir_set != 0)
694 ll_stop_statahead(inode, lli->lli_opendir_key);
695 if (fd != NULL)
696 ll_file_data_put(fd);
697 } else {
698 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
699 }
700
701 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
702 ptlrpc_req_finished(it->d.lustre.it_data);
703 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
704 }
705
706 return rc;
707 }
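
/*
 * Editor's illustrative sketch (not part of the original file): the typical
 * userspace sequence behind the delayed-create behaviour described in the
 * comment above ll_file_open().  Struct and ioctl names are taken from
 * lustre_user.h in this tree; the stripe count and mode are arbitrary
 * example values.
 *
 *	struct lov_user_md_v1 lum = {
 *		.lmm_magic        = LOV_USER_MAGIC_V1,
 *		.lmm_stripe_count = 4,
 *	};
 *	int fd = open(path, O_CREAT | O_WRONLY | O_LOV_DELAY_CREATE, 0644);
 *
 *	if (fd >= 0 && ioctl(fd, LL_IOC_LOV_SETSTRIPE, &lum) < 0)
 *		perror("LL_IOC_LOV_SETSTRIPE");
 *
 * The OST objects are only created once the setstripe ioctl reaches
 * ll_lov_setstripe_ea_info(); if another process opens the file for write
 * without the flag first, the default striping pattern is used instead.
 */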
708
709 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
710 struct ldlm_lock_desc *desc, void *data, int flag)
711 {
712 int rc;
713 struct lustre_handle lockh;
714
715 switch (flag) {
716 case LDLM_CB_BLOCKING:
717 ldlm_lock2handle(lock, &lockh);
718 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
719 if (rc < 0) {
720 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
721 return rc;
722 }
723 break;
724 case LDLM_CB_CANCELING:
725 /* do nothing */
726 break;
727 }
728 return 0;
729 }
730
731 /**
732 * Acquire a lease and open the file.
733 */
734 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
735 fmode_t fmode, __u64 open_flags)
736 {
737 struct lookup_intent it = { .it_op = IT_OPEN };
738 struct ll_sb_info *sbi = ll_i2sbi(inode);
739 struct md_op_data *op_data;
740 struct ptlrpc_request *req;
741 struct lustre_handle old_handle = { 0 };
742 struct obd_client_handle *och = NULL;
743 int rc;
744 int rc2;
745
746 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
747 return ERR_PTR(-EINVAL);
748
749 if (file != NULL) {
750 struct ll_inode_info *lli = ll_i2info(inode);
751 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
752 struct obd_client_handle **och_p;
753 __u64 *och_usecount;
754
755 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
756 return ERR_PTR(-EPERM);
757
758 /* Get the openhandle of the file */
759 rc = -EBUSY;
760 mutex_lock(&lli->lli_och_mutex);
761 if (fd->fd_lease_och != NULL) {
762 mutex_unlock(&lli->lli_och_mutex);
763 return ERR_PTR(rc);
764 }
765
766 if (fd->fd_och == NULL) {
767 if (file->f_mode & FMODE_WRITE) {
768 LASSERT(lli->lli_mds_write_och != NULL);
769 och_p = &lli->lli_mds_write_och;
770 och_usecount = &lli->lli_open_fd_write_count;
771 } else {
772 LASSERT(lli->lli_mds_read_och != NULL);
773 och_p = &lli->lli_mds_read_och;
774 och_usecount = &lli->lli_open_fd_read_count;
775 }
776 if (*och_usecount == 1) {
777 fd->fd_och = *och_p;
778 *och_p = NULL;
779 *och_usecount = 0;
780 rc = 0;
781 }
782 }
783 mutex_unlock(&lli->lli_och_mutex);
784 if (rc < 0) /* more than 1 opener */
785 return ERR_PTR(rc);
786
787 LASSERT(fd->fd_och != NULL);
788 old_handle = fd->fd_och->och_fh;
789 }
790
791 OBD_ALLOC_PTR(och);
792 if (och == NULL)
793 return ERR_PTR(-ENOMEM);
794
795 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
796 LUSTRE_OPC_ANY, NULL);
797 if (IS_ERR(op_data))
798 GOTO(out, rc = PTR_ERR(op_data));
799
800 /* To tell the MDT this openhandle is from the same owner */
801 op_data->op_handle = old_handle;
802
803 it.it_flags = fmode | open_flags;
804 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
805 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
806 ll_md_blocking_lease_ast,
807 /* LDLM_FL_NO_LRU: do not put the lease lock into the LRU list, otherwise
808 * it can be cancelled, which may mislead applications into thinking the
809 * lease is broken;
810 * LDLM_FL_EXCL: set this flag so that it won't be matched by a normal
811 * open in ll_md_blocking_ast(). Otherwise, since ll_md_blocking_lease_ast
812 * doesn't deal with the openhandle, a normal openhandle would be leaked. */
813 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
814 ll_finish_md_op_data(op_data);
815 if (req != NULL) {
816 ptlrpc_req_finished(req);
817 it_clear_disposition(&it, DISP_ENQ_COMPLETE);
818 }
819 if (rc < 0)
820 GOTO(out_release_it, rc);
821
822 if (it_disposition(&it, DISP_LOOKUP_NEG))
823 GOTO(out_release_it, rc = -ENOENT);
824
825 rc = it_open_error(DISP_OPEN_OPEN, &it);
826 if (rc)
827 GOTO(out_release_it, rc);
828
829 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
830 ll_och_fill(sbi->ll_md_exp, &it, och);
831
832 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
833 GOTO(out_close, rc = -EOPNOTSUPP);
834
835 /* lease already obtained, handle the lease lock */
836 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
837 if (it.d.lustre.it_lock_mode == 0 ||
838 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
839 /* an open lock must be returned for a lease */
840 CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
841 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
842 it.d.lustre.it_lock_bits);
843 GOTO(out_close, rc = -EPROTO);
844 }
845
846 ll_intent_release(&it);
847 return och;
848
849 out_close:
850 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
851 if (rc2)
852 CERROR("Close openhandle returned %d\n", rc2);
853
854 /* cancel open lock */
855 if (it.d.lustre.it_lock_mode != 0) {
856 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
857 it.d.lustre.it_lock_mode);
858 it.d.lustre.it_lock_mode = 0;
859 }
860 out_release_it:
861 ll_intent_release(&it);
862 out:
863 OBD_FREE_PTR(och);
864 return ERR_PTR(rc);
865 }
866 EXPORT_SYMBOL(ll_lease_open);
867
868 /**
869 * Release the lease and close the file.
870 * It will check whether the lease was ever broken.
871 */
872 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
873 bool *lease_broken)
874 {
875 struct ldlm_lock *lock;
876 bool cancelled = true;
877 int rc;
878
879 lock = ldlm_handle2lock(&och->och_lease_handle);
880 if (lock != NULL) {
881 lock_res_and_lock(lock);
882 cancelled = ldlm_is_cancel(lock);
883 unlock_res_and_lock(lock);
884 ldlm_lock_put(lock);
885 }
886
887 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
888 PFID(&ll_i2info(inode)->lli_fid), cancelled);
889
890 if (!cancelled)
891 ldlm_cli_cancel(&och->och_lease_handle, 0);
892 if (lease_broken != NULL)
893 *lease_broken = cancelled;
894
895 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
896 NULL);
897 return rc;
898 }
899 EXPORT_SYMBOL(ll_lease_close);
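
/*
 * Editor's note: a minimal sketch of how the two lease helpers above are
 * meant to be paired.  It is modelled on ll_hsm_release() further down in
 * this file and the lease cleanup in ll_md_close(), so no new interfaces
 * are assumed:
 *
 *	struct obd_client_handle *och;
 *	bool lease_broken;
 *
 *	och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
 *	if (IS_ERR(och))
 *		return PTR_ERR(och);
 *	... do the work that must happen under an unbroken lease ...
 *	ll_lease_close(och, inode, &lease_broken);
 *
 * If lease_broken comes back true, another opener intervened and the work
 * done under the lease should be treated as invalid.
 */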
900
901 /* Fills the obdo with the attributes for the lsm */
902 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
903 struct obd_capa *capa, struct obdo *obdo,
904 __u64 ioepoch, int sync)
905 {
906 struct ptlrpc_request_set *set;
907 struct obd_info oinfo = { { { 0 } } };
908 int rc;
909
910 LASSERT(lsm != NULL);
911
912 oinfo.oi_md = lsm;
913 oinfo.oi_oa = obdo;
914 oinfo.oi_oa->o_oi = lsm->lsm_oi;
915 oinfo.oi_oa->o_mode = S_IFREG;
916 oinfo.oi_oa->o_ioepoch = ioepoch;
917 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
918 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
919 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
920 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
921 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
922 OBD_MD_FLDATAVERSION;
923 oinfo.oi_capa = capa;
924 if (sync) {
925 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
926 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
927 }
928
929 set = ptlrpc_prep_set();
930 if (set == NULL) {
931 CERROR("can't allocate ptlrpc set\n");
932 rc = -ENOMEM;
933 } else {
934 rc = obd_getattr_async(exp, &oinfo, set);
935 if (rc == 0)
936 rc = ptlrpc_set_wait(set);
937 ptlrpc_set_destroy(set);
938 }
939 if (rc == 0)
940 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
941 OBD_MD_FLATIME | OBD_MD_FLMTIME |
942 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
943 OBD_MD_FLDATAVERSION);
944 return rc;
945 }
946
947 /**
948 * Performs the getattr on the inode and updates its fields.
949 * If @sync != 0, perform the getattr under the server-side lock.
950 */
951 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
952 __u64 ioepoch, int sync)
953 {
954 struct obd_capa *capa = ll_mdscapa_get(inode);
955 struct lov_stripe_md *lsm;
956 int rc;
957
958 lsm = ccc_inode_lsm_get(inode);
959 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
960 capa, obdo, ioepoch, sync);
961 capa_put(capa);
962 if (rc == 0) {
963 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
964
965 obdo_refresh_inode(inode, obdo, obdo->o_valid);
966 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
967 " blksize %lu\n", POSTID(oi), i_size_read(inode),
968 (unsigned long long)inode->i_blocks,
969 (unsigned long)ll_inode_blksize(inode));
970 }
971 ccc_inode_lsm_put(inode, lsm);
972 return rc;
973 }
974
975 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
976 {
977 struct ll_inode_info *lli = ll_i2info(inode);
978 struct cl_object *obj = lli->lli_clob;
979 struct cl_attr *attr = ccc_env_thread_attr(env);
980 struct ost_lvb lvb;
981 int rc = 0;
982
983 ll_inode_size_lock(inode);
984 /* merge the timestamps most recently obtained from the MDS with
985 the timestamps obtained from the OSTs */
986 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
987 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
988 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
989 inode_init_lvb(inode, &lvb);
990
991 cl_object_attr_lock(obj);
992 rc = cl_object_attr_get(env, obj, attr);
993 cl_object_attr_unlock(obj);
994
995 if (rc == 0) {
996 if (lvb.lvb_atime < attr->cat_atime)
997 lvb.lvb_atime = attr->cat_atime;
998 if (lvb.lvb_ctime < attr->cat_ctime)
999 lvb.lvb_ctime = attr->cat_ctime;
1000 if (lvb.lvb_mtime < attr->cat_mtime)
1001 lvb.lvb_mtime = attr->cat_mtime;
1002
1003 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1004 PFID(&lli->lli_fid), attr->cat_size);
1005 cl_isize_write_nolock(inode, attr->cat_size);
1006
1007 inode->i_blocks = attr->cat_blocks;
1008
1009 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1010 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1011 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1012 }
1013 ll_inode_size_unlock(inode);
1014
1015 return rc;
1016 }
1017
1018 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1019 lstat_t *st)
1020 {
1021 struct obdo obdo = { 0 };
1022 int rc;
1023
1024 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1025 if (rc == 0) {
1026 st->st_size = obdo.o_size;
1027 st->st_blocks = obdo.o_blocks;
1028 st->st_mtime = obdo.o_mtime;
1029 st->st_atime = obdo.o_atime;
1030 st->st_ctime = obdo.o_ctime;
1031 }
1032 return rc;
1033 }
1034
1035 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1036 {
1037 struct inode *inode = file->f_dentry->d_inode;
1038
1039 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1040 if (write) {
1041 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1042 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1043 file->f_flags & O_DIRECT ||
1044 IS_SYNC(inode);
1045 }
1046 io->ci_obj = ll_i2info(inode)->lli_clob;
1047 io->ci_lockreq = CILR_MAYBE;
1048 if (ll_file_nolock(file)) {
1049 io->ci_lockreq = CILR_NEVER;
1050 io->ci_no_srvlock = 1;
1051 } else if (file->f_flags & O_APPEND) {
1052 io->ci_lockreq = CILR_MANDATORY;
1053 }
1054 }
1055
1056 static ssize_t
1057 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1058 struct file *file, enum cl_io_type iot,
1059 loff_t *ppos, size_t count)
1060 {
1061 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1062 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1063 struct cl_io *io;
1064 ssize_t result;
1065
1066 restart:
1067 io = ccc_env_thread_io(env);
1068 ll_io_init(io, file, iot == CIT_WRITE);
1069
1070 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1071 struct vvp_io *vio = vvp_env_io(env);
1072 struct ccc_io *cio = ccc_env_io(env);
1073 int write_mutex_locked = 0;
1074
1075 cio->cui_fd = LUSTRE_FPRIVATE(file);
1076 vio->cui_io_subtype = args->via_io_subtype;
1077
1078 switch (vio->cui_io_subtype) {
1079 case IO_NORMAL:
1080 cio->cui_iov = args->u.normal.via_iov;
1081 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1082 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1083 cio->cui_iocb = args->u.normal.via_iocb;
1084 if ((iot == CIT_WRITE) &&
1085 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1086 if (mutex_lock_interruptible(&lli->
1087 lli_write_mutex))
1088 GOTO(out, result = -ERESTARTSYS);
1089 write_mutex_locked = 1;
1090 } else if (iot == CIT_READ) {
1091 down_read(&lli->lli_trunc_sem);
1092 }
1093 break;
1094 case IO_SENDFILE:
1095 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1096 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1097 break;
1098 case IO_SPLICE:
1099 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1100 vio->u.splice.cui_flags = args->u.splice.via_flags;
1101 break;
1102 default:
1103 CERROR("Unknown IO type - %u\n", vio->cui_io_subtype);
1104 LBUG();
1105 }
1106 result = cl_io_loop(env, io);
1107 if (write_mutex_locked)
1108 mutex_unlock(&lli->lli_write_mutex);
1109 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1110 up_read(&lli->lli_trunc_sem);
1111 } else {
1112 /* cl_io_rw_init() handled IO */
1113 result = io->ci_result;
1114 }
1115
1116 if (io->ci_nob > 0) {
1117 result = io->ci_nob;
1118 *ppos = io->u.ci_wr.wr.crw_pos;
1119 }
1120 GOTO(out, result);
1121 out:
1122 cl_io_fini(env, io);
1123 /* If anything has been read/written (result != 0), we just return
1124 * the short read/write instead of restarting the io. */
1125 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1126 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1127 iot == CIT_READ ? "read" : "write",
1128 file->f_dentry->d_name.name, *ppos, count);
1129 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1130 goto restart;
1131 }
1132
1133 if (iot == CIT_READ) {
1134 if (result >= 0)
1135 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1136 LPROC_LL_READ_BYTES, result);
1137 } else if (iot == CIT_WRITE) {
1138 if (result >= 0) {
1139 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1140 LPROC_LL_WRITE_BYTES, result);
1141 fd->fd_write_failed = false;
1142 } else if (result != -ERESTARTSYS) {
1143 fd->fd_write_failed = true;
1144 }
1145 }
1146
1147 return result;
1148 }
1149
1150 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1151 unsigned long nr_segs, loff_t pos)
1152 {
1153 struct lu_env *env;
1154 struct vvp_io_args *args;
1155 size_t count = 0;
1156 ssize_t result;
1157 int refcheck;
1158
1159 result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1160 if (result)
1161 return result;
1162
1163 env = cl_env_get(&refcheck);
1164 if (IS_ERR(env))
1165 return PTR_ERR(env);
1166
1167 args = vvp_env_args(env, IO_NORMAL);
1168 args->u.normal.via_iov = (struct iovec *)iov;
1169 args->u.normal.via_nrsegs = nr_segs;
1170 args->u.normal.via_iocb = iocb;
1171
1172 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1173 &iocb->ki_pos, count);
1174 cl_env_put(env, &refcheck);
1175 return result;
1176 }
1177
1178 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1179 loff_t *ppos)
1180 {
1181 struct lu_env *env;
1182 struct iovec *local_iov;
1183 struct kiocb *kiocb;
1184 ssize_t result;
1185 int refcheck;
1186
1187 env = cl_env_get(&refcheck);
1188 if (IS_ERR(env))
1189 return PTR_ERR(env);
1190
1191 local_iov = &vvp_env_info(env)->vti_local_iov;
1192 kiocb = &vvp_env_info(env)->vti_kiocb;
1193 local_iov->iov_base = (void __user *)buf;
1194 local_iov->iov_len = count;
1195 init_sync_kiocb(kiocb, file);
1196 kiocb->ki_pos = *ppos;
1197 kiocb->ki_nbytes = count;
1198
1199 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1200 *ppos = kiocb->ki_pos;
1201
1202 cl_env_put(env, &refcheck);
1203 return result;
1204 }
1205
1206 /*
1207 * Write to a file (through the page cache).
1208 */
1209 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1210 unsigned long nr_segs, loff_t pos)
1211 {
1212 struct lu_env *env;
1213 struct vvp_io_args *args;
1214 size_t count = 0;
1215 ssize_t result;
1216 int refcheck;
1217
1218 result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
1219 if (result)
1220 return result;
1221
1222 env = cl_env_get(&refcheck);
1223 if (IS_ERR(env))
1224 return PTR_ERR(env);
1225
1226 args = vvp_env_args(env, IO_NORMAL);
1227 args->u.normal.via_iov = (struct iovec *)iov;
1228 args->u.normal.via_nrsegs = nr_segs;
1229 args->u.normal.via_iocb = iocb;
1230
1231 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1232 &iocb->ki_pos, count);
1233 cl_env_put(env, &refcheck);
1234 return result;
1235 }
1236
1237 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1238 loff_t *ppos)
1239 {
1240 struct lu_env *env;
1241 struct iovec *local_iov;
1242 struct kiocb *kiocb;
1243 ssize_t result;
1244 int refcheck;
1245
1246 env = cl_env_get(&refcheck);
1247 if (IS_ERR(env))
1248 return PTR_ERR(env);
1249
1250 local_iov = &vvp_env_info(env)->vti_local_iov;
1251 kiocb = &vvp_env_info(env)->vti_kiocb;
1252 local_iov->iov_base = (void __user *)buf;
1253 local_iov->iov_len = count;
1254 init_sync_kiocb(kiocb, file);
1255 kiocb->ki_pos = *ppos;
1256 kiocb->ki_nbytes = count;
1257
1258 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1259 *ppos = kiocb->ki_pos;
1260
1261 cl_env_put(env, &refcheck);
1262 return result;
1263 }
1264
1265
1266
1267 /*
1268 * Send file content (through pagecache) somewhere with helper
1269 */
1270 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1271 struct pipe_inode_info *pipe, size_t count,
1272 unsigned int flags)
1273 {
1274 struct lu_env *env;
1275 struct vvp_io_args *args;
1276 ssize_t result;
1277 int refcheck;
1278
1279 env = cl_env_get(&refcheck);
1280 if (IS_ERR(env))
1281 return PTR_ERR(env);
1282
1283 args = vvp_env_args(env, IO_SPLICE);
1284 args->u.splice.via_pipe = pipe;
1285 args->u.splice.via_flags = flags;
1286
1287 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1288 cl_env_put(env, &refcheck);
1289 return result;
1290 }
1291
1292 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1293 obd_count ost_idx)
1294 {
1295 struct obd_export *exp = ll_i2dtexp(inode);
1296 struct obd_trans_info oti = { 0 };
1297 struct obdo *oa = NULL;
1298 int lsm_size;
1299 int rc = 0;
1300 struct lov_stripe_md *lsm = NULL, *lsm2;
1301
1302 OBDO_ALLOC(oa);
1303 if (oa == NULL)
1304 return -ENOMEM;
1305
1306 lsm = ccc_inode_lsm_get(inode);
1307 if (!lsm_has_objects(lsm))
1308 GOTO(out, rc = -ENOENT);
1309
1310 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1311 (lsm->lsm_stripe_count));
1312
1313 OBD_ALLOC_LARGE(lsm2, lsm_size);
1314 if (lsm2 == NULL)
1315 GOTO(out, rc = -ENOMEM);
1316
1317 oa->o_oi = *oi;
1318 oa->o_nlink = ost_idx;
1319 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1320 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1321 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1322 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1323 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1324 memcpy(lsm2, lsm, lsm_size);
1325 ll_inode_size_lock(inode);
1326 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1327 ll_inode_size_unlock(inode);
1328
1329 OBD_FREE_LARGE(lsm2, lsm_size);
1330 GOTO(out, rc);
1331 out:
1332 ccc_inode_lsm_put(inode, lsm);
1333 OBDO_FREE(oa);
1334 return rc;
1335 }
1336
1337 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1338 {
1339 struct ll_recreate_obj ucreat;
1340 struct ost_id oi;
1341
1342 if (!capable(CFS_CAP_SYS_ADMIN))
1343 return -EPERM;
1344
1345 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1346 sizeof(ucreat)))
1347 return -EFAULT;
1348
1349 ostid_set_seq_mdt0(&oi);
1350 ostid_set_id(&oi, ucreat.lrc_id);
1351 return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
1352 }
1353
1354 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1355 {
1356 struct lu_fid fid;
1357 struct ost_id oi;
1358 obd_count ost_idx;
1359
1360 if (!capable(CFS_CAP_SYS_ADMIN))
1361 return -EPERM;
1362
1363 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1364 return -EFAULT;
1365
1366 fid_to_ostid(&fid, &oi);
1367 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1368 return ll_lov_recreate(inode, &oi, ost_idx);
1369 }
1370
1371 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1372 int flags, struct lov_user_md *lum, int lum_size)
1373 {
1374 struct lov_stripe_md *lsm = NULL;
1375 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1376 int rc = 0;
1377
1378 lsm = ccc_inode_lsm_get(inode);
1379 if (lsm != NULL) {
1380 ccc_inode_lsm_put(inode, lsm);
1381 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1382 inode->i_ino);
1383 GOTO(out, rc = -EEXIST);
1384 }
1385
1386 ll_inode_size_lock(inode);
1387 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1388 if (rc)
1389 GOTO(out_unlock, rc);
1390 rc = oit.d.lustre.it_status;
1391 if (rc < 0)
1392 GOTO(out_req_free, rc);
1393
1394 ll_release_openhandle(file->f_dentry, &oit);
1395
1396 out_unlock:
1397 ll_inode_size_unlock(inode);
1398 ll_intent_release(&oit);
1399 ccc_inode_lsm_put(inode, lsm);
1400 out:
1401 cl_lov_delay_create_clear(&file->f_flags);
1402 return rc;
1403 out_req_free:
1404 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1405 goto out;
1406 }
1407
1408 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1409 struct lov_mds_md **lmmp, int *lmm_size,
1410 struct ptlrpc_request **request)
1411 {
1412 struct ll_sb_info *sbi = ll_i2sbi(inode);
1413 struct mdt_body *body;
1414 struct lov_mds_md *lmm = NULL;
1415 struct ptlrpc_request *req = NULL;
1416 struct md_op_data *op_data;
1417 int rc, lmmsize;
1418
1419 rc = ll_get_max_mdsize(sbi, &lmmsize);
1420 if (rc)
1421 return rc;
1422
1423 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1424 strlen(filename), lmmsize,
1425 LUSTRE_OPC_ANY, NULL);
1426 if (IS_ERR(op_data))
1427 return PTR_ERR(op_data);
1428
1429 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1430 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1431 ll_finish_md_op_data(op_data);
1432 if (rc < 0) {
1433 CDEBUG(D_INFO, "md_getattr_name failed "
1434 "on %s: rc %d\n", filename, rc);
1435 GOTO(out, rc);
1436 }
1437
1438 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1439 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1440
1441 lmmsize = body->eadatasize;
1442
1443 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1444 lmmsize == 0) {
1445 GOTO(out, rc = -ENODATA);
1446 }
1447
1448 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1449 LASSERT(lmm != NULL);
1450
1451 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1452 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1453 GOTO(out, rc = -EPROTO);
1454 }
1455
1456 /*
1457 * This is coming from the MDS, so is probably in
1458 * little endian. We convert it to host endian before
1459 * passing it to userspace.
1460 */
1461 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1462 int stripe_count;
1463
1464 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1465 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1466 stripe_count = 0;
1467
1468 /* if the function is called for a directory - we should
1469 * avoid swabbing non-existent lsm objects */
1470 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1471 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1472 if (S_ISREG(body->mode))
1473 lustre_swab_lov_user_md_objects(
1474 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1475 stripe_count);
1476 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1477 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1478 if (S_ISREG(body->mode))
1479 lustre_swab_lov_user_md_objects(
1480 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1481 stripe_count);
1482 }
1483 }
1484
1485 out:
1486 *lmmp = lmm;
1487 *lmm_size = lmmsize;
1488 *request = req;
1489 return rc;
1490 }
1491
1492 static int ll_lov_setea(struct inode *inode, struct file *file,
1493 unsigned long arg)
1494 {
1495 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1496 struct lov_user_md *lump;
1497 int lum_size = sizeof(struct lov_user_md) +
1498 sizeof(struct lov_user_ost_data);
1499 int rc;
1500
1501 if (!capable(CFS_CAP_SYS_ADMIN))
1502 return -EPERM;
1503
1504 OBD_ALLOC_LARGE(lump, lum_size);
1505 if (lump == NULL)
1506 return -ENOMEM;
1507
1508 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1509 OBD_FREE_LARGE(lump, lum_size);
1510 return -EFAULT;
1511 }
1512
1513 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1514
1515 OBD_FREE_LARGE(lump, lum_size);
1516 return rc;
1517 }
1518
1519 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1520 unsigned long arg)
1521 {
1522 struct lov_user_md_v3 lumv3;
1523 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1524 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1525 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1526 int lum_size, rc;
1527 int flags = FMODE_WRITE;
1528
1529 /* first try with v1 which is smaller than v3 */
1530 lum_size = sizeof(struct lov_user_md_v1);
1531 if (copy_from_user(lumv1, lumv1p, lum_size))
1532 return -EFAULT;
1533
1534 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1535 lum_size = sizeof(struct lov_user_md_v3);
1536 if (copy_from_user(&lumv3, lumv3p, lum_size))
1537 return -EFAULT;
1538 }
1539
1540 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1541 if (rc == 0) {
1542 struct lov_stripe_md *lsm;
1543 __u32 gen;
1544
1545 put_user(0, &lumv1p->lmm_stripe_count);
1546
1547 ll_layout_refresh(inode, &gen);
1548 lsm = ccc_inode_lsm_get(inode);
1549 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1550 0, lsm, (void *)arg);
1551 ccc_inode_lsm_put(inode, lsm);
1552 }
1553 return rc;
1554 }
1555
1556 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1557 {
1558 struct lov_stripe_md *lsm;
1559 int rc = -ENODATA;
1560
1561 lsm = ccc_inode_lsm_get(inode);
1562 if (lsm != NULL)
1563 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1564 lsm, (void *)arg);
1565 ccc_inode_lsm_put(inode, lsm);
1566 return rc;
1567 }
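
/*
 * Editor's illustrative sketch (not from the original file): how userspace
 * typically retrieves the striping served by the GETSTRIPE handler above.
 * The buffer must be large enough for the per-stripe objects; the use of
 * LOV_MAX_STRIPE_COUNT and the raw ioctl() call are assumptions -- real
 * applications normally go through llapi_file_get_stripe() in liblustreapi.
 *
 *	struct lov_user_md *lum;
 *	size_t len = sizeof(*lum) +
 *		     LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data);
 *
 *	lum = calloc(1, len);
 *	lum->lmm_magic = LOV_USER_MAGIC_V1;
 *	if (ioctl(fd, LL_IOC_LOV_GETSTRIPE, lum) == 0)
 *		printf("stripe_count=%u stripe_size=%u\n",
 *		       lum->lmm_stripe_count, lum->lmm_stripe_size);
 */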
1568
1569 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1570 {
1571 struct ll_inode_info *lli = ll_i2info(inode);
1572 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1573 struct ccc_grouplock grouplock;
1574 int rc;
1575
1576 if (ll_file_nolock(file))
1577 return -EOPNOTSUPP;
1578
1579 spin_lock(&lli->lli_lock);
1580 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1581 CWARN("group lock already exists with gid %lu\n",
1582 fd->fd_grouplock.cg_gid);
1583 spin_unlock(&lli->lli_lock);
1584 return -EINVAL;
1585 }
1586 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1587 spin_unlock(&lli->lli_lock);
1588
1589 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1590 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1591 if (rc)
1592 return rc;
1593
1594 spin_lock(&lli->lli_lock);
1595 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1596 spin_unlock(&lli->lli_lock);
1597 CERROR("another thread just won the race\n");
1598 cl_put_grouplock(&grouplock);
1599 return -EINVAL;
1600 }
1601
1602 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1603 fd->fd_grouplock = grouplock;
1604 spin_unlock(&lli->lli_lock);
1605
1606 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
1607 return 0;
1608 }
1609
1610 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1611 {
1612 struct ll_inode_info *lli = ll_i2info(inode);
1613 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1614 struct ccc_grouplock grouplock;
1615
1616 spin_lock(&lli->lli_lock);
1617 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1618 spin_unlock(&lli->lli_lock);
1619 CWARN("no group lock held\n");
1620 return -EINVAL;
1621 }
1622 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1623
1624 if (fd->fd_grouplock.cg_gid != arg) {
1625 CWARN("group lock %lu doesn't match current id %lu\n",
1626 arg, fd->fd_grouplock.cg_gid);
1627 spin_unlock(&lli->lli_lock);
1628 return -EINVAL;
1629 }
1630
1631 grouplock = fd->fd_grouplock;
1632 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1633 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1634 spin_unlock(&lli->lli_lock);
1635
1636 cl_put_grouplock(&grouplock);
1637 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1638 return 0;
1639 }
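
/*
 * Editor's note: a sketch of the userspace counterpart to the two group-lock
 * helpers above.  LL_IOC_GROUP_LOCK/LL_IOC_GROUP_UNLOCK come from
 * lustre_user.h; the gid value is arbitrary and only has to be agreed on by
 * the cooperating processes:
 *
 *	int gid = 1234;
 *
 *	ioctl(fd, LL_IOC_GROUP_LOCK, gid);
 *	... every process holding the same gid can now do I/O under one
 *	    shared group extent lock ...
 *	ioctl(fd, LL_IOC_GROUP_UNLOCK, gid);
 */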
1640
1641 /**
1642 * Close inode open handle
1643 *
1644 * \param dentry [in] dentry which contains the inode
1645 * \param it [in,out] intent which contains open info and result
1646 *
1647 * \retval 0 success
1648 * \retval <0 failure
1649 */
1650 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1651 {
1652 struct inode *inode = dentry->d_inode;
1653 struct obd_client_handle *och;
1654 int rc;
1655
1656 LASSERT(inode);
1657
1658 /* Root ? Do nothing. */
1659 if (dentry->d_inode->i_sb->s_root == dentry)
1660 return 0;
1661
1662 /* No open handle to close? Move away */
1663 if (!it_disposition(it, DISP_OPEN_OPEN))
1664 return 0;
1665
1666 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1667
1668 OBD_ALLOC(och, sizeof(*och));
1669 if (!och)
1670 GOTO(out, rc = -ENOMEM);
1671
1672 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1673
1674 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1675 inode, och, NULL);
1676 out:
1677 /* this one is in place of ll_file_open */
1678 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1679 ptlrpc_req_finished(it->d.lustre.it_data);
1680 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1681 }
1682 return rc;
1683 }
1684
1685 /**
1686 * Get size for inode for which FIEMAP mapping is requested.
1687 * Make the FIEMAP get_info call and return the result.
1688 */
1689 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1690 int num_bytes)
1691 {
1692 struct obd_export *exp = ll_i2dtexp(inode);
1693 struct lov_stripe_md *lsm = NULL;
1694 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1695 int vallen = num_bytes;
1696 int rc;
1697
1698 /* Checks for fiemap flags */
1699 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1700 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1701 return -EBADR;
1702 }
1703
1704 /* Check for FIEMAP_FLAG_SYNC */
1705 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1706 rc = filemap_fdatawrite(inode->i_mapping);
1707 if (rc)
1708 return rc;
1709 }
1710
1711 lsm = ccc_inode_lsm_get(inode);
1712 if (lsm == NULL)
1713 return -ENOENT;
1714
1715 /* If the stripe_count > 1 and the application does not understand
1716 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1717 */
1718 if (lsm->lsm_stripe_count > 1 &&
1719 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1720 GOTO(out, rc = -EOPNOTSUPP);
1721
1722 fm_key.oa.o_oi = lsm->lsm_oi;
1723 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1724
1725 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1726 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1727 /* If filesize is 0, then there would be no objects for mapping */
1728 if (fm_key.oa.o_size == 0) {
1729 fiemap->fm_mapped_extents = 0;
1730 GOTO(out, rc = 0);
1731 }
1732
1733 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1734
1735 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1736 fiemap, lsm);
1737 if (rc)
1738 CERROR("obd_get_info failed: rc = %d\n", rc);
1739
1740 out:
1741 ccc_inode_lsm_put(inode, lsm);
1742 return rc;
1743 }
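
/*
 * Editor's illustrative sketch: the extent map is requested from userspace
 * through the standard fiemap structures (<linux/fiemap.h>), which is what
 * eventually lands in ll_do_fiemap() above.  FIEMAP_FLAG_DEVICE_ORDER is the
 * Lustre-specific flag mentioned in the stripe_count check; whether a given
 * release accepts it through the generic FS_IOC_FIEMAP ioctl or a private
 * Lustre ioctl number is an assumption to verify against lustre_user.h.
 *
 *	int n = 32;
 *	struct fiemap *fm;
 *
 *	fm = calloc(1, sizeof(*fm) + n * sizeof(struct fiemap_extent));
 *	fm->fm_length = FIEMAP_MAX_OFFSET;
 *	fm->fm_extent_count = n;
 *	fm->fm_flags = FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER;
 *	ioctl(fd, FS_IOC_FIEMAP, fm);
 */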
1744
1745 int ll_fid2path(struct inode *inode, void *arg)
1746 {
1747 struct obd_export *exp = ll_i2mdexp(inode);
1748 struct getinfo_fid2path *gfout, *gfin;
1749 int outsize, rc;
1750
1751 if (!capable(CFS_CAP_DAC_READ_SEARCH) &&
1752 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1753 return -EPERM;
1754
1755 /* Need to get the buflen */
1756 OBD_ALLOC_PTR(gfin);
1757 if (gfin == NULL)
1758 return -ENOMEM;
1759 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1760 OBD_FREE_PTR(gfin);
1761 return -EFAULT;
1762 }
1763
1764 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1765 OBD_ALLOC(gfout, outsize);
1766 if (gfout == NULL) {
1767 OBD_FREE_PTR(gfin);
1768 return -ENOMEM;
1769 }
1770 memcpy(gfout, gfin, sizeof(*gfout));
1771 OBD_FREE_PTR(gfin);
1772
1773 /* Call mdc_iocontrol */
1774 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1775 if (rc)
1776 GOTO(gf_free, rc);
1777
1778 if (copy_to_user(arg, gfout, outsize))
1779 rc = -EFAULT;
1780
1781 gf_free:
1782 OBD_FREE(gfout, outsize);
1783 return rc;
1784 }
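
/*
 * Editor's note, a heavily hedged sketch of the userspace side of the
 * OBD_IOC_FID2PATH call handled above.  Field names follow struct
 * getinfo_fid2path in lustre_user.h (treat them as assumptions); most
 * applications use "lfs fid2path" or llapi_fid2path() rather than the raw
 * ioctl, which is issued on any fd within the mounted filesystem:
 *
 *	struct getinfo_fid2path *gf;
 *
 *	gf = calloc(1, sizeof(*gf) + PATH_MAX);
 *	gf->gf_fid = fid;
 *	gf->gf_pathlen = PATH_MAX;
 *	if (ioctl(mnt_fd, OBD_IOC_FID2PATH, gf) == 0)
 *		printf("%s\n", gf->gf_path);
 */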
1785
1786 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1787 {
1788 struct ll_user_fiemap *fiemap_s;
1789 size_t num_bytes, ret_bytes;
1790 unsigned int extent_count;
1791 int rc = 0;
1792
1793 /* Get the extent count so we can calculate the size of the
1794 * required fiemap buffer */
1795 if (get_user(extent_count,
1796 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1797 return -EFAULT;
1798 num_bytes = sizeof(*fiemap_s) + (extent_count *
1799 sizeof(struct ll_fiemap_extent));
1800
1801 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1802 if (fiemap_s == NULL)
1803 return -ENOMEM;
1804
1805 /* get the fiemap value */
1806 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1807 sizeof(*fiemap_s)))
1808 GOTO(error, rc = -EFAULT);
1809
1810 /* If fm_extent_count is non-zero, read the first extent, since
1811 * it is used to calculate end_offset and device from the previous
1812 * fiemap call. */
1813 if (extent_count) {
1814 if (copy_from_user(&fiemap_s->fm_extents[0],
1815 (char __user *)arg + sizeof(*fiemap_s),
1816 sizeof(struct ll_fiemap_extent)))
1817 GOTO(error, rc = -EFAULT);
1818 }
1819
1820 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1821 if (rc)
1822 GOTO(error, rc);
1823
1824 ret_bytes = sizeof(struct ll_user_fiemap);
1825
1826 if (extent_count != 0)
1827 ret_bytes += (fiemap_s->fm_mapped_extents *
1828 sizeof(struct ll_fiemap_extent));
1829
1830 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1831 rc = -EFAULT;
1832
1833 error:
1834 OBD_FREE_LARGE(fiemap_s, num_bytes);
1835 return rc;
1836 }
1837
1838 /*
1839 * Read the data_version for inode.
1840 *
1841 * This value is computed using stripe object version on OST.
1842 * Version is computed using server side locking.
1843 *
1844 * @param extent_lock Take extent lock. Not needed if a process is already
1845 * holding the OST object group locks.
1846 */
1847 int ll_data_version(struct inode *inode, __u64 *data_version,
1848 int extent_lock)
1849 {
1850 struct lov_stripe_md *lsm = NULL;
1851 struct ll_sb_info *sbi = ll_i2sbi(inode);
1852 struct obdo *obdo = NULL;
1853 int rc;
1854
1855 /* If there is no stripe, we consider the version to be 0. */
1856 lsm = ccc_inode_lsm_get(inode);
1857 if (!lsm_has_objects(lsm)) {
1858 *data_version = 0;
1859 CDEBUG(D_INODE, "No object for inode\n");
1860 GOTO(out, rc = 0);
1861 }
1862
1863 OBD_ALLOC_PTR(obdo);
1864 if (obdo == NULL)
1865 GOTO(out, rc = -ENOMEM);
1866
1867 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
1868 if (rc == 0) {
1869 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1870 rc = -EOPNOTSUPP;
1871 else
1872 *data_version = obdo->o_data_version;
1873 }
1874
1875 OBD_FREE_PTR(obdo);
1876 out:
1877 ccc_inode_lsm_put(inode, lsm);
1878 return rc;
1879 }
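/*
 * Illustrative sketch, not part of the original file: how a caller might use
 * ll_data_version() to detect whether file data changed across an operation.
 * The wrapper name and the placeholder in the middle are made up for the
 * example; only ll_data_version() itself comes from this file.
 */
static int ll_data_version_changed_sketch(struct inode *inode)
{
	__u64 dv_before = 0, dv_after = 0;
	int rc;

	/* take extent locks (last argument) so the version is stable */
	rc = ll_data_version(inode, &dv_before, 1);
	if (rc != 0)
		return rc;

	/* ... the caller performs its operation on the file here ... */

	rc = ll_data_version(inode, &dv_after, 1);
	if (rc != 0)
		return rc;

	/* a different version means the stripe objects were modified */
	return dv_before != dv_after;
}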
1880
1881 /*
1882 * Trigger a HSM release request for the provided inode.
1883 */
1884 int ll_hsm_release(struct inode *inode)
1885 {
1886 struct cl_env_nest nest;
1887 struct lu_env *env;
1888 struct obd_client_handle *och = NULL;
1889 __u64 data_version = 0;
1890 int rc;
1891
1893 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1894 ll_get_fsname(inode->i_sb, NULL, 0),
1895 PFID(&ll_i2info(inode)->lli_fid));
1896
1897 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1898 if (IS_ERR(och))
1899 GOTO(out, rc = PTR_ERR(och));
1900
1901 /* Grab latest data_version and [am]time values */
1902 rc = ll_data_version(inode, &data_version, 1);
1903 if (rc != 0)
1904 GOTO(out, rc);
1905
1906 env = cl_env_nested_get(&nest);
1907 if (IS_ERR(env))
1908 GOTO(out, rc = PTR_ERR(env));
1909
1910 ll_merge_lvb(env, inode);
1911 cl_env_nested_put(&nest, env);
1912
1913 /* Release the file.
1914 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1915 * we still need it to pack l_remote_handle to MDT. */
1916 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
1917 &data_version);
1918 och = NULL;
1919
1921 out:
1922 if (och != NULL && !IS_ERR(och)) /* close the file */
1923 ll_lease_close(och, inode, NULL);
1924
1925 return rc;
1926 }
1927
1928 struct ll_swap_stack {
1929 struct iattr ia1, ia2;
1930 __u64 dv1, dv2;
1931 struct inode *inode1, *inode2;
1932 bool check_dv1, check_dv2;
1933 };
1934
1935 static int ll_swap_layouts(struct file *file1, struct file *file2,
1936 struct lustre_swap_layouts *lsl)
1937 {
1938 struct mdc_swap_layouts msl;
1939 struct md_op_data *op_data;
1940 __u32 gid;
1941 __u64 dv;
1942 struct ll_swap_stack *llss = NULL;
1943 int rc;
1944
1945 OBD_ALLOC_PTR(llss);
1946 if (llss == NULL)
1947 return -ENOMEM;
1948
1949 llss->inode1 = file1->f_dentry->d_inode;
1950 llss->inode2 = file2->f_dentry->d_inode;
1951
1952 if (!S_ISREG(llss->inode2->i_mode))
1953 GOTO(free, rc = -EINVAL);
1954
1955 if (inode_permission(llss->inode1, MAY_WRITE) ||
1956 inode_permission(llss->inode2, MAY_WRITE))
1957 GOTO(free, rc = -EPERM);
1958
1959 if (llss->inode2->i_sb != llss->inode1->i_sb)
1960 GOTO(free, rc = -EXDEV);
1961
1962 /* we use two bools because they are easier to swap than two bits */
1963 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1964 llss->check_dv1 = true;
1965
1966 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1967 llss->check_dv2 = true;
1968
1969 /* we cannot use lsl->sl_dvX directly because we may swap them */
1970 llss->dv1 = lsl->sl_dv1;
1971 llss->dv2 = lsl->sl_dv2;
1972
1973 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1974 if (rc == 0) /* same file, done! */
1975 GOTO(free, rc = 0);
1976
1977 if (rc < 0) { /* sequentialize it */
1978 swap(llss->inode1, llss->inode2);
1979 swap(file1, file2);
1980 swap(llss->dv1, llss->dv2);
1981 swap(llss->check_dv1, llss->check_dv2);
1982 }
1983
1984 gid = lsl->sl_gid;
1985 if (gid != 0) { /* application asks to flush dirty cache */
1986 rc = ll_get_grouplock(llss->inode1, file1, gid);
1987 if (rc < 0)
1988 GOTO(free, rc);
1989
1990 rc = ll_get_grouplock(llss->inode2, file2, gid);
1991 if (rc < 0) {
1992 ll_put_grouplock(llss->inode1, file1, gid);
1993 GOTO(free, rc);
1994 }
1995 }
1996
1997 /* to be able to restore mtime and atime after swap
1998 * we need to first save them */
1999 if (lsl->sl_flags &
2000 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2001 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2002 llss->ia1.ia_atime = llss->inode1->i_atime;
2003 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2004 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2005 llss->ia2.ia_atime = llss->inode2->i_atime;
2006 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2007 }
2008
2009 /* ultimate check: before swapping the layouts we verify that the
2010 * data version has not changed (if requested) */
2011 if (llss->check_dv1) {
2012 rc = ll_data_version(llss->inode1, &dv, 0);
2013 if (rc)
2014 GOTO(putgl, rc);
2015 if (dv != llss->dv1)
2016 GOTO(putgl, rc = -EAGAIN);
2017 }
2018
2019 if (llss->check_dv2) {
2020 rc = ll_data_version(llss->inode2, &dv, 0);
2021 if (rc)
2022 GOTO(putgl, rc);
2023 if (dv != llss->dv2)
2024 GOTO(putgl, rc = -EAGAIN);
2025 }
2026
2027 /* struct md_op_data is used to send the swap args to the MDT;
2028 * only the flags are missing, so we pass struct mdc_swap_layouts
2029 * through md_op_data->op_data */
2030 /* flags from user space have to be converted before they are sent to
2031 * the server; no flag is sent today, they are only used on the client */
2032 msl.msl_flags = 0;
2033 rc = -ENOMEM;
2034 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2035 0, LUSTRE_OPC_ANY, &msl);
2036 if (IS_ERR(op_data))
2037 GOTO(free, rc = PTR_ERR(op_data));
2038
2039 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2040 sizeof(*op_data), op_data, NULL);
2041 ll_finish_md_op_data(op_data);
2042
2043 putgl:
2044 if (gid != 0) {
2045 ll_put_grouplock(llss->inode2, file2, gid);
2046 ll_put_grouplock(llss->inode1, file1, gid);
2047 }
2048
2049 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2050 if (rc != 0)
2051 GOTO(free, rc);
2052
2053 /* clear useless flags */
2054 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2055 llss->ia1.ia_valid &= ~ATTR_MTIME;
2056 llss->ia2.ia_valid &= ~ATTR_MTIME;
2057 }
2058
2059 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2060 llss->ia1.ia_valid &= ~ATTR_ATIME;
2061 llss->ia2.ia_valid &= ~ATTR_ATIME;
2062 }
2063
2064 /* update time if requested */
2065 rc = 0;
2066 if (llss->ia2.ia_valid != 0) {
2067 mutex_lock(&llss->inode1->i_mutex);
2068 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2069 mutex_unlock(&llss->inode1->i_mutex);
2070 }
2071
2072 if (llss->ia1.ia_valid != 0) {
2073 int rc1;
2074
2075 mutex_lock(&llss->inode2->i_mutex);
2076 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2077 mutex_unlock(&llss->inode2->i_mutex);
2078 if (rc == 0)
2079 rc = rc1;
2080 }
2081
2082 free:
2083 if (llss != NULL)
2084 OBD_FREE_PTR(llss);
2085
2086 return rc;
2087 }
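/*
 * Illustrative user-space sketch, not part of the original file: issuing
 * LL_IOC_LOV_SWAP_LAYOUTS from an application.  The field names mirror the
 * struct lustre_swap_layouts usage above; the header name, the group id
 * value and the minimal error handling are assumptions made for the example.
 */
#if 0	/* user-space example only, never compiled as part of this file */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <lustre/lustre_user.h>

static int swap_layouts_sketch(const char *path1, const char *path2)
{
	struct lustre_swap_layouts lsl = { 0 };
	int fd1, fd2, rc;

	/* the ioctl handler rejects an O_RDONLY first file and requires
	 * write access on the second one */
	fd1 = open(path1, O_RDWR);
	fd2 = open(path2, O_RDWR);
	if (fd1 < 0 || fd2 < 0)
		return -1;

	lsl.sl_fd = fd2;
	lsl.sl_gid = 1234;	/* non-zero: flush dirty cache first */
	lsl.sl_flags = SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME;

	rc = ioctl(fd1, LL_IOC_LOV_SWAP_LAYOUTS, &lsl);
	close(fd2);
	close(fd1);
	return rc;
}
#endif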
2088
2089 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2090 {
2091 struct md_op_data *op_data;
2092 int rc;
2093
2094 /* Non-root users are forbidden to set or clear flags which are
2095 * NOT defined in HSM_USER_MASK. */
2096 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2097 !capable(CFS_CAP_SYS_ADMIN))
2098 return -EPERM;
2099
2100 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2101 LUSTRE_OPC_ANY, hss);
2102 if (IS_ERR(op_data))
2103 return PTR_ERR(op_data);
2104
2105 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2106 sizeof(*op_data), op_data, NULL);
2107
2108 ll_finish_md_op_data(op_data);
2109
2110 return rc;
2111 }
2112
2113 static int ll_hsm_import(struct inode *inode, struct file *file,
2114 struct hsm_user_import *hui)
2115 {
2116 struct hsm_state_set *hss = NULL;
2117 struct iattr *attr = NULL;
2118 int rc;
2119
2121 if (!S_ISREG(inode->i_mode))
2122 return -EINVAL;
2123
2124 /* set HSM flags */
2125 OBD_ALLOC_PTR(hss);
2126 if (hss == NULL)
2127 GOTO(out, rc = -ENOMEM);
2128
2129 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2130 hss->hss_archive_id = hui->hui_archive_id;
2131 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2132 rc = ll_hsm_state_set(inode, hss);
2133 if (rc != 0)
2134 GOTO(out, rc);
2135
2136 OBD_ALLOC_PTR(attr);
2137 if (attr == NULL)
2138 GOTO(out, rc = -ENOMEM);
2139
2140 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2141 attr->ia_mode |= S_IFREG;
2142 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2143 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2144 attr->ia_size = hui->hui_size;
2145 attr->ia_mtime.tv_sec = hui->hui_mtime;
2146 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2147 attr->ia_atime.tv_sec = hui->hui_atime;
2148 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2149
2150 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2151 ATTR_UID | ATTR_GID |
2152 ATTR_MTIME | ATTR_MTIME_SET |
2153 ATTR_ATIME | ATTR_ATIME_SET;
2154
2155 rc = ll_setattr_raw(file->f_dentry, attr, true);
2156 if (rc == -ENODATA)
2157 rc = 0;
2158
2159 out:
2160 if (hss != NULL)
2161 OBD_FREE_PTR(hss);
2162
2163 if (attr != NULL)
2164 OBD_FREE_PTR(attr);
2165
2166 return rc;
2167 }
2168
2169 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2170 {
2171 struct inode *inode = file->f_dentry->d_inode;
2172 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2173 int flags, rc;
2174
2175 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
2176 inode->i_generation, inode, cmd);
2177 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2178
2179 /* asm-ppc{,64} declares TCGETS, et al. as type 't' not 'T' */
2180 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2181 return -ENOTTY;
2182
2183 switch(cmd) {
2184 case LL_IOC_GETFLAGS:
2185 /* Get the current value of the file flags */
2186 return put_user(fd->fd_flags, (int *)arg);
2187 case LL_IOC_SETFLAGS:
2188 case LL_IOC_CLRFLAGS:
2189 /* Set or clear specific file flags */
2190 /* XXX This probably needs checks to ensure the flags are
2191 * not abused, and to handle any flag side effects.
2192 */
2193 if (get_user(flags, (int *) arg))
2194 return -EFAULT;
2195
2196 if (cmd == LL_IOC_SETFLAGS) {
2197 if ((flags & LL_FILE_IGNORE_LOCK) &&
2198 !(file->f_flags & O_DIRECT)) {
2199 CERROR("%s: unable to disable locking on "
2200 "non-O_DIRECT file\n", current->comm);
2201 return -EINVAL;
2202 }
2203
2204 fd->fd_flags |= flags;
2205 } else {
2206 fd->fd_flags &= ~flags;
2207 }
2208 return 0;
2209 case LL_IOC_LOV_SETSTRIPE:
2210 return ll_lov_setstripe(inode, file, arg);
2211 case LL_IOC_LOV_SETEA:
2212 return ll_lov_setea(inode, file, arg);
2213 case LL_IOC_LOV_SWAP_LAYOUTS: {
2214 struct file *file2;
2215 struct lustre_swap_layouts lsl;
2216
2217 if (copy_from_user(&lsl, (char *)arg,
2218 sizeof(struct lustre_swap_layouts)))
2219 return -EFAULT;
2220
2221 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2222 return -EPERM;
2223
2224 file2 = fget(lsl.sl_fd);
2225 if (file2 == NULL)
2226 return -EBADF;
2227
2228 rc = -EPERM;
2229 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2230 rc = ll_swap_layouts(file, file2, &lsl);
2231 fput(file2);
2232 return rc;
2233 }
2234 case LL_IOC_LOV_GETSTRIPE:
2235 return ll_lov_getstripe(inode, arg);
2236 case LL_IOC_RECREATE_OBJ:
2237 return ll_lov_recreate_obj(inode, arg);
2238 case LL_IOC_RECREATE_FID:
2239 return ll_lov_recreate_fid(inode, arg);
2240 case FSFILT_IOC_FIEMAP:
2241 return ll_ioctl_fiemap(inode, arg);
2242 case FSFILT_IOC_GETFLAGS:
2243 case FSFILT_IOC_SETFLAGS:
2244 return ll_iocontrol(inode, file, cmd, arg);
2245 case FSFILT_IOC_GETVERSION_OLD:
2246 case FSFILT_IOC_GETVERSION:
2247 return put_user(inode->i_generation, (int *)arg);
2248 case LL_IOC_GROUP_LOCK:
2249 return ll_get_grouplock(inode, file, arg);
2250 case LL_IOC_GROUP_UNLOCK:
2251 return ll_put_grouplock(inode, file, arg);
2252 case IOC_OBD_STATFS:
2253 return ll_obd_statfs(inode, (void *)arg);
2254
2255 /* We need to special case any other ioctls we want to handle,
2256 * to send them to the MDS/OST as appropriate and to properly
2257 * network encode the arg field.
2258 case FSFILT_IOC_SETVERSION_OLD:
2259 case FSFILT_IOC_SETVERSION:
2260 */
2261 case LL_IOC_FLUSHCTX:
2262 return ll_flush_ctx(inode);
2263 case LL_IOC_PATH2FID: {
2264 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2265 sizeof(struct lu_fid)))
2266 return -EFAULT;
2267
2268 return 0;
2269 }
2270 case OBD_IOC_FID2PATH:
2271 return ll_fid2path(inode, (void *)arg);
2272 case LL_IOC_DATA_VERSION: {
2273 struct ioc_data_version idv;
2274 int rc;
2275
2276 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
2277 return -EFAULT;
2278
2279 rc = ll_data_version(inode, &idv.idv_version,
2280 !(idv.idv_flags & LL_DV_NOFLUSH));
2281
2282 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2283 return -EFAULT;
2284
2285 return rc;
2286 }
2287
2288 case LL_IOC_GET_MDTIDX: {
2289 int mdtidx;
2290
2291 mdtidx = ll_get_mdt_idx(inode);
2292 if (mdtidx < 0)
2293 return mdtidx;
2294
2295 if (put_user((int)mdtidx, (int*)arg))
2296 return -EFAULT;
2297
2298 return 0;
2299 }
2300 case OBD_IOC_GETDTNAME:
2301 case OBD_IOC_GETMDNAME:
2302 return ll_get_obd_name(inode, cmd, arg);
2303 case LL_IOC_HSM_STATE_GET: {
2304 struct md_op_data *op_data;
2305 struct hsm_user_state *hus;
2306 int rc;
2307
2308 OBD_ALLOC_PTR(hus);
2309 if (hus == NULL)
2310 return -ENOMEM;
2311
2312 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2313 LUSTRE_OPC_ANY, hus);
2314 if (IS_ERR(op_data)) {
2315 OBD_FREE_PTR(hus);
2316 return PTR_ERR(op_data);
2317 }
2318
2319 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2320 op_data, NULL);
2321
2322 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2323 rc = -EFAULT;
2324
2325 ll_finish_md_op_data(op_data);
2326 OBD_FREE_PTR(hus);
2327 return rc;
2328 }
2329 case LL_IOC_HSM_STATE_SET: {
2330 struct hsm_state_set *hss;
2331 int rc;
2332
2333 OBD_ALLOC_PTR(hss);
2334 if (hss == NULL)
2335 return -ENOMEM;
2336
2337 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2338 OBD_FREE_PTR(hss);
2339 return -EFAULT;
2340 }
2341
2342 rc = ll_hsm_state_set(inode, hss);
2343
2344 OBD_FREE_PTR(hss);
2345 return rc;
2346 }
2347 case LL_IOC_HSM_ACTION: {
2348 struct md_op_data *op_data;
2349 struct hsm_current_action *hca;
2350 int rc;
2351
2352 OBD_ALLOC_PTR(hca);
2353 if (hca == NULL)
2354 return -ENOMEM;
2355
2356 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2357 LUSTRE_OPC_ANY, hca);
2358 if (IS_ERR(op_data)) {
2359 OBD_FREE_PTR(hca);
2360 return PTR_ERR(op_data);
2361 }
2362
2363 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2364 op_data, NULL);
2365
2366 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2367 rc = -EFAULT;
2368
2369 ll_finish_md_op_data(op_data);
2370 OBD_FREE_PTR(hca);
2371 return rc;
2372 }
2373 case LL_IOC_SET_LEASE: {
2374 struct ll_inode_info *lli = ll_i2info(inode);
2375 struct obd_client_handle *och = NULL;
2376 bool lease_broken;
2377 fmode_t mode = 0;
2378
2379 switch (arg) {
2380 case F_WRLCK:
2381 if (!(file->f_mode & FMODE_WRITE))
2382 return -EPERM;
2383 mode = FMODE_WRITE;
2384 break;
2385 case F_RDLCK:
2386 if (!(file->f_mode & FMODE_READ))
2387 return -EPERM;
2388 mode = FMODE_READ;
2389 break;
2390 case F_UNLCK:
2391 mutex_lock(&lli->lli_och_mutex);
2392 if (fd->fd_lease_och != NULL) {
2393 och = fd->fd_lease_och;
2394 fd->fd_lease_och = NULL;
2395 }
2396 mutex_unlock(&lli->lli_och_mutex);
2397
2398 if (och != NULL) {
2399 mode = och->och_flags &
2400 (FMODE_READ|FMODE_WRITE);
2401 rc = ll_lease_close(och, inode, &lease_broken);
2402 if (rc == 0 && lease_broken)
2403 mode = 0;
2404 } else {
2405 rc = -ENOLCK;
2406 }
2407
2408 /* return the type of lease or error */
2409 return rc < 0 ? rc : (int)mode;
2410 default:
2411 return -EINVAL;
2412 }
2413
2414 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2415
2416 /* apply for lease */
2417 och = ll_lease_open(inode, file, mode, 0);
2418 if (IS_ERR(och))
2419 return PTR_ERR(och);
2420
2421 rc = 0;
2422 mutex_lock(&lli->lli_och_mutex);
2423 if (fd->fd_lease_och == NULL) {
2424 fd->fd_lease_och = och;
2425 och = NULL;
2426 }
2427 mutex_unlock(&lli->lli_och_mutex);
2428 if (och != NULL) {
2429 /* should not happen, since only exclusive leases are supported for now */
2430 ll_lease_close(och, inode, &lease_broken);
2431 rc = -EBUSY;
2432 }
2433 return rc;
2434 }
2435 case LL_IOC_GET_LEASE: {
2436 struct ll_inode_info *lli = ll_i2info(inode);
2437 struct ldlm_lock *lock = NULL;
2438
2439 rc = 0;
2440 mutex_lock(&lli->lli_och_mutex);
2441 if (fd->fd_lease_och != NULL) {
2442 struct obd_client_handle *och = fd->fd_lease_och;
2443
2444 lock = ldlm_handle2lock(&och->och_lease_handle);
2445 if (lock != NULL) {
2446 lock_res_and_lock(lock);
2447 if (!ldlm_is_cancel(lock))
2448 rc = och->och_flags &
2449 (FMODE_READ | FMODE_WRITE);
2450 unlock_res_and_lock(lock);
2451 ldlm_lock_put(lock);
2452 }
2453 }
2454 mutex_unlock(&lli->lli_och_mutex);
2455 return rc;
2456 }
2457 case LL_IOC_HSM_IMPORT: {
2458 struct hsm_user_import *hui;
2459
2460 OBD_ALLOC_PTR(hui);
2461 if (hui == NULL)
2462 return -ENOMEM;
2463
2464 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2465 OBD_FREE_PTR(hui);
2466 return -EFAULT;
2467 }
2468
2469 rc = ll_hsm_import(inode, file, hui);
2470
2471 OBD_FREE_PTR(hui);
2472 return rc;
2473 }
2474 default: {
2475 int err;
2476
2477 if (LLIOC_STOP ==
2478 ll_iocontrol_call(inode, file, cmd, arg, &err))
2479 return err;
2480
2481 return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2482 (void *)arg);
2483 }
2484 }
2485 }
2486
2487
2488 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2489 {
2490 struct inode *inode = file->f_dentry->d_inode;
2491 loff_t retval, eof = 0;
2492
2493 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2494 (origin == SEEK_CUR) ? file->f_pos : 0);
2495 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2496 inode->i_ino, inode->i_generation, inode, retval, retval,
2497 origin);
2498 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2499
2500 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2501 retval = ll_glimpse_size(inode);
2502 if (retval != 0)
2503 return retval;
2504 eof = i_size_read(inode);
2505 }
2506
2507 retval = generic_file_llseek_size(file, offset, origin,
2508 ll_file_maxbytes(inode), eof);
2509 return retval;
2510 }
2511
2512 int ll_flush(struct file *file, fl_owner_t id)
2513 {
2514 struct inode *inode = file->f_dentry->d_inode;
2515 struct ll_inode_info *lli = ll_i2info(inode);
2516 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2517 int rc, err;
2518
2519 LASSERT(!S_ISDIR(inode->i_mode));
2520
2521 /* catch async errors that were recorded back when async writeback
2522 * failed for pages in this mapping. */
2523 rc = lli->lli_async_rc;
2524 lli->lli_async_rc = 0;
2525 err = lov_read_and_clear_async_rc(lli->lli_clob);
2526 if (rc == 0)
2527 rc = err;
2528
2529 /* The application has already been told about the write failure.
2530 * Do not report the failure again. */
2531 if (fd->fd_write_failed)
2532 return 0;
2533 return rc ? -EIO : 0;
2534 }
2535
2536 /**
2537 * Called to make sure a portion of file has been written out.
2538 * Depending on @mode, OST_SYNC RPCs may be sent to the OSTs.
2539 *
2540 * Return how many pages have been written.
2541 */
2542 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2543 enum cl_fsync_mode mode, int ignore_layout)
2544 {
2545 struct cl_env_nest nest;
2546 struct lu_env *env;
2547 struct cl_io *io;
2548 struct obd_capa *capa = NULL;
2549 struct cl_fsync_io *fio;
2550 int result;
2551
2552 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2553 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2554 return -EINVAL;
2555
2556 env = cl_env_nested_get(&nest);
2557 if (IS_ERR(env))
2558 return PTR_ERR(env);
2559
2560 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2561
2562 io = ccc_env_thread_io(env);
2563 io->ci_obj = cl_i2info(inode)->lli_clob;
2564 io->ci_ignore_layout = ignore_layout;
2565
2566 /* initialize parameters for sync */
2567 fio = &io->u.ci_fsync;
2568 fio->fi_capa = capa;
2569 fio->fi_start = start;
2570 fio->fi_end = end;
2571 fio->fi_fid = ll_inode2fid(inode);
2572 fio->fi_mode = mode;
2573 fio->fi_nr_written = 0;
2574
2575 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2576 result = cl_io_loop(env, io);
2577 else
2578 result = io->ci_result;
2579 if (result == 0)
2580 result = fio->fi_nr_written;
2581 cl_io_fini(env, io);
2582 cl_env_nested_put(&nest, env);
2583
2584 capa_put(capa);
2585
2586 return result;
2587 }
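/*
 * Illustrative sketch, not part of the original file: flushing a byte range
 * of an inode with cl_sync_file_range() and interpreting its return value
 * (negative on error, number of pages written otherwise).  The wrapper name
 * and the choice of CL_FSYNC_LOCAL are only for the example.
 */
static int ll_flush_range_sketch(struct inode *inode, loff_t start, loff_t end)
{
	int nr_written;

	/* CL_FSYNC_LOCAL: start writeback for [start, end] and wait for it */
	nr_written = cl_sync_file_range(inode, start, end, CL_FSYNC_LOCAL, 0);
	if (nr_written < 0)
		return nr_written;

	CDEBUG(D_INODE, "wrote back %d pages\n", nr_written);
	return 0;
}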
2588
2589 /*
2590 * When dentry is provided (the 'else' case), *file->f_dentry may be
2591 * null and dentry must be used directly rather than pulled from
2592 * *file->f_dentry as is done otherwise.
2593 */
2594
2595 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2596 {
2597 struct dentry *dentry = file->f_dentry;
2598 struct inode *inode = dentry->d_inode;
2599 struct ll_inode_info *lli = ll_i2info(inode);
2600 struct ptlrpc_request *req;
2601 struct obd_capa *oc;
2602 int rc, err;
2603
2604 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2605 inode->i_generation, inode);
2606 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2607
2608 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2609 mutex_lock(&inode->i_mutex);
2610
2611 /* catch async errors that were recorded back when async writeback
2612 * failed for pages in this mapping. */
2613 if (!S_ISDIR(inode->i_mode)) {
2614 err = lli->lli_async_rc;
2615 lli->lli_async_rc = 0;
2616 if (rc == 0)
2617 rc = err;
2618 err = lov_read_and_clear_async_rc(lli->lli_clob);
2619 if (rc == 0)
2620 rc = err;
2621 }
2622
2623 oc = ll_mdscapa_get(inode);
2624 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2625 &req);
2626 capa_put(oc);
2627 if (!rc)
2628 rc = err;
2629 if (!err)
2630 ptlrpc_req_finished(req);
2631
2632 if (datasync && S_ISREG(inode->i_mode)) {
2633 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2634
2635 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2636 CL_FSYNC_ALL, 0);
2637 if (rc == 0 && err < 0)
2638 rc = err;
2639 if (rc < 0)
2640 fd->fd_write_failed = true;
2641 else
2642 fd->fd_write_failed = false;
2643 }
2644
2645 mutex_unlock(&inode->i_mutex);
2646 return rc;
2647 }
2648
2649 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2650 {
2651 struct inode *inode = file->f_dentry->d_inode;
2652 struct ll_sb_info *sbi = ll_i2sbi(inode);
2653 struct ldlm_enqueue_info einfo = {
2654 .ei_type = LDLM_FLOCK,
2655 .ei_cb_cp = ldlm_flock_completion_ast,
2656 .ei_cbdata = file_lock,
2657 };
2658 struct md_op_data *op_data;
2659 struct lustre_handle lockh = {0};
2660 ldlm_policy_data_t flock = {{0}};
2661 int flags = 0;
2662 int rc;
2663 int rc2 = 0;
2664
2665 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2666 inode->i_ino, file_lock);
2667
2668 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2669
2670 if (file_lock->fl_flags & FL_FLOCK) {
2671 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2672 /* flocks are whole-file locks */
2673 flock.l_flock.end = OFFSET_MAX;
2674 /* For flocks the owner is determined by the local file descriptor */
2675 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2676 } else if (file_lock->fl_flags & FL_POSIX) {
2677 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2678 flock.l_flock.start = file_lock->fl_start;
2679 flock.l_flock.end = file_lock->fl_end;
2680 } else {
2681 return -EINVAL;
2682 }
2683 flock.l_flock.pid = file_lock->fl_pid;
2684
2685 /* Somewhat ugly workaround for svc lockd.
2686 * lockd installs custom fl_lmops->lm_compare_owner that checks
2687 * for the fl_owner to be the same (which it always is on local node
2688 * I guess between lockd processes) and then compares pid.
2689 * As such we assign the pid to the owner field to make it all work;
2690 * conflict with normal locks is unlikely since the pid space and the
2691 * pointer space for current->files do not intersect */
2692 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2693 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2694
2695 switch (file_lock->fl_type) {
2696 case F_RDLCK:
2697 einfo.ei_mode = LCK_PR;
2698 break;
2699 case F_UNLCK:
2700 /* An unlock request may or may not have any relation to
2701 * existing locks so we may not be able to pass a lock handle
2702 * via a normal ldlm_lock_cancel() request. The request may even
2703 * unlock a byte range in the middle of an existing lock. In
2704 * order to process an unlock request we need all of the same
2705 * information that is given with a normal read or write record
2706 * lock request. To avoid creating another ldlm unlock (cancel)
2707 * message we'll treat a LCK_NL flock request as an unlock. */
2708 einfo.ei_mode = LCK_NL;
2709 break;
2710 case F_WRLCK:
2711 einfo.ei_mode = LCK_PW;
2712 break;
2713 default:
2714 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2715 file_lock->fl_type);
2716 return -ENOTSUPP;
2717 }
2718
2719 switch (cmd) {
2720 case F_SETLKW:
2721 #ifdef F_SETLKW64
2722 case F_SETLKW64:
2723 #endif
2724 flags = 0;
2725 break;
2726 case F_SETLK:
2727 #ifdef F_SETLK64
2728 case F_SETLK64:
2729 #endif
2730 flags = LDLM_FL_BLOCK_NOWAIT;
2731 break;
2732 case F_GETLK:
2733 #ifdef F_GETLK64
2734 case F_GETLK64:
2735 #endif
2736 flags = LDLM_FL_TEST_LOCK;
2737 /* Save the old mode so that if the mode in the lock changes we
2738 * can decrement the appropriate reader or writer refcount. */
2739 file_lock->fl_type = einfo.ei_mode;
2740 break;
2741 default:
2742 CERROR("unknown fcntl lock command: %d\n", cmd);
2743 return -EINVAL;
2744 }
2745
2746 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2747 LUSTRE_OPC_ANY, NULL);
2748 if (IS_ERR(op_data))
2749 return PTR_ERR(op_data);
2750
2751 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2752 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2753 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2754
2755 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2756 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2757
2758 if ((file_lock->fl_flags & FL_FLOCK) &&
2759 (rc == 0 || file_lock->fl_type == F_UNLCK))
2760 rc2 = flock_lock_file_wait(file, file_lock);
2761 if ((file_lock->fl_flags & FL_POSIX) &&
2762 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2763 !(flags & LDLM_FL_TEST_LOCK))
2764 rc2 = posix_lock_file_wait(file, file_lock);
2765
2766 if (rc2 && file_lock->fl_type != F_UNLCK) {
2767 einfo.ei_mode = LCK_NL;
2768 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2769 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2770 rc = rc2;
2771 }
2772
2773 ll_finish_md_op_data(op_data);
2774
2775 return rc;
2776 }
2777
2778 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2779 {
2780 return -ENOSYS;
2781 }
2782
2783 /**
2784 * test if some locks matching bits and l_req_mode are acquired
2785 * - bits can be in different locks
2786 * - if found, clear the common lock bits in *bits
2787 * - the bits not found are kept in *bits
2788 * \param inode [IN]
2789 * \param bits [IN] searched lock bits
2790 * \param l_req_mode [IN] searched lock mode
2791 * \retval boolean, true iff all bits are found
2792 */
2793 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2794 {
2795 struct lustre_handle lockh;
2796 ldlm_policy_data_t policy;
2797 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2798 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2799 struct lu_fid *fid;
2800 __u64 flags;
2801 int i;
2802
2803 if (!inode)
2804 return 0;
2805
2806 fid = &ll_i2info(inode)->lli_fid;
2807 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2808 ldlm_lockname[mode]);
2809
2810 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2811 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2812 policy.l_inodebits.bits = *bits & (1 << i);
2813 if (policy.l_inodebits.bits == 0)
2814 continue;
2815
2816 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2817 &policy, mode, &lockh)) {
2818 struct ldlm_lock *lock;
2819
2820 lock = ldlm_handle2lock(&lockh);
2821 if (lock) {
2822 *bits &=
2823 ~(lock->l_policy_data.l_inodebits.bits);
2824 LDLM_LOCK_PUT(lock);
2825 } else {
2826 *bits &= ~policy.l_inodebits.bits;
2827 }
2828 }
2829 }
2830 return *bits == 0;
2831 }
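/*
 * Illustrative sketch, not part of the original file: querying which MD lock
 * bits are already cached, using the in/out semantics of the @bits argument
 * described above.  The wrapper name and the debug messages are made up.
 */
static void ll_have_md_lock_sketch(struct inode *inode)
{
	__u64 bits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE;

	if (ll_have_md_lock(inode, &bits, LCK_MINMODE)) {
		/* every requested bit is covered by a granted lock */
		CDEBUG(D_INODE, "lookup and update bits are cached\n");
	} else {
		/* only the bits that were NOT found remain set in bits */
		CDEBUG(D_INODE, "missing bits: "LPU64"\n", bits);
	}
}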
2832
2833 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2834 struct lustre_handle *lockh, __u64 flags,
2835 ldlm_mode_t mode)
2836 {
2837 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2838 struct lu_fid *fid;
2839 ldlm_mode_t rc;
2840
2841 fid = &ll_i2info(inode)->lli_fid;
2842 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2843
2844 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
2845 fid, LDLM_IBITS, &policy, mode, lockh);
2846
2847 return rc;
2848 }
2849
2850 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
2851 {
2852 /* Already unlinked. Just update nlink and return success */
2853 if (rc == -ENOENT) {
2854 clear_nlink(inode);
2855 /* This path cannot be hit for regular files except in
2856 * case of obscure races, so there is no need to validate the size.
2857 */
2858 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
2859 return 0;
2860 } else if (rc != 0) {
2861 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
2862 ll_get_fsname(inode->i_sb, NULL, 0),
2863 PFID(ll_inode2fid(inode)), rc);
2864 }
2865
2866 return rc;
2867 }
2868
2869 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2870 __u64 ibits)
2871 {
2872 struct inode *inode = dentry->d_inode;
2873 struct ptlrpc_request *req = NULL;
2874 struct obd_export *exp;
2875 int rc = 0;
2876
2877 LASSERT(inode != NULL);
2878
2879 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2880 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2881
2882 exp = ll_i2mdexp(inode);
2883
2884 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPCs.
2885 * But in the CMD case it caused some lock issues; this should be fixed
2886 * with the new CMD ibits lock. See bug 12718 */
2887 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
2888 struct lookup_intent oit = { .it_op = IT_GETATTR };
2889 struct md_op_data *op_data;
2890
2891 if (ibits == MDS_INODELOCK_LOOKUP)
2892 oit.it_op = IT_LOOKUP;
2893
2894 /* Call getattr by fid, so do not provide name at all. */
2895 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2896 dentry->d_inode, NULL, 0, 0,
2897 LUSTRE_OPC_ANY, NULL);
2898 if (IS_ERR(op_data))
2899 return PTR_ERR(op_data);
2900
2901 oit.it_create_mode |= M_CHECK_STALE;
2902 rc = md_intent_lock(exp, op_data, NULL, 0,
2903 /* we are not interested in name
2904 based lookup */
2905 &oit, 0, &req,
2906 ll_md_blocking_ast, 0);
2907 ll_finish_md_op_data(op_data);
2908 oit.it_create_mode &= ~M_CHECK_STALE;
2909 if (rc < 0) {
2910 rc = ll_inode_revalidate_fini(inode, rc);
2911 GOTO (out, rc);
2912 }
2913
2914 rc = ll_revalidate_it_finish(req, &oit, dentry);
2915 if (rc != 0) {
2916 ll_intent_release(&oit);
2917 GOTO(out, rc);
2918 }
2919
2920 /* Unlinked? Unhash dentry, so it is not picked up later by
2921 * do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2922 * here to preserve get_cwd functionality on 2.6.
2923 * Bug 10503 */
2924 if (!dentry->d_inode->i_nlink)
2925 d_lustre_invalidate(dentry, 0);
2926
2927 ll_lookup_finish_locks(&oit, dentry);
2928 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
2929 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2930 obd_valid valid = OBD_MD_FLGETATTR;
2931 struct md_op_data *op_data;
2932 int ealen = 0;
2933
2934 if (S_ISREG(inode->i_mode)) {
2935 rc = ll_get_max_mdsize(sbi, &ealen);
2936 if (rc)
2937 return rc;
2938 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2939 }
2940
2941 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2942 0, ealen, LUSTRE_OPC_ANY,
2943 NULL);
2944 if (IS_ERR(op_data))
2945 return PTR_ERR(op_data);
2946
2947 op_data->op_valid = valid;
2948 /* Since OBD_CONNECT_ATTRFID is not supported, we cannot find a
2949 * capa for this inode, because we only keep the capas of
2950 * directories fresh. */
2951 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2952 ll_finish_md_op_data(op_data);
2953 if (rc) {
2954 rc = ll_inode_revalidate_fini(inode, rc);
2955 return rc;
2956 }
2957
2958 rc = ll_prep_inode(&inode, req, NULL, NULL);
2959 }
2960 out:
2961 ptlrpc_req_finished(req);
2962 return rc;
2963 }
2964
2965 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2966 __u64 ibits)
2967 {
2968 struct inode *inode = dentry->d_inode;
2969 int rc;
2970
2971 rc = __ll_inode_revalidate_it(dentry, it, ibits);
2972 if (rc != 0)
2973 return rc;
2974
2975 /* if object isn't regular file, don't validate size */
2976 if (!S_ISREG(inode->i_mode)) {
2977 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2978 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2979 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2980 } else {
2981 /* In case of restore, the MDT has the right size and has
2982 * already sent it back without granting the layout lock;
2983 * the inode is up-to-date so a glimpse is useless.
2984 * Also, to glimpse we need the layout: during a running
2985 * restore the MDT holds the layout lock, so the glimpse will
2986 * block until the end of the restore (getattr will block)
2987 */
2988 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
2989 rc = ll_glimpse_size(inode);
2990 }
2991 return rc;
2992 }
2993
2994 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2995 struct lookup_intent *it, struct kstat *stat)
2996 {
2997 struct inode *inode = de->d_inode;
2998 struct ll_sb_info *sbi = ll_i2sbi(inode);
2999 struct ll_inode_info *lli = ll_i2info(inode);
3000 int res = 0;
3001
3002 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3003 MDS_INODELOCK_LOOKUP);
3004 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3005
3006 if (res)
3007 return res;
3008
3009 stat->dev = inode->i_sb->s_dev;
3010 if (ll_need_32bit_api(sbi))
3011 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3012 else
3013 stat->ino = inode->i_ino;
3014 stat->mode = inode->i_mode;
3015 stat->nlink = inode->i_nlink;
3016 stat->uid = inode->i_uid;
3017 stat->gid = inode->i_gid;
3018 stat->rdev = inode->i_rdev;
3019 stat->atime = inode->i_atime;
3020 stat->mtime = inode->i_mtime;
3021 stat->ctime = inode->i_ctime;
3022 stat->blksize = 1 << inode->i_blkbits;
3023
3024 stat->size = i_size_read(inode);
3025 stat->blocks = inode->i_blocks;
3026
3027 return 0;
3028 }
3029 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3030 {
3031 struct lookup_intent it = { .it_op = IT_GETATTR };
3032
3033 return ll_getattr_it(mnt, de, &it, stat);
3034 }
3035
3036 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3037 __u64 start, __u64 len)
3038 {
3039 int rc;
3040 size_t num_bytes;
3041 struct ll_user_fiemap *fiemap;
3042 unsigned int extent_count = fieinfo->fi_extents_max;
3043
3044 num_bytes = sizeof(*fiemap) + (extent_count *
3045 sizeof(struct ll_fiemap_extent));
3046 OBD_ALLOC_LARGE(fiemap, num_bytes);
3047
3048 if (fiemap == NULL)
3049 return -ENOMEM;
3050
3051 fiemap->fm_flags = fieinfo->fi_flags;
3052 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3053 fiemap->fm_start = start;
3054 fiemap->fm_length = len;
3055 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3056 sizeof(struct ll_fiemap_extent));
3057
3058 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3059
3060 fieinfo->fi_flags = fiemap->fm_flags;
3061 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3062 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3063 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3064
3065 OBD_FREE_LARGE(fiemap, num_bytes);
3066 return rc;
3067 }
3068
3069 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3070 {
3071 struct ll_inode_info *lli = ll_i2info(inode);
3072 struct posix_acl *acl = NULL;
3073
3074 spin_lock(&lli->lli_lock);
3075 /* VFS' acl_permission_check->check_acl will release the refcount */
3076 acl = posix_acl_dup(lli->lli_posix_acl);
3077 spin_unlock(&lli->lli_lock);
3078
3079 return acl;
3080 }
3081
3082
3083 int ll_inode_permission(struct inode *inode, int mask)
3084 {
3085 int rc = 0;
3086
3087 #ifdef MAY_NOT_BLOCK
3088 if (mask & MAY_NOT_BLOCK)
3089 return -ECHILD;
3090 #endif
3091
3092 /* as the root inode is NOT validated in the lookup operation,
3093 * we need to do it before the permission check. */
3094
3095 if (inode == inode->i_sb->s_root->d_inode) {
3096 struct lookup_intent it = { .it_op = IT_LOOKUP };
3097
3098 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3099 MDS_INODELOCK_LOOKUP);
3100 if (rc)
3101 return rc;
3102 }
3103
3104 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3105 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
3106
3107 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3108 return lustre_check_remote_perm(inode, mask);
3109
3110 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3111 rc = generic_permission(inode, mask);
3112
3113 return rc;
3114 }
3115
3116 /* -o localflock - only provides locally consistent flock locks */
3117 struct file_operations ll_file_operations = {
3118 .read = ll_file_read,
3119 .aio_read = ll_file_aio_read,
3120 .write = ll_file_write,
3121 .aio_write = ll_file_aio_write,
3122 .unlocked_ioctl = ll_file_ioctl,
3123 .open = ll_file_open,
3124 .release = ll_file_release,
3125 .mmap = ll_file_mmap,
3126 .llseek = ll_file_seek,
3127 .splice_read = ll_file_splice_read,
3128 .fsync = ll_fsync,
3129 .flush = ll_flush
3130 };
3131
3132 struct file_operations ll_file_operations_flock = {
3133 .read = ll_file_read,
3134 .aio_read = ll_file_aio_read,
3135 .write = ll_file_write,
3136 .aio_write = ll_file_aio_write,
3137 .unlocked_ioctl = ll_file_ioctl,
3138 .open = ll_file_open,
3139 .release = ll_file_release,
3140 .mmap = ll_file_mmap,
3141 .llseek = ll_file_seek,
3142 .splice_read = ll_file_splice_read,
3143 .fsync = ll_fsync,
3144 .flush = ll_flush,
3145 .flock = ll_file_flock,
3146 .lock = ll_file_flock
3147 };
3148
3149 /* These are for -o noflock - to return ENOSYS on flock calls */
3150 struct file_operations ll_file_operations_noflock = {
3151 .read = ll_file_read,
3152 .aio_read = ll_file_aio_read,
3153 .write = ll_file_write,
3154 .aio_write = ll_file_aio_write,
3155 .unlocked_ioctl = ll_file_ioctl,
3156 .open = ll_file_open,
3157 .release = ll_file_release,
3158 .mmap = ll_file_mmap,
3159 .llseek = ll_file_seek,
3160 .splice_read = ll_file_splice_read,
3161 .fsync = ll_fsync,
3162 .flush = ll_flush,
3163 .flock = ll_file_noflock,
3164 .lock = ll_file_noflock
3165 };
3166
3167 struct inode_operations ll_file_inode_operations = {
3168 .setattr = ll_setattr,
3169 .getattr = ll_getattr,
3170 .permission = ll_inode_permission,
3171 .setxattr = ll_setxattr,
3172 .getxattr = ll_getxattr,
3173 .listxattr = ll_listxattr,
3174 .removexattr = ll_removexattr,
3175 .fiemap = ll_fiemap,
3176 .get_acl = ll_get_acl,
3177 };
3178
3179 /* dynamic ioctl number support routines */
3180 static struct llioc_ctl_data {
3181 struct rw_semaphore ioc_sem;
3182 struct list_head ioc_head;
3183 } llioc = {
3184 __RWSEM_INITIALIZER(llioc.ioc_sem),
3185 LIST_HEAD_INIT(llioc.ioc_head)
3186 };
3187
3188
3189 struct llioc_data {
3190 struct list_head iocd_list;
3191 unsigned int iocd_size;
3192 llioc_callback_t iocd_cb;
3193 unsigned int iocd_count;
3194 unsigned int iocd_cmd[0];
3195 };
3196
3197 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3198 {
3199 unsigned int size;
3200 struct llioc_data *in_data = NULL;
3201
3202 if (cb == NULL || cmd == NULL ||
3203 count > LLIOC_MAX_CMD || count < 0)
3204 return NULL;
3205
3206 size = sizeof(*in_data) + count * sizeof(unsigned int);
3207 OBD_ALLOC(in_data, size);
3208 if (in_data == NULL)
3209 return NULL;
3210
3211 memset(in_data, 0, sizeof(*in_data));
3212 in_data->iocd_size = size;
3213 in_data->iocd_cb = cb;
3214 in_data->iocd_count = count;
3215 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3216
3217 down_write(&llioc.ioc_sem);
3218 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3219 up_write(&llioc.ioc_sem);
3220
3221 return in_data;
3222 }
3223
3224 void ll_iocontrol_unregister(void *magic)
3225 {
3226 struct llioc_data *tmp;
3227
3228 if (magic == NULL)
3229 return;
3230
3231 down_write(&llioc.ioc_sem);
3232 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3233 if (tmp == magic) {
3234 unsigned int size = tmp->iocd_size;
3235
3236 list_del(&tmp->iocd_list);
3237 up_write(&llioc.ioc_sem);
3238
3239 OBD_FREE(tmp, size);
3240 return;
3241 }
3242 }
3243 up_write(&llioc.ioc_sem);
3244
3245 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3246 }
3247
3248 EXPORT_SYMBOL(ll_iocontrol_register);
3249 EXPORT_SYMBOL(ll_iocontrol_unregister);
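/*
 * Illustrative sketch, not part of the original file: registering a dynamic
 * ioctl handler.  The callback signature is inferred from the way
 * ll_iocontrol_call() below invokes iocd_cb(); the command number and the
 * handler body are made up for the example.
 */
static enum llioc_iter ll_example_ioc_cb(struct inode *inode, struct file *file,
					 unsigned int cmd, unsigned long arg,
					 void *magic, int *rcp)
{
	*rcp = 0;		/* result returned to the ioctl caller */
	return LLIOC_STOP;	/* claim the command, stop iterating */
}

static void *ll_example_ioc_register(void)
{
	unsigned int cmds[] = { 0xc0de };	/* hypothetical command */

	/* the returned cookie is later passed to ll_iocontrol_unregister() */
	return ll_iocontrol_register(ll_example_ioc_cb, 1, cmds);
}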
3250
3251 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3252 unsigned int cmd, unsigned long arg, int *rcp)
3253 {
3254 enum llioc_iter ret = LLIOC_CONT;
3255 struct llioc_data *data;
3256 int rc = -EINVAL, i;
3257
3258 down_read(&llioc.ioc_sem);
3259 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3260 for (i = 0; i < data->iocd_count; i++) {
3261 if (cmd != data->iocd_cmd[i])
3262 continue;
3263
3264 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3265 break;
3266 }
3267
3268 if (ret == LLIOC_STOP)
3269 break;
3270 }
3271 up_read(&llioc.ioc_sem);
3272
3273 if (rcp)
3274 *rcp = rc;
3275 return ret;
3276 }
3277
3278 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3279 {
3280 struct ll_inode_info *lli = ll_i2info(inode);
3281 struct cl_env_nest nest;
3282 struct lu_env *env;
3283 int result;
3284
3285 if (lli->lli_clob == NULL)
3286 return 0;
3287
3288 env = cl_env_nested_get(&nest);
3289 if (IS_ERR(env))
3290 return PTR_ERR(env);
3291
3292 result = cl_conf_set(env, lli->lli_clob, conf);
3293 cl_env_nested_put(&nest, env);
3294
3295 if (conf->coc_opc == OBJECT_CONF_SET) {
3296 struct ldlm_lock *lock = conf->coc_lock;
3297
3298 LASSERT(lock != NULL);
3299 LASSERT(ldlm_has_layout(lock));
3300 if (result == 0) {
3301 /* it can only be allowed to match after the layout is
3302 * applied to the inode, otherwise a false layout would be
3303 * seen. Applying the layout should happen before dropping
3304 * the intent lock. */
3305 ldlm_lock_allow_match(lock);
3306 }
3307 }
3308 return result;
3309 }
3310
3311 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3312 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3314 {
3315 struct ll_sb_info *sbi = ll_i2sbi(inode);
3316 struct obd_capa *oc;
3317 struct ptlrpc_request *req;
3318 struct mdt_body *body;
3319 void *lvbdata;
3320 void *lmm;
3321 int lmmsize;
3322 int rc;
3323
3324 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3325 PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
3326 lock->l_lvb_data, lock->l_lvb_len);
3327
3328 if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
3329 return 0;
3330
3331 /* if layout lock was granted right away, the layout is returned
3332 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3333 * blocked and then granted via completion ast, we have to fetch
3334 * layout here. Please note that we can't use the LVB buffer in the
3335 * completion AST because it is not large enough */
3336 oc = ll_mdscapa_get(inode);
3337 rc = ll_get_max_mdsize(sbi, &lmmsize);
3338 if (rc == 0)
3339 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3340 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3341 lmmsize, 0, &req);
3342 capa_put(oc);
3343 if (rc < 0)
3344 return rc;
3345
3346 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3347 if (body == NULL || body->eadatasize > lmmsize)
3348 GOTO(out, rc = -EPROTO);
3349
3350 lmmsize = body->eadatasize;
3351 if (lmmsize == 0) /* empty layout */
3352 GOTO(out, rc = 0);
3353
3354 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3355 if (lmm == NULL)
3356 GOTO(out, rc = -EFAULT);
3357
3358 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3359 if (lvbdata == NULL)
3360 GOTO(out, rc = -ENOMEM);
3361
3362 memcpy(lvbdata, lmm, lmmsize);
3363 lock_res_and_lock(lock);
3364 if (lock->l_lvb_data != NULL)
3365 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3366
3367 lock->l_lvb_data = lvbdata;
3368 lock->l_lvb_len = lmmsize;
3369 unlock_res_and_lock(lock);
3370
3371 out:
3372 ptlrpc_req_finished(req);
3373 return rc;
3374 }
3375
3376 /**
3377 * Apply the layout to the inode. Layout lock is held and will be released
3378 * in this function.
3379 */
3380 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3381 struct inode *inode, __u32 *gen, bool reconf)
3382 {
3383 struct ll_inode_info *lli = ll_i2info(inode);
3384 struct ll_sb_info *sbi = ll_i2sbi(inode);
3385 struct ldlm_lock *lock;
3386 struct lustre_md md = { NULL };
3387 struct cl_object_conf conf;
3388 int rc = 0;
3389 bool lvb_ready;
3390 bool wait_layout = false;
3391
3392 LASSERT(lustre_handle_is_used(lockh));
3393
3394 lock = ldlm_handle2lock(lockh);
3395 LASSERT(lock != NULL);
3396 LASSERT(ldlm_has_layout(lock));
3397
3398 LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
3399 inode, PFID(&lli->lli_fid), reconf);
3400
3401 /* in case this is a cached lock, reinstate it with the new inode */
3402 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3403
3404 lock_res_and_lock(lock);
3405 lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
3406 unlock_res_and_lock(lock);
3407 /* checking lvb_ready is racy, but this is okay. The worst case is
3408 * that multiple processes may configure the file at the same time. */
3409 if (lvb_ready || !reconf) {
3410 rc = -ENODATA;
3411 if (lvb_ready) {
3412 /* layout_gen must be valid if the layout lock is not
3413 * cancelled and the stripe has already been set */
3414 *gen = lli->lli_layout_gen;
3415 rc = 0;
3416 }
3417 GOTO(out, rc);
3418 }
3419
3420 rc = ll_layout_fetch(inode, lock);
3421 if (rc < 0)
3422 GOTO(out, rc);
3423
3424 /* for layout lock, lmm is returned in lock's lvb.
3425 * lvb_data is immutable if the lock is held so it's safe to access it
3426 * without res lock. See the description in ldlm_lock_decref_internal()
3427 * for the condition to free lvb_data of layout lock */
3428 if (lock->l_lvb_data != NULL) {
3429 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3430 lock->l_lvb_data, lock->l_lvb_len);
3431 if (rc >= 0) {
3432 *gen = LL_LAYOUT_GEN_EMPTY;
3433 if (md.lsm != NULL)
3434 *gen = md.lsm->lsm_layout_gen;
3435 rc = 0;
3436 } else {
3437 CERROR("%s: file "DFID" unpackmd error: %d\n",
3438 ll_get_fsname(inode->i_sb, NULL, 0),
3439 PFID(&lli->lli_fid), rc);
3440 }
3441 }
3442 if (rc < 0)
3443 GOTO(out, rc);
3444
3445 /* set the layout on the file. This is unlikely to fail as the old
3446 * layout has surely been eliminated */
3447 memset(&conf, 0, sizeof(conf));
3448 conf.coc_opc = OBJECT_CONF_SET;
3449 conf.coc_inode = inode;
3450 conf.coc_lock = lock;
3451 conf.u.coc_md = &md;
3452 rc = ll_layout_conf(inode, &conf);
3453
3454 if (md.lsm != NULL)
3455 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3456
3457 /* refresh layout failed, need to wait */
3458 wait_layout = rc == -EBUSY;
3459
3460 out:
3461 LDLM_LOCK_PUT(lock);
3462 ldlm_lock_decref(lockh, mode);
3463
3464 /* wait for IO to complete if the layout is still being used. */
3465 if (wait_layout) {
3466 CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
3467 ll_get_fsname(inode->i_sb, NULL, 0),
3468 inode, PFID(&lli->lli_fid));
3469
3470 memset(&conf, 0, sizeof(conf));
3471 conf.coc_opc = OBJECT_CONF_WAIT;
3472 conf.coc_inode = inode;
3473 rc = ll_layout_conf(inode, &conf);
3474 if (rc == 0)
3475 rc = -EAGAIN;
3476
3477 CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
3478 PFID(&lli->lli_fid), rc);
3479 }
3480 return rc;
3481 }
3482
3483 /**
3484 * This function checks if there exists a LAYOUT lock on the client side,
3485 * or enqueues it if it doesn't have one in cache.
3486 *
3487 * This function does not hold the layout lock, so it may be revoked any time
3488 * after this function returns. Any operation that depends on the layout
3489 * should be redone in that case.
3490 *
3491 * This function should be called before lov_io_init() to get an up-to-date
3492 * layout version; the caller should save the version number, and after the IO
3493 * is finished this function should be called again to verify that the layout
3494 * was not changed during the IO.
3495 */
3496 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3497 {
3498 struct ll_inode_info *lli = ll_i2info(inode);
3499 struct ll_sb_info *sbi = ll_i2sbi(inode);
3500 struct md_op_data *op_data;
3501 struct lookup_intent it;
3502 struct lustre_handle lockh;
3503 ldlm_mode_t mode;
3504 struct ldlm_enqueue_info einfo = {
3505 .ei_type = LDLM_IBITS,
3506 .ei_mode = LCK_CR,
3507 .ei_cb_bl = ll_md_blocking_ast,
3508 .ei_cb_cp = ldlm_completion_ast,
3509 };
3510 int rc;
3511
3512 *gen = lli->lli_layout_gen;
3513 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3514 return 0;
3515
3516 /* sanity checks */
3517 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3518 LASSERT(S_ISREG(inode->i_mode));
3519
3520 /* the layout lock is usually cached on the local side, so try to match
3521 * it before grabbing the layout lock mutex. */
3522 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3523 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3524 if (mode != 0) { /* hit cached lock */
3525 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3526 if (rc == 0)
3527 return 0;
3528
3529 /* better to hold lli_layout_mutex and try again, otherwise
3530 * we may hit a starvation problem. */
3531 }
3532
3533 /* take layout lock mutex to enqueue layout lock exclusively. */
3534 mutex_lock(&lli->lli_layout_mutex);
3535
3536 again:
3537 /* try again. Maybe somebody else has done this. */
3538 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3539 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3540 if (mode != 0) { /* hit cached lock */
3541 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3542 if (rc == -EAGAIN)
3543 goto again;
3544
3545 mutex_unlock(&lli->lli_layout_mutex);
3546 return rc;
3547 }
3548
3549 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3550 0, 0, LUSTRE_OPC_ANY, NULL);
3551 if (IS_ERR(op_data)) {
3552 mutex_unlock(&lli->lli_layout_mutex);
3553 return PTR_ERR(op_data);
3554 }
3555
3556 /* have to enqueue one */
3557 memset(&it, 0, sizeof(it));
3558 it.it_op = IT_LAYOUT;
3559 lockh.cookie = 0ULL;
3560
3561 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
3562 ll_get_fsname(inode->i_sb, NULL, 0), inode,
3563 PFID(&lli->lli_fid));
3564
3565 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
3566 NULL, 0, NULL, 0);
3567 if (it.d.lustre.it_data != NULL)
3568 ptlrpc_req_finished(it.d.lustre.it_data);
3569 it.d.lustre.it_data = NULL;
3570
3571 ll_finish_md_op_data(op_data);
3572
3573 mode = it.d.lustre.it_lock_mode;
3574 it.d.lustre.it_lock_mode = 0;
3575 ll_intent_drop_lock(&it);
3576
3577 if (rc == 0) {
3578 /* set lock data in case this is a new lock */
3579 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3580 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3581 if (rc == -EAGAIN)
3582 goto again;
3583 }
3584 mutex_unlock(&lli->lli_layout_mutex);
3585
3586 return rc;
3587 }
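/*
 * Illustrative sketch, not part of the original file: the calling pattern
 * suggested by the comment above ll_layout_refresh(): fetch the layout
 * generation before the IO and verify it afterwards.  The wrapper name and
 * the -EAGAIN convention for "redo the IO" are assumptions for the example.
 */
static int ll_layout_refresh_sketch(struct inode *inode)
{
	__u32 gen = 0, gen_after = 0;
	int rc;

	rc = ll_layout_refresh(inode, &gen);
	if (rc != 0)
		return rc;

	/* ... perform the IO against the layout identified by gen ... */

	rc = ll_layout_refresh(inode, &gen_after);
	if (rc == 0 && gen_after != gen)
		rc = -EAGAIN;	/* layout changed, the caller should redo IO */
	return rc;
}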
3588
3589 /**
3590 * This function sends a restore request to the MDT
3591 */
3592 int ll_layout_restore(struct inode *inode)
3593 {
3594 struct hsm_user_request *hur;
3595 int len, rc;
3596
3597 len = sizeof(struct hsm_user_request) +
3598 sizeof(struct hsm_user_item);
3599 OBD_ALLOC(hur, len);
3600 if (hur == NULL)
3601 return -ENOMEM;
3602
3603 hur->hur_request.hr_action = HUA_RESTORE;
3604 hur->hur_request.hr_archive_id = 0;
3605 hur->hur_request.hr_flags = 0;
3606 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3607 sizeof(hur->hur_user_item[0].hui_fid));
3608 hur->hur_user_item[0].hui_extent.length = -1;
3609 hur->hur_request.hr_itemcount = 1;
3610 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
3611 len, hur, NULL);
3612 OBD_FREE(hur, len);
3613 return rc;
3614 }