Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
[deliverable/linux.git] / drivers / staging / lustre / lustre / llite / file.c
1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/llite/file.c
37 *
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
41 */
42
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
50
51 #include "cl_object.h"
52
53 static int
54 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
55
56 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
57 bool *lease_broken);
58
59 static enum llioc_iter
60 ll_iocontrol_call(struct inode *inode, struct file *file,
61 unsigned int cmd, unsigned long arg, int *rcp);
62
63 static struct ll_file_data *ll_file_data_get(void)
64 {
65 struct ll_file_data *fd;
66
67 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
68 if (fd == NULL)
69 return NULL;
70 fd->fd_write_failed = false;
71 return fd;
72 }
73
74 static void ll_file_data_put(struct ll_file_data *fd)
75 {
76 if (fd != NULL)
77 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
78 }
79
80 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
81 struct lustre_handle *fh)
82 {
83 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
84 op_data->op_attr.ia_mode = inode->i_mode;
85 op_data->op_attr.ia_atime = inode->i_atime;
86 op_data->op_attr.ia_mtime = inode->i_mtime;
87 op_data->op_attr.ia_ctime = inode->i_ctime;
88 op_data->op_attr.ia_size = i_size_read(inode);
89 op_data->op_attr_blocks = inode->i_blocks;
90 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
91 ll_inode_to_ext_flags(inode->i_flags);
92 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
93 if (fh)
94 op_data->op_handle = *fh;
95 op_data->op_capa1 = ll_mdscapa_get(inode);
96
97 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
98 op_data->op_bias |= MDS_DATA_MODIFIED;
99 }
100
/**
 * Closes the IO epoch and packs all the attributes into @op_data for
 * the CLOSE rpc.
 *
 * Mode and all three timestamps are always marked valid.  For a
 * writable open, size/blocks are packed directly unless Size-on-MDS
 * is active for a regular file, in which case the epoch is closed via
 * ll_ioepoch_close() instead.
 */
static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
			     struct obd_client_handle *och)
{
	op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
				    ATTR_MTIME | ATTR_MTIME_SET |
				    ATTR_CTIME | ATTR_CTIME_SET;

	/* Only a writable open can have dirtied size/epoch state. */
	if (!(och->och_flags & FMODE_WRITE))
		goto out;

	if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
	else
		ll_ioepoch_close(inode, op_data, &och, 0);

out:
	ll_pack_inode2opdata(inode, op_data, &och->och_fh);
	ll_prep_md_op_data(op_data, inode, NULL, NULL,
			   0, 0, LUSTRE_OPC_ANY, NULL);
}
125
/**
 * Send a CLOSE RPC for @och to the MDS and dispose of the open handle.
 *
 * When @data_version is non-NULL the close is an HSM release: the data
 * version and lease handle are packed into the request, and the reply
 * must carry OBD_MD_FLRELEASED or -EBUSY is returned.
 *
 * On -EAGAIN from md_close() the MDS wants Size-on-MDS attributes, so
 * ll_som_update() is invoked to gather them from the OSTs.
 *
 * \retval 0 on success, negative errno on failure.
 */
static int ll_close_inode_openhandle(struct obd_export *md_exp,
				     struct inode *inode,
				     struct obd_client_handle *och,
				     const __u64 *data_version)
{
	struct obd_export *exp = ll_i2mdexp(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	struct obd_device *obd = class_exp2obd(exp);
	int epoch_close = 1;
	int rc;

	if (obd == NULL) {
		/*
		 * XXX: in case of LMV, is this correct to access
		 * ->exp_handle?
		 */
		CERROR("Invalid MDC connection handle "LPX64"\n",
		       ll_i2mdexp(inode)->exp_handle.h_cookie);
		GOTO(out, rc = 0);
	}

	OBD_ALLOC_PTR(op_data);
	if (op_data == NULL)
		/* XXX We leak openhandle and request here. */
		GOTO(out, rc = -ENOMEM);

	ll_prepare_close(inode, op_data, och);
	if (data_version != NULL) {
		/* Pass in data_version implies release. */
		op_data->op_bias |= MDS_HSM_RELEASE;
		op_data->op_data_version = *data_version;
		op_data->op_lease_handle = och->och_lease_handle;
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
	}
	epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
	rc = md_close(md_exp, op_data, och->och_mod, &req);
	if (rc == -EAGAIN) {
		/* This close must have the epoch closed. */
		LASSERT(epoch_close);
		/* MDS has instructed us to obtain Size-on-MDS attribute from
		 * OSTs and send setattr to back to MDS. */
		rc = ll_som_update(inode, op_data);
		if (rc) {
			CERROR("inode %lu mdc Size-on-MDS update failed: "
			       "rc = %d\n", inode->i_ino, rc);
			/* A SOM update failure is not fatal to the close. */
			rc = 0;
		}
	} else if (rc) {
		CERROR("inode %lu mdc close failed: rc = %d\n",
		       inode->i_ino, rc);
	}

	/* DATA_MODIFIED flag was successfully sent on close, cancel data
	 * modification flag. */
	if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
		struct ll_inode_info *lli = ll_i2info(inode);

		spin_lock(&lli->lli_lock);
		lli->lli_flags &= ~LLIF_DATA_MODIFIED;
		spin_unlock(&lli->lli_lock);
	}

	if (rc == 0) {
		rc = ll_objects_destroy(req, inode);
		if (rc)
			CERROR("inode %lu ll_objects destroy: rc = %d\n",
			       inode->i_ino, rc);
	}
	if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
		struct mdt_body *body;

		/* The MDS must confirm the release, else the file is busy. */
		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		if (!(body->valid & OBD_MD_FLRELEASED))
			rc = -EBUSY;
	}

	ll_finish_md_op_data(op_data);

out:
	if (exp_connect_som(exp) && !epoch_close &&
	    S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
		/* Epoch still open: keep @och until DONE_WRITING completes. */
		ll_queue_done_writing(inode, LLIF_DONE_WRITING);
	} else {
		md_clear_open_replay_data(md_exp, och);
		/* Free @och if it is not waiting for DONE_WRITING. */
		och->och_fh.cookie = DEAD_HANDLE_MAGIC;
		OBD_FREE_PTR(och);
	}
	if (req) /* This is close request */
		ptlrpc_req_finished(req);
	return rc;
}
217
/**
 * Close the MDS open handle matching @fmode (write, exec or read) if
 * it is no longer referenced.
 *
 * If the mode's usecount is still positive the close is skipped.
 * Otherwise the handle is detached under lli_och_mutex and closed via
 * ll_close_inode_openhandle().
 *
 * \retval 0 if nothing had to be done or the close succeeded,
 *	    negative errno from the close RPC otherwise.
 */
int ll_md_real_close(struct inode *inode, fmode_t fmode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_client_handle **och_p;
	struct obd_client_handle *och;
	__u64 *och_usecount;
	int rc = 0;

	/* Select the handle slot and usecount matching the open mode. */
	if (fmode & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (fmode & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
	} else {
		LASSERT(fmode & FMODE_READ);
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;
	}

	mutex_lock(&lli->lli_och_mutex);
	if (*och_usecount > 0) {
		/* There are still users of this handle, so skip
		 * freeing it. */
		mutex_unlock(&lli->lli_och_mutex);
		return 0;
	}

	och = *och_p;
	*och_p = NULL;
	mutex_unlock(&lli->lli_och_mutex);

	if (och != NULL) {
		/* There might be a race and this handle may already
		   be closed. */
		rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
					       inode, och, NULL);
	}

	return rc;
}
259
/**
 * Close @file on @inode: drop a group lock and lease if still held,
 * close the per-fd open handle if one exists, and otherwise decide
 * whether the MDS open handle can be kept cached (a matching OPEN DLM
 * lock exists) or must be really closed via ll_md_real_close().
 *
 * Always detaches and frees the ll_file_data of @file.
 */
static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
		       struct file *file)
{
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ll_inode_info *lli = ll_i2info(inode);
	int rc = 0;

	/* clear group lock, if present */
	if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
		ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);

	if (fd->fd_lease_och != NULL) {
		bool lease_broken;

		/* Usually the lease is not released when the
		 * application crashed, we need to release here. */
		rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
		CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
		       PFID(&lli->lli_fid), rc, lease_broken);

		fd->fd_lease_och = NULL;
	}

	if (fd->fd_och != NULL) {
		/* This fd owns a private open handle; close it directly. */
		rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
		fd->fd_och = NULL;
		GOTO(out, rc);
	}

	/* Let's see if we have good enough OPEN lock on the file and if
	   we can skip talking to MDS */
	if (file->f_dentry->d_inode) { /* Can this ever be false? */
		int lockmode;
		int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
		struct lustre_handle lockh;
		struct inode *inode = file->f_dentry->d_inode;
		ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};

		/* Drop this fd's reference on the mode-specific usecount. */
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_omode & FMODE_WRITE) {
			lockmode = LCK_CW;
			LASSERT(lli->lli_open_fd_write_count);
			lli->lli_open_fd_write_count--;
		} else if (fd->fd_omode & FMODE_EXEC) {
			lockmode = LCK_PR;
			LASSERT(lli->lli_open_fd_exec_count);
			lli->lli_open_fd_exec_count--;
		} else {
			lockmode = LCK_CR;
			LASSERT(lli->lli_open_fd_read_count);
			lli->lli_open_fd_read_count--;
		}
		mutex_unlock(&lli->lli_och_mutex);

		/* Only talk to the MDS when no cached OPEN lock matches. */
		if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
				   LDLM_IBITS, &policy, lockmode,
				   &lockh)) {
			rc = ll_md_real_close(file->f_dentry->d_inode,
					      fd->fd_omode);
		}
	} else {
		CERROR("Releasing a file %p with negative dentry %p. Name %s",
		       file, file->f_dentry, file->f_dentry->d_name.name);
	}

out:
	LUSTRE_FPRIVATE(file) = NULL;
	ll_file_data_put(fd);
	ll_capa_close(inode);

	return rc;
}
332
/* While this returns an error code, fput() the caller does not, so we need
 * to make every effort to clean up all of our state here. Also, applications
 * rarely check close errors and even if an error is returned they will not
 * re-try the close call.
 */
int ll_file_release(struct inode *inode, struct file *file)
{
	struct ll_file_data *fd;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ll_inode_info *lli = ll_i2info(inode);
	int rc;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
	       inode->i_generation, inode);

#ifdef CONFIG_FS_POSIX_ACL
	/* Remote-client ACL state is only tracked on the filesystem root. */
	if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
	    inode == inode->i_sb->s_root->d_inode) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(fd != NULL);
		if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
			fd->fd_flags &= ~LL_FILE_RMTACL;
			rct_del(&sbi->ll_rct, current_pid());
			et_search_free(&sbi->ll_et, current_pid());
		}
	}
#endif

	if (inode->i_sb->s_root != file->f_dentry)
		ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
	fd = LUSTRE_FPRIVATE(file);
	LASSERT(fd != NULL);

	/* The last ref on @file, maybe not the owner pid of statahead.
	 * Different processes can open the same dir, "ll_opendir_key" means:
	 * it is me that should stop the statahead thread. */
	if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
	    lli->lli_opendir_pid != 0)
		ll_stop_statahead(inode, lli->lli_opendir_key);

	/* The root dentry has no MDS open handle to close. */
	if (inode->i_sb->s_root == file->f_dentry) {
		LUSTRE_FPRIVATE(file) = NULL;
		ll_file_data_put(fd);
		return 0;
	}

	if (!S_ISDIR(inode->i_mode)) {
		/* Clear accumulated async write error state before close. */
		lov_read_and_clear_async_rc(lli->lli_clob);
		lli->lli_async_rc = 0;
	}

	rc = ll_md_close(sbi->ll_md_exp, inode, file);

	if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
		libcfs_debug_dumplog();

	return rc;
}
392
/**
 * Open @file on the MDS via an intent lock RPC.  An OPEN DLM lock is
 * requested unless @lmm/@lmmsize are set (i.e. we are only setting
 * stripe parameters).  On success the reply attributes and any granted
 * lock are installed on the dentry's inode.
 *
 * \retval 0 on success, negative errno otherwise.
 */
static int ll_intent_file_open(struct file *file, void *lmm,
			       int lmmsize, struct lookup_intent *itp)
{
	struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
	struct dentry *parent = file->f_dentry->d_parent;
	const char *name = file->f_dentry->d_name.name;
	const int len = file->f_dentry->d_name.len;
	struct md_op_data *op_data;
	struct ptlrpc_request *req;
	__u32 opc = LUSTRE_OPC_ANY;
	int rc;

	if (!parent)
		return -ENOENT;

	/* Usually we come here only for NFSD, and we want open lock.
	   But we can also get here with pre 2.6.15 patchless kernels, and in
	   that case that lock is also ok */
	/* We can also get here if there was cached open handle in revalidate_it
	 * but it disappeared while we were getting from there to ll_file_open.
	 * But this means this file was closed and immediately opened which
	 * makes a good candidate for using OPEN lock */
	/* If lmmsize & lmm are not 0, we are just setting stripe info
	 * parameters. No need for the open lock */
	if (lmm == NULL && lmmsize == 0) {
		itp->it_flags |= MDS_OPEN_LOCK;
		if (itp->it_flags & FMODE_WRITE)
			opc = LUSTRE_OPC_CREATE;
	}

	op_data = ll_prep_md_op_data(NULL, parent->d_inode,
				     file->f_dentry->d_inode, name, len,
				     O_RDWR, opc, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	itp->it_flags |= MDS_OPEN_BY_FID;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
			    0 /*unused */, &req, ll_md_blocking_ast, 0);
	ll_finish_md_op_data(op_data);
	if (rc == -ESTALE) {
		/* reason for keep own exit path - don`t flood log
		 * with messages with -ESTALE errors.
		 */
		if (!it_disposition(itp, DISP_OPEN_OPEN) ||
		    it_open_error(DISP_OPEN_OPEN, itp))
			GOTO(out, rc);
		ll_release_openhandle(file->f_dentry, itp);
		GOTO(out, rc);
	}

	if (it_disposition(itp, DISP_LOOKUP_NEG))
		GOTO(out, rc = -ENOENT);

	if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
		rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
		CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
		GOTO(out, rc);
	}

	rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
	if (!rc && itp->d.lustre.it_lock_mode)
		ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
				 itp, NULL);

out:
	ptlrpc_req_finished(req);
	ll_intent_drop_lock(itp);

	return rc;
}
464
465 /**
466 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
467 * not believe attributes if a few ioepoch holders exist. Attributes for
468 * previous ioepoch if new one is opened are also skipped by MDS.
469 */
470 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
471 {
472 if (ioepoch && lli->lli_ioepoch != ioepoch) {
473 lli->lli_ioepoch = ioepoch;
474 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
475 ioepoch, PFID(&lli->lli_fid));
476 }
477 }
478
/*
 * Initialize @och from the server reply attached to @it (open handle,
 * fid, lease lock cookie, open flags) and register it for open replay.
 * Returns the result of md_set_open_replay_data().
 */
static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
		       struct obd_client_handle *och)
{
	struct ptlrpc_request *req = it->d.lustre.it_data;
	struct mdt_body *body;

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	och->och_fh = body->handle;
	och->och_fid = body->fid1;
	och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
	och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
	och->och_flags = it->it_flags;

	return md_set_open_replay_data(md_exp, och, it);
}
494
/*
 * Finish an open on the client side: when @och is supplied, fill it
 * from the intent reply and open the reported IO epoch; then attach
 * @fd as the file's private data, initialize readahead state and
 * record the effective open mode.  Returns 0 or negative errno.
 */
static int ll_local_open(struct file *file, struct lookup_intent *it,
			 struct ll_file_data *fd, struct obd_client_handle *och)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_inode_info *lli = ll_i2info(inode);

	LASSERT(!LUSTRE_FPRIVATE(file));

	LASSERT(fd != NULL);

	if (och) {
		struct ptlrpc_request *req = it->d.lustre.it_data;
		struct mdt_body *body;
		int rc;

		rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
		if (rc != 0)
			return rc;

		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		ll_ioepoch_open(lli, body->ioepoch);
	}

	LUSTRE_FPRIVATE(file) = fd;
	ll_readahead_init(inode, &fd->fd_ras);
	fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
	return 0;
}
523
/* Open a file, and (for the very first open) create objects on the OSTs at
 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
 * creation or open until ll_lov_setstripe() ioctl is called.
 *
 * If we already have the stripe MD locally then we don't request it in
 * md_open(), by passing a lmm_size = 0.
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used. We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 */
int ll_file_open(struct inode *inode, struct file *file)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct lookup_intent *it, oit = { .it_op = IT_OPEN,
					  .it_flags = file->f_flags };
	struct obd_client_handle **och_p = NULL;
	__u64 *och_usecount = NULL;
	struct ll_file_data *fd;
	int rc = 0, opendir_set = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
	       inode->i_generation, inode, file->f_flags);

	it = file->private_data; /* XXX: compat macro */
	file->private_data = NULL; /* prevent ll_local_open assertion */

	fd = ll_file_data_get();
	if (fd == NULL)
		GOTO(out_openerr, rc = -ENOMEM);

	fd->fd_file = file;
	if (S_ISDIR(inode->i_mode)) {
		/* Register this fd as the statahead "owner" if nobody is. */
		spin_lock(&lli->lli_sa_lock);
		if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
		    lli->lli_opendir_pid == 0) {
			lli->lli_opendir_key = fd;
			lli->lli_opendir_pid = current_pid();
			opendir_set = 1;
		}
		spin_unlock(&lli->lli_sa_lock);
	}

	/* The filesystem root needs no MDS open handle. */
	if (inode->i_sb->s_root == file->f_dentry) {
		LUSTRE_FPRIVATE(file) = fd;
		return 0;
	}

	if (!it || !it->d.lustre.it_disposition) {
		/* Convert f_flags into access mode. We cannot use file->f_mode,
		 * because everything but O_ACCMODE mask was stripped from
		 * there */
		if ((oit.it_flags + 1) & O_ACCMODE)
			oit.it_flags++;
		if (file->f_flags & O_TRUNC)
			oit.it_flags |= FMODE_WRITE;

		/* kernel only call f_op->open in dentry_open. filp_open calls
		 * dentry_open after call to open_namei that checks permissions.
		 * Only nfsd_open call dentry_open directly without checking
		 * permissions and because of that this code below is safe. */
		if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
			oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;

		/* We do not want O_EXCL here, presumably we opened the file
		 * already? XXX - NFS implications? */
		oit.it_flags &= ~O_EXCL;

		/* bug20584, if "it_flags" contains O_CREAT, the file will be
		 * created if necessary, then "IT_CREAT" should be set to keep
		 * consistent with it */
		if (oit.it_flags & O_CREAT)
			oit.it_op |= IT_CREAT;

		it = &oit;
	}

restart:
	/* Let's see if we have file open on MDS already. */
	if (it->it_flags & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (it->it_flags & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
	} else {
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;
	}

	mutex_lock(&lli->lli_och_mutex);
	if (*och_p) { /* Open handle is present */
		if (it_disposition(it, DISP_OPEN_OPEN)) {
			/* Well, there's extra open request that we do not need,
			   let's close it somehow. This will decref request. */
			rc = it_open_error(DISP_OPEN_OPEN, it);
			if (rc) {
				mutex_unlock(&lli->lli_och_mutex);
				GOTO(out_openerr, rc);
			}

			ll_release_openhandle(file->f_dentry, it);
		}
		(*och_usecount)++;

		rc = ll_local_open(file, it, fd, NULL);
		if (rc) {
			(*och_usecount)--;
			mutex_unlock(&lli->lli_och_mutex);
			GOTO(out_openerr, rc);
		}
	} else {
		LASSERT(*och_usecount == 0);
		if (!it->d.lustre.it_disposition) {
			/* We cannot just request lock handle now, new ELC code
			   means that one of other OPEN locks for this file
			   could be cancelled, and since blocking ast handler
			   would attempt to grab och_mutex as well, that would
			   result in a deadlock */
			mutex_unlock(&lli->lli_och_mutex);
			it->it_create_mode |= M_CHECK_STALE;
			rc = ll_intent_file_open(file, NULL, 0, it);
			it->it_create_mode &= ~M_CHECK_STALE;
			if (rc)
				GOTO(out_openerr, rc);

			goto restart;
		}
		OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
		if (!*och_p)
			GOTO(out_och_free, rc = -ENOMEM);

		(*och_usecount)++;

		/* md_intent_lock() didn't get a request ref if there was an
		 * open error, so don't do cleanup on the request here
		 * (bug 3430) */
		/* XXX (green): Should not we bail out on any error here, not
		 * just open error? */
		rc = it_open_error(DISP_OPEN_OPEN, it);
		if (rc)
			GOTO(out_och_free, rc);

		LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));

		rc = ll_local_open(file, it, fd, *och_p);
		if (rc)
			GOTO(out_och_free, rc);
	}
	mutex_unlock(&lli->lli_och_mutex);
	/* @fd is now owned by the file; don't free it in cleanup below. */
	fd = NULL;

	/* Must do this outside lli_och_mutex lock to prevent deadlock where
	   different kind of OPEN lock for this same inode gets cancelled
	   by ldlm_cancel_lru */
	if (!S_ISREG(inode->i_mode))
		GOTO(out_och_free, rc);

	ll_capa_open(inode);

	if (!lli->lli_has_smd &&
	    (cl_is_lov_delay_create(file->f_flags) ||
	     (file->f_mode & FMODE_WRITE) == 0)) {
		CDEBUG(D_INODE, "object creation was delayed\n");
		GOTO(out_och_free, rc);
	}
	cl_lov_delay_create_clear(&file->f_flags);
	GOTO(out_och_free, rc);

out_och_free:
	if (rc) {
		if (och_p && *och_p) {
			OBD_FREE(*och_p, sizeof (struct obd_client_handle));
			*och_p = NULL; /* OBD_FREE writes some magic there */
			(*och_usecount)--;
		}
		mutex_unlock(&lli->lli_och_mutex);

out_openerr:
		if (opendir_set != 0)
			ll_stop_statahead(inode, lli->lli_opendir_key);
		if (fd != NULL)
			ll_file_data_put(fd);
	} else {
		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
	}

	if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
	}

	return rc;
}
720
721 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
722 struct ldlm_lock_desc *desc, void *data, int flag)
723 {
724 int rc;
725 struct lustre_handle lockh;
726
727 switch (flag) {
728 case LDLM_CB_BLOCKING:
729 ldlm_lock2handle(lock, &lockh);
730 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
731 if (rc < 0) {
732 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
733 return rc;
734 }
735 break;
736 case LDLM_CB_CANCELING:
737 /* do nothing */
738 break;
739 }
740 return 0;
741 }
742
/**
 * Acquire a lease and open the file.
 *
 * \param inode		inode to take the lease on
 * \param file		if non-NULL, an already-open file whose MDS open
 *			handle is handed over to the lease; only allowed
 *			when this fd is the sole opener of that mode
 * \param fmode		exactly one of FMODE_READ or FMODE_WRITE
 * \param open_flags	additional MDS open flags
 *
 * \retval obd_client_handle holding the lease, or ERR_PTR(-errno).
 */
static struct obd_client_handle *
ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
	      __u64 open_flags)
{
	struct lookup_intent it = { .it_op = IT_OPEN };
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req;
	struct lustre_handle old_handle = { 0 };
	struct obd_client_handle *och = NULL;
	int rc;
	int rc2;

	if (fmode != FMODE_WRITE && fmode != FMODE_READ)
		return ERR_PTR(-EINVAL);

	if (file != NULL) {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
		struct obd_client_handle **och_p;
		__u64 *och_usecount;

		if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
			return ERR_PTR(-EPERM);

		/* Get the openhandle of the file */
		rc = -EBUSY;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och != NULL) {
			mutex_unlock(&lli->lli_och_mutex);
			return ERR_PTR(rc);
		}

		if (fd->fd_och == NULL) {
			if (file->f_mode & FMODE_WRITE) {
				LASSERT(lli->lli_mds_write_och != NULL);
				och_p = &lli->lli_mds_write_och;
				och_usecount = &lli->lli_open_fd_write_count;
			} else {
				LASSERT(lli->lli_mds_read_och != NULL);
				och_p = &lli->lli_mds_read_och;
				och_usecount = &lli->lli_open_fd_read_count;
			}
			/* Steal the open handle only when we are the sole
			 * opener; otherwise rc stays -EBUSY. */
			if (*och_usecount == 1) {
				fd->fd_och = *och_p;
				*och_p = NULL;
				*och_usecount = 0;
				rc = 0;
			}
		}
		mutex_unlock(&lli->lli_och_mutex);
		if (rc < 0) /* more than 1 opener */
			return ERR_PTR(rc);

		LASSERT(fd->fd_och != NULL);
		old_handle = fd->fd_och->och_fh;
	}

	OBD_ALLOC_PTR(och);
	if (och == NULL)
		return ERR_PTR(-ENOMEM);

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		GOTO(out, rc = PTR_ERR(op_data));

	/* To tell the MDT this openhandle is from the same owner */
	op_data->op_handle = old_handle;

	it.it_flags = fmode | open_flags;
	it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
			    ll_md_blocking_lease_ast,
	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
	 * it can be cancelled which may mislead applications that the lease is
	 * broken;
	 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
	 * doesn't deal with openhandle, so normal openhandle will be leaked. */
			    LDLM_FL_NO_LRU | LDLM_FL_EXCL);
	ll_finish_md_op_data(op_data);
	ptlrpc_req_finished(req);
	if (rc < 0)
		GOTO(out_release_it, rc);

	if (it_disposition(&it, DISP_LOOKUP_NEG))
		GOTO(out_release_it, rc = -ENOENT);

	rc = it_open_error(DISP_OPEN_OPEN, &it);
	if (rc)
		GOTO(out_release_it, rc);

	LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
	ll_och_fill(sbi->ll_md_exp, &it, och);

	if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
		GOTO(out_close, rc = -EOPNOTSUPP);

	/* already get lease, handle lease lock */
	ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
	if (it.d.lustre.it_lock_mode == 0 ||
	    it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
		/* open lock must return for lease */
		CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
		       PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
		       it.d.lustre.it_lock_bits);
		GOTO(out_close, rc = -EPROTO);
	}

	ll_intent_release(&it);
	return och;

out_close:
	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
	if (rc2)
		CERROR("Close openhandle returned %d\n", rc2);

	/* cancel open lock */
	if (it.d.lustre.it_lock_mode != 0) {
		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
					    it.d.lustre.it_lock_mode);
		it.d.lustre.it_lock_mode = 0;
	}
out_release_it:
	ll_intent_release(&it);
out:
	OBD_FREE_PTR(och);
	return ERR_PTR(rc);
}
876
/**
 * Release lease and close the file.
 * It will check if the lease has ever broken.
 *
 * If the lease lock was never cancelled by the server it is cancelled
 * here before the open handle is closed.  *lease_broken (when
 * non-NULL) reports whether the lease had already been broken.
 */
static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
			  bool *lease_broken)
{
	struct ldlm_lock *lock;
	bool cancelled = true;
	int rc;

	lock = ldlm_handle2lock(&och->och_lease_handle);
	if (lock != NULL) {
		lock_res_and_lock(lock);
		cancelled = ldlm_is_cancel(lock);
		unlock_res_and_lock(lock);
		ldlm_lock_put(lock);
	}

	CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
	       PFID(&ll_i2info(inode)->lli_fid), cancelled);

	if (!cancelled)
		ldlm_cli_cancel(&och->och_lease_handle, 0);
	if (lease_broken != NULL)
		*lease_broken = cancelled;

	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
				       NULL);
	return rc;
}
908
/* Fills the obdo with the attributes for the lsm: issues an async OST
 * getattr and waits for it to complete.  When @sync is set the getattr
 * is performed under a server-side lock (OBD_FL_SRVLOCK).  On success
 * o_valid is masked down to the fields callers may rely on. */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
			  struct obd_capa *capa, struct obdo *obdo,
			  __u64 ioepoch, int sync)
{
	struct ptlrpc_request_set *set;
	struct obd_info oinfo = { { { 0 } } };
	int rc;

	LASSERT(lsm != NULL);

	oinfo.oi_md = lsm;
	oinfo.oi_oa = obdo;
	oinfo.oi_oa->o_oi = lsm->lsm_oi;
	oinfo.oi_oa->o_mode = S_IFREG;
	oinfo.oi_oa->o_ioepoch = ioepoch;
	oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
			       OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
			       OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
			       OBD_MD_FLMTIME | OBD_MD_FLCTIME |
			       OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
			       OBD_MD_FLDATAVERSION;
	oinfo.oi_capa = capa;
	if (sync) {
		oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
		oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
	}

	set = ptlrpc_prep_set();
	if (set == NULL) {
		CERROR("can't allocate ptlrpc set\n");
		rc = -ENOMEM;
	} else {
		rc = obd_getattr_async(exp, &oinfo, set);
		if (rc == 0)
			rc = ptlrpc_set_wait(set);
		ptlrpc_set_destroy(set);
	}
	if (rc == 0)
		oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
					 OBD_MD_FLATIME | OBD_MD_FLMTIME |
					 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
					 OBD_MD_FLDATAVERSION);
	return rc;
}
954
/**
 * Performs the getattr on the inode and updates its fields.
 * If @sync != 0, perform the getattr under the server-side lock.
 *
 * \retval 0 on success (inode refreshed from @obdo), negative errno
 *	    from ll_lsm_getattr() otherwise.
 */
int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
		     __u64 ioepoch, int sync)
{
	struct obd_capa *capa = ll_mdscapa_get(inode);
	struct lov_stripe_md *lsm;
	int rc;

	lsm = ccc_inode_lsm_get(inode);
	rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
			    capa, obdo, ioepoch, sync);
	capa_put(capa);
	if (rc == 0) {
		struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;

		obdo_refresh_inode(inode, obdo, obdo->o_valid);
		CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
		       " blksize %lu\n", POSTID(oi), i_size_read(inode),
		       (unsigned long long)inode->i_blocks,
		       (unsigned long)ll_inode_blksize(inode));
	}
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}
982
/**
 * Merge MDS-supplied timestamps with the size/blocks/timestamps
 * obtained from the OSTs (cl_object_attr_get), under the inode size
 * lock, keeping the most recent value for each timestamp.
 *
 * \retval 0 on success, negative errno from cl_object_attr_get().
 */
int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct cl_object *obj = lli->lli_clob;
	struct cl_attr *attr = ccc_env_thread_attr(env);
	struct ost_lvb lvb;
	int rc = 0;

	ll_inode_size_lock(inode);
	/* merge timestamps the most recently obtained from mds with
	   timestamps obtained from osts */
	LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
	LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
	LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
	inode_init_lvb(inode, &lvb);

	cl_object_attr_lock(obj);
	rc = cl_object_attr_get(env, obj, attr);
	cl_object_attr_unlock(obj);

	if (rc == 0) {
		/* Keep the newer of the MDS and OST timestamps. */
		if (lvb.lvb_atime < attr->cat_atime)
			lvb.lvb_atime = attr->cat_atime;
		if (lvb.lvb_ctime < attr->cat_ctime)
			lvb.lvb_ctime = attr->cat_ctime;
		if (lvb.lvb_mtime < attr->cat_mtime)
			lvb.lvb_mtime = attr->cat_mtime;

		CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
		       PFID(&lli->lli_fid), attr->cat_size);
		cl_isize_write_nolock(inode, attr->cat_size);

		inode->i_blocks = attr->cat_blocks;

		LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
		LTIME_S(inode->i_atime) = lvb.lvb_atime;
		LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
	}
	ll_inode_size_unlock(inode);

	return rc;
}
1025
1026 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1027 lstat_t *st)
1028 {
1029 struct obdo obdo = { 0 };
1030 int rc;
1031
1032 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1033 if (rc == 0) {
1034 st->st_size = obdo.o_size;
1035 st->st_blocks = obdo.o_blocks;
1036 st->st_mtime = obdo.o_mtime;
1037 st->st_atime = obdo.o_atime;
1038 st->st_ctime = obdo.o_ctime;
1039 }
1040 return rc;
1041 }
1042
1043 static bool file_is_noatime(const struct file *file)
1044 {
1045 const struct vfsmount *mnt = file->f_path.mnt;
1046 const struct inode *inode = file->f_path.dentry->d_inode;
1047
1048 /* Adapted from file_accessed() and touch_atime().*/
1049 if (file->f_flags & O_NOATIME)
1050 return true;
1051
1052 if (inode->i_flags & S_NOATIME)
1053 return true;
1054
1055 if (IS_NOATIME(inode))
1056 return true;
1057
1058 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1059 return true;
1060
1061 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1062 return true;
1063
1064 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1065 return true;
1066
1067 return false;
1068 }
1069
1070 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1071 {
1072 struct inode *inode = file->f_dentry->d_inode;
1073
1074 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1075 if (write) {
1076 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1077 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1078 file->f_flags & O_DIRECT ||
1079 IS_SYNC(inode);
1080 }
1081 io->ci_obj = ll_i2info(inode)->lli_clob;
1082 io->ci_lockreq = CILR_MAYBE;
1083 if (ll_file_nolock(file)) {
1084 io->ci_lockreq = CILR_NEVER;
1085 io->ci_no_srvlock = 1;
1086 } else if (file->f_flags & O_APPEND) {
1087 io->ci_lockreq = CILR_MANDATORY;
1088 }
1089
1090 io->ci_noatime = file_is_noatime(file);
1091 }
1092
/**
 * Common back end for read/write/splice: set up a cl_io, take the
 * llite-level serialization (write mutex or trunc semaphore), run the
 * cl_io loop, and restart once if the layout changed before any byte
 * was transferred.
 *
 * \param args carries the subtype-specific payload (iter/iocb or pipe).
 * \param iot  CIT_READ or CIT_WRITE.
 * \param ppos in/out file position, updated from the io on success.
 * \return bytes transferred or negative errno.
 */
static ssize_t
ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
		   struct file *file, enum cl_io_type iot,
		   loff_t *ppos, size_t count)
{
	struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct cl_io *io;
	ssize_t result;

restart:
	io = ccc_env_thread_io(env);
	ll_io_init(io, file, iot == CIT_WRITE);

	if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
		struct vvp_io *vio = vvp_env_io(env);
		struct ccc_io *cio = ccc_env_io(env);
		/* remembers which of the two locks below was taken so the
		 * matching release runs after cl_io_loop() */
		int write_mutex_locked = 0;

		cio->cui_fd = LUSTRE_FPRIVATE(file);
		vio->cui_io_subtype = args->via_io_subtype;

		switch (vio->cui_io_subtype) {
		case IO_NORMAL:
			cio->cui_iter = args->u.normal.via_iter;
			cio->cui_iocb = args->u.normal.via_iocb;
			/* writes serialize on lli_write_mutex unless the fd
			 * holds a group lock; reads only exclude truncate */
			if ((iot == CIT_WRITE) &&
			    !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
				if (mutex_lock_interruptible(&lli->
							lli_write_mutex))
					GOTO(out, result = -ERESTARTSYS);
				write_mutex_locked = 1;
			} else if (iot == CIT_READ) {
				down_read(&lli->lli_trunc_sem);
			}
			break;
		case IO_SPLICE:
			vio->u.splice.cui_pipe = args->u.splice.via_pipe;
			vio->u.splice.cui_flags = args->u.splice.via_flags;
			break;
		default:
			CERROR("Unknown IO type - %u\n", vio->cui_io_subtype);
			LBUG();
		}
		result = cl_io_loop(env, io);
		if (write_mutex_locked)
			mutex_unlock(&lli->lli_write_mutex);
		else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
			up_read(&lli->lli_trunc_sem);
	} else {
		/* cl_io_rw_init() handled IO */
		result = io->ci_result;
	}

	/* partial success: report the bytes moved and advance the pos */
	if (io->ci_nob > 0) {
		result = io->ci_nob;
		*ppos = io->u.ci_wr.wr.crw_pos;
	}
	GOTO(out, result);
out:
	cl_io_fini(env, io);
	/* If any bit been read/written (result != 0), we just return
	 * short read/write instead of restart io. */
	if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
		CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
		       iot == CIT_READ ? "read" : "write",
		       file->f_dentry->d_name.name, *ppos, count);
		LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
		goto restart;
	}

	/* account bytes moved; a failed write (other than a signal
	 * restart) marks the fd so close can report the error */
	if (iot == CIT_READ) {
		if (result >= 0)
			ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
					   LPROC_LL_READ_BYTES, result);
	} else if (iot == CIT_WRITE) {
		if (result >= 0) {
			ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
					   LPROC_LL_WRITE_BYTES, result);
			fd->fd_write_failed = false;
		} else if (result != -ERESTARTSYS) {
			fd->fd_write_failed = true;
		}
	}

	return result;
}
1180
1181 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1182 {
1183 struct lu_env *env;
1184 struct vvp_io_args *args;
1185 ssize_t result;
1186 int refcheck;
1187
1188 env = cl_env_get(&refcheck);
1189 if (IS_ERR(env))
1190 return PTR_ERR(env);
1191
1192 args = vvp_env_args(env, IO_NORMAL);
1193 args->u.normal.via_iter = to;
1194 args->u.normal.via_iocb = iocb;
1195
1196 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1197 &iocb->ki_pos, iov_iter_count(to));
1198 cl_env_put(env, &refcheck);
1199 return result;
1200 }
1201
1202 /*
1203 * Write to a file (through the page cache).
1204 */
1205 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1206 {
1207 struct lu_env *env;
1208 struct vvp_io_args *args;
1209 ssize_t result;
1210 int refcheck;
1211
1212 env = cl_env_get(&refcheck);
1213 if (IS_ERR(env))
1214 return PTR_ERR(env);
1215
1216 args = vvp_env_args(env, IO_NORMAL);
1217 args->u.normal.via_iter = from;
1218 args->u.normal.via_iocb = iocb;
1219
1220 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1221 &iocb->ki_pos, iov_iter_count(from));
1222 cl_env_put(env, &refcheck);
1223 return result;
1224 }
1225
1226 /*
1227 * Send file content (through pagecache) somewhere with helper
1228 */
1229 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1230 struct pipe_inode_info *pipe, size_t count,
1231 unsigned int flags)
1232 {
1233 struct lu_env *env;
1234 struct vvp_io_args *args;
1235 ssize_t result;
1236 int refcheck;
1237
1238 env = cl_env_get(&refcheck);
1239 if (IS_ERR(env))
1240 return PTR_ERR(env);
1241
1242 args = vvp_env_args(env, IO_SPLICE);
1243 args->u.splice.via_pipe = pipe;
1244 args->u.splice.via_flags = flags;
1245
1246 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1247 cl_env_put(env, &refcheck);
1248 return result;
1249 }
1250
/**
 * Ask the LOV/OST layer to recreate the object \a oi on OST \a ost_idx
 * for this inode, using a scratch copy of the current layout.
 *
 * \return 0 on success, -ENOENT if the file has no objects, -ENOMEM on
 * allocation failure, or the obd_create() result.
 */
static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
			   obd_count ost_idx)
{
	struct obd_export *exp = ll_i2dtexp(inode);
	struct obd_trans_info oti = { 0 };
	struct obdo *oa = NULL;
	int lsm_size;
	int rc = 0;
	struct lov_stripe_md *lsm = NULL, *lsm2;

	OBDO_ALLOC(oa);
	if (oa == NULL)
		return -ENOMEM;

	lsm = ccc_inode_lsm_get(inode);
	if (!lsm_has_objects(lsm))
		GOTO(out, rc = -ENOENT);

	/* layout header plus one lov_oinfo per stripe */
	lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
		   (lsm->lsm_stripe_count));

	OBD_ALLOC_LARGE(lsm2, lsm_size);
	if (lsm2 == NULL)
		GOTO(out, rc = -ENOMEM);

	/* the target OST index travels to the server in o_nlink when
	 * OBD_FL_RECREATE_OBJS is set */
	oa->o_oi = *oi;
	oa->o_nlink = ost_idx;
	oa->o_flags |= OBD_FL_RECREATE_OBJS;
	oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
	obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
				   OBD_MD_FLMTIME | OBD_MD_FLCTIME);
	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
	/* work on a throwaway copy so the cached layout is not modified */
	memcpy(lsm2, lsm, lsm_size);
	/* hold the size lock across the create to exclude size changes */
	ll_inode_size_lock(inode);
	rc = obd_create(NULL, exp, oa, &lsm2, &oti);
	ll_inode_size_unlock(inode);

	OBD_FREE_LARGE(lsm2, lsm_size);
	GOTO(out, rc);
out:
	ccc_inode_lsm_put(inode, lsm);
	OBDO_FREE(oa);
	return rc;
}
1295
1296 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1297 {
1298 struct ll_recreate_obj ucreat;
1299 struct ost_id oi;
1300
1301 if (!capable(CFS_CAP_SYS_ADMIN))
1302 return -EPERM;
1303
1304 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1305 sizeof(ucreat)))
1306 return -EFAULT;
1307
1308 ostid_set_seq_mdt0(&oi);
1309 ostid_set_id(&oi, ucreat.lrc_id);
1310 return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
1311 }
1312
1313 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1314 {
1315 struct lu_fid fid;
1316 struct ost_id oi;
1317 obd_count ost_idx;
1318
1319 if (!capable(CFS_CAP_SYS_ADMIN))
1320 return -EPERM;
1321
1322 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1323 return -EFAULT;
1324
1325 fid_to_ostid(&fid, &oi);
1326 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1327 return ll_lov_recreate(inode, &oi, ost_idx);
1328 }
1329
/**
 * Create the file layout from the user-supplied \a lum by re-opening the
 * file with an IT_OPEN intent carrying the striping EA.
 *
 * Fails with -EEXIST if a layout is already cached.  On any exit the
 * LOV_DELAY_CREATE flag is cleared from the open flags.
 */
int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
			     int flags, struct lov_user_md *lum, int lum_size)
{
	struct lov_stripe_md *lsm = NULL;
	struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
	int rc = 0;

	lsm = ccc_inode_lsm_get(inode);
	if (lsm != NULL) {
		ccc_inode_lsm_put(inode, lsm);
		CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
		       inode->i_ino);
		GOTO(out, rc = -EEXIST);
	}

	ll_inode_size_lock(inode);
	rc = ll_intent_file_open(file, lum, lum_size, &oit);
	if (rc)
		GOTO(out_unlock, rc);
	rc = oit.d.lustre.it_status;
	if (rc < 0)
		GOTO(out_req_free, rc);

	ll_release_openhandle(file->f_dentry, &oit);

 out_unlock:
	ll_inode_size_unlock(inode);
	ll_intent_release(&oit);
	/* NOTE(review): lsm is always NULL on this path (the non-NULL case
	 * returned -EEXIST above), so this put is a no-op kept for symmetry */
	ccc_inode_lsm_put(inode, lsm);
 out:
	cl_lov_delay_create_clear(&file->f_flags);
	return rc;
 out_req_free:
	/* it_status < 0: drop the request ref before the common exit */
	ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
	goto out;
}
1366
/**
 * Fetch the LOV EA of \a filename (relative to \a inode) from the MDS.
 *
 * On success *lmmp/*lmm_size point into the reply buffer of *request;
 * the caller owns the request and must ptlrpc_req_finished() it (this
 * also applies on error, as *request is always stored back).
 * On little-endian-disagreeing hosts the EA is swabbed in place so
 * userspace sees host endianness.
 */
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
			     struct lov_mds_md **lmmp, int *lmm_size,
			     struct ptlrpc_request **request)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct mdt_body *body;
	struct lov_mds_md *lmm = NULL;
	struct ptlrpc_request *req = NULL;
	struct md_op_data *op_data;
	int rc, lmmsize;

	rc = ll_get_default_mdsize(sbi, &lmmsize);
	if (rc)
		return rc;

	op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
				     strlen(filename), lmmsize,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	/* ask only for the striping EA (file or directory variant) */
	op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
	if (rc < 0) {
		CDEBUG(D_INFO, "md_getattr_name failed "
		       "on %s: rc %d\n", filename, rc);
		GOTO(out, rc);
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	LASSERT(body != NULL); /* checked by mdc_getattr_name */

	lmmsize = body->eadatasize;

	if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
	    lmmsize == 0) {
		GOTO(out, rc = -ENODATA);
	}

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
	LASSERT(lmm != NULL);

	/* only plain v1/v3 layouts are expected here */
	if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
	    (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
		GOTO(out, rc = -EPROTO);
	}

	/*
	 * This is coming from the MDS, so is probably in
	 * little endian. We convert it to host endian before
	 * passing it to userspace.
	 */
	if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
		int stripe_count;

		stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
		/* released files carry no valid object array to swab */
		if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
			stripe_count = 0;

		/* if function called for directory - we should
		 * avoid swab not existent lsm objects */
		if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
			lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
				 stripe_count);
		} else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
				 stripe_count);
		}
	}

out:
	*lmmp = lmm;
	*lmm_size = lmmsize;
	*request = req;
	return rc;
}
1450
1451 static int ll_lov_setea(struct inode *inode, struct file *file,
1452 unsigned long arg)
1453 {
1454 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1455 struct lov_user_md *lump;
1456 int lum_size = sizeof(struct lov_user_md) +
1457 sizeof(struct lov_user_ost_data);
1458 int rc;
1459
1460 if (!capable(CFS_CAP_SYS_ADMIN))
1461 return -EPERM;
1462
1463 OBD_ALLOC_LARGE(lump, lum_size);
1464 if (lump == NULL)
1465 return -ENOMEM;
1466
1467 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1468 OBD_FREE_LARGE(lump, lum_size);
1469 return -EFAULT;
1470 }
1471
1472 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1473
1474 OBD_FREE_LARGE(lump, lum_size);
1475 return rc;
1476 }
1477
1478 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1479 unsigned long arg)
1480 {
1481 struct lov_user_md_v3 lumv3;
1482 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1483 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1484 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1485 int lum_size, rc;
1486 int flags = FMODE_WRITE;
1487
1488 /* first try with v1 which is smaller than v3 */
1489 lum_size = sizeof(struct lov_user_md_v1);
1490 if (copy_from_user(lumv1, lumv1p, lum_size))
1491 return -EFAULT;
1492
1493 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1494 lum_size = sizeof(struct lov_user_md_v3);
1495 if (copy_from_user(&lumv3, lumv3p, lum_size))
1496 return -EFAULT;
1497 }
1498
1499 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1500 if (rc == 0) {
1501 struct lov_stripe_md *lsm;
1502 __u32 gen;
1503
1504 put_user(0, &lumv1p->lmm_stripe_count);
1505
1506 ll_layout_refresh(inode, &gen);
1507 lsm = ccc_inode_lsm_get(inode);
1508 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1509 0, lsm, (void *)arg);
1510 ccc_inode_lsm_put(inode, lsm);
1511 }
1512 return rc;
1513 }
1514
1515 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1516 {
1517 struct lov_stripe_md *lsm;
1518 int rc = -ENODATA;
1519
1520 lsm = ccc_inode_lsm_get(inode);
1521 if (lsm != NULL)
1522 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1523 lsm, (void *)arg);
1524 ccc_inode_lsm_put(inode, lsm);
1525 return rc;
1526 }
1527
/**
 * Take a group lock with id \a arg on behalf of \a file.
 *
 * The fd flag check is done twice under lli_lock: once before the
 * (sleeping) cl_get_grouplock() call and again afterwards, since
 * another thread may have installed a lock in the meantime.
 */
static int
ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;
	int rc;

	/* group locks and lockless opens are mutually exclusive */
	if (ll_file_nolock(file))
		return -EOPNOTSUPP;

	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		CWARN("group lock already existed with gid %lu\n",
		      fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock == NULL);
	spin_unlock(&lli->lli_lock);

	/* may sleep, so cannot hold lli_lock across it */
	rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
			      arg, (file->f_flags & O_NONBLOCK), &grouplock);
	if (rc)
		return rc;

	/* re-check: someone may have raced us while we slept */
	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		spin_unlock(&lli->lli_lock);
		CERROR("another thread just won the race\n");
		cl_put_grouplock(&grouplock);
		return -EINVAL;
	}

	fd->fd_flags |= LL_FILE_GROUP_LOCKED;
	fd->fd_grouplock = grouplock;
	spin_unlock(&lli->lli_lock);

	CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
	return 0;
}
1569
/**
 * Release the group lock with id \a arg held by \a file.
 *
 * The lock state is detached from the fd under lli_lock, then the
 * (possibly sleeping) cl_put_grouplock() runs on the local copy.
 *
 * \retval -EINVAL if no group lock is held or the gid does not match.
 */
int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;

	spin_lock(&lli->lli_lock);
	if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
		spin_unlock(&lli->lli_lock);
		CWARN("no group lock held\n");
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock != NULL);

	if (fd->fd_grouplock.cg_gid != arg) {
		CWARN("group lock %lu doesn't match current id %lu\n",
		      arg, fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}

	/* take ownership of the lock, clear the fd state, then drop it
	 * outside the spinlock */
	grouplock = fd->fd_grouplock;
	memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
	fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
	spin_unlock(&lli->lli_lock);

	cl_put_grouplock(&grouplock);
	CDEBUG(D_INFO, "group lock %lu released\n", arg);
	return 0;
}
1600
1601 /**
1602 * Close inode open handle
1603 *
1604 * \param dentry [in] dentry which contains the inode
1605 * \param it [in,out] intent which contains open info and result
1606 *
1607 * \retval 0 success
1608 * \retval <0 failure
1609 */
int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
{
	struct inode *inode = dentry->d_inode;
	struct obd_client_handle *och;
	int rc;

	LASSERT(inode);

	/* Root ? Do nothing. */
	if (dentry->d_inode->i_sb->s_root == dentry)
		return 0;

	/* No open handle to close? Move away */
	if (!it_disposition(it, DISP_OPEN_OPEN))
		return 0;

	LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);

	OBD_ALLOC(och, sizeof(*och));
	if (!och)
		GOTO(out, rc = -ENOMEM);

	/* populate the handle from the intent's open reply, then close it;
	 * ll_close_inode_openhandle() consumes och */
	ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);

	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
				       inode, och, NULL);
out:
	/* this one is in place of ll_file_open */
	if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
	}
	return rc;
}
1644
1645 /**
1646 * Get size for inode for which FIEMAP mapping is requested.
1647 * Make the FIEMAP get_info call and returns the result.
1648 */
static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
			size_t num_bytes)
{
	struct obd_export *exp = ll_i2dtexp(inode);
	struct lov_stripe_md *lsm = NULL;
	struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
	__u32 vallen = num_bytes;
	int rc;

	/* Checks for fiemap flags */
	if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
		/* report back which flags we do support */
		fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
		return -EBADR;
	}

	/* Check for FIEMAP_FLAG_SYNC */
	if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
		rc = filemap_fdatawrite(inode->i_mapping);
		if (rc)
			return rc;
	}

	lsm = ccc_inode_lsm_get(inode);
	if (lsm == NULL)
		return -ENOENT;

	/* If the stripe_count > 1 and the application does not understand
	 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
	 */
	if (lsm->lsm_stripe_count > 1 &&
	    !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
		GOTO(out, rc = -EOPNOTSUPP);

	fm_key.oa.o_oi = lsm->lsm_oi;
	fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;

	obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
	obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
	/* If filesize is 0, then there would be no objects for mapping */
	if (fm_key.oa.o_size == 0) {
		fiemap->fm_mapped_extents = 0;
		GOTO(out, rc = 0);
	}

	/* the request half of the in/out fiemap buffer is seeded from the
	 * caller's copy; obd_get_info() overwrites it with the result */
	memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));

	rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
			  fiemap, lsm);
	if (rc)
		CERROR("obd_get_info failed: rc = %d\n", rc);

out:
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}
1704
1705 int ll_fid2path(struct inode *inode, void *arg)
1706 {
1707 struct obd_export *exp = ll_i2mdexp(inode);
1708 struct getinfo_fid2path *gfout, *gfin;
1709 int outsize, rc;
1710
1711 if (!capable(CFS_CAP_DAC_READ_SEARCH) &&
1712 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1713 return -EPERM;
1714
1715 /* Need to get the buflen */
1716 OBD_ALLOC_PTR(gfin);
1717 if (gfin == NULL)
1718 return -ENOMEM;
1719 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1720 OBD_FREE_PTR(gfin);
1721 return -EFAULT;
1722 }
1723
1724 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1725 OBD_ALLOC(gfout, outsize);
1726 if (gfout == NULL) {
1727 OBD_FREE_PTR(gfin);
1728 return -ENOMEM;
1729 }
1730 memcpy(gfout, gfin, sizeof(*gfout));
1731 OBD_FREE_PTR(gfin);
1732
1733 /* Call mdc_iocontrol */
1734 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1735 if (rc)
1736 GOTO(gf_free, rc);
1737
1738 if (copy_to_user(arg, gfout, outsize))
1739 rc = -EFAULT;
1740
1741 gf_free:
1742 OBD_FREE(gfout, outsize);
1743 return rc;
1744 }
1745
1746 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1747 {
1748 struct ll_user_fiemap *fiemap_s;
1749 size_t num_bytes, ret_bytes;
1750 unsigned int extent_count;
1751 int rc = 0;
1752
1753 /* Get the extent count so we can calculate the size of
1754 * required fiemap buffer */
1755 if (get_user(extent_count,
1756 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1757 return -EFAULT;
1758
1759 if (extent_count >=
1760 (SIZE_MAX - sizeof(*fiemap_s)) / sizeof(struct ll_fiemap_extent))
1761 return -EINVAL;
1762 num_bytes = sizeof(*fiemap_s) + (extent_count *
1763 sizeof(struct ll_fiemap_extent));
1764
1765 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1766 if (fiemap_s == NULL)
1767 return -ENOMEM;
1768
1769 /* get the fiemap value */
1770 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1771 sizeof(*fiemap_s)))
1772 GOTO(error, rc = -EFAULT);
1773
1774 /* If fm_extent_count is non-zero, read the first extent since
1775 * it is used to calculate end_offset and device from previous
1776 * fiemap call. */
1777 if (extent_count) {
1778 if (copy_from_user(&fiemap_s->fm_extents[0],
1779 (char __user *)arg + sizeof(*fiemap_s),
1780 sizeof(struct ll_fiemap_extent)))
1781 GOTO(error, rc = -EFAULT);
1782 }
1783
1784 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1785 if (rc)
1786 GOTO(error, rc);
1787
1788 ret_bytes = sizeof(struct ll_user_fiemap);
1789
1790 if (extent_count != 0)
1791 ret_bytes += (fiemap_s->fm_mapped_extents *
1792 sizeof(struct ll_fiemap_extent));
1793
1794 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1795 rc = -EFAULT;
1796
1797 error:
1798 OBD_FREE_LARGE(fiemap_s, num_bytes);
1799 return rc;
1800 }
1801
1802 /*
1803 * Read the data_version for inode.
1804 *
1805 * This value is computed using stripe object version on OST.
1806 * Version is computed using server side locking.
1807 *
1808 * @param extent_lock Take extent lock. Not needed if a process is already
1809 * holding the OST object group locks.
1810 */
int ll_data_version(struct inode *inode, __u64 *data_version,
		    int extent_lock)
{
	struct lov_stripe_md *lsm = NULL;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obdo *obdo = NULL;
	int rc;

	/* If no stripe, we consider version is 0. */
	lsm = ccc_inode_lsm_get(inode);
	if (!lsm_has_objects(lsm)) {
		*data_version = 0;
		CDEBUG(D_INODE, "No object for inode\n");
		GOTO(out, rc = 0);
	}

	OBD_ALLOC_PTR(obdo);
	if (obdo == NULL)
		GOTO(out, rc = -ENOMEM);

	/* getattr with server-side lock if extent_lock was requested */
	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
	if (rc == 0) {
		/* older servers may not report a data version at all */
		if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
			rc = -EOPNOTSUPP;
		else
			*data_version = obdo->o_data_version;
	}

	OBD_FREE_PTR(obdo);
out:
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}
1844
1845 /*
1846 * Trigger a HSM release request for the provided inode.
1847 */
int ll_hsm_release(struct inode *inode)
{
	struct cl_env_nest nest;
	struct lu_env *env;
	struct obd_client_handle *och = NULL;
	__u64 data_version = 0;
	int rc;


	CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
	       ll_get_fsname(inode->i_sb, NULL, 0),
	       PFID(&ll_i2info(inode)->lli_fid));

	/* take a write lease so no other opener can race the release */
	och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
	if (IS_ERR(och))
		GOTO(out, rc = PTR_ERR(och));

	/* Grab latest data_version and [am]time values */
	rc = ll_data_version(inode, &data_version, 1);
	if (rc != 0)
		GOTO(out, rc);

	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		GOTO(out, rc = PTR_ERR(env));

	/* pull current OST attributes into the inode before it is released */
	ll_merge_lvb(env, inode);
	cl_env_nested_put(&nest, env);

	/* Release the file.
	 * NB: lease lock handle is released in mdc_hsm_release_pack() because
	 * we still need it to pack l_remote_handle to MDT. */
	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
				       &data_version);
	/* och ownership passed to ll_close_inode_openhandle() above */
	och = NULL;


out:
	if (och != NULL && !IS_ERR(och)) /* close the file */
		ll_lease_close(och, inode, NULL);

	return rc;
}
1891
/* Scratch state for ll_swap_layouts(); holds the (possibly swapped)
 * inode pair, their saved timestamps, and the data-version checks the
 * caller requested. */
struct ll_swap_stack {
	struct iattr ia1, ia2;		/* saved [am]time of each file */
	__u64 dv1, dv2;			/* expected data versions */
	struct inode *inode1, *inode2;
	bool check_dv1, check_dv2;	/* verify dv before swapping */
};
1898
/**
 * LL_IOC_LOV_SWAP_LAYOUTS handler: atomically exchange the layouts of
 * two files on the MDT, optionally flushing dirty cache via group locks,
 * verifying data versions, and preserving [am]time.
 *
 * The pair is ordered by FID before locking so that concurrent swaps of
 * the same two files acquire their group locks in a consistent order.
 */
static int ll_swap_layouts(struct file *file1, struct file *file2,
			   struct lustre_swap_layouts *lsl)
{
	struct mdc_swap_layouts msl;
	struct md_op_data *op_data;
	__u32 gid;
	__u64 dv;
	struct ll_swap_stack *llss = NULL;
	int rc;

	OBD_ALLOC_PTR(llss);
	if (llss == NULL)
		return -ENOMEM;

	llss->inode1 = file1->f_dentry->d_inode;
	llss->inode2 = file2->f_dentry->d_inode;

	if (!S_ISREG(llss->inode2->i_mode))
		GOTO(free, rc = -EINVAL);

	if (inode_permission(llss->inode1, MAY_WRITE) ||
	    inode_permission(llss->inode2, MAY_WRITE))
		GOTO(free, rc = -EPERM);

	if (llss->inode2->i_sb != llss->inode1->i_sb)
		GOTO(free, rc = -EXDEV);

	/* we use 2 bool because it is easier to swap than 2 bits */
	if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
		llss->check_dv1 = true;

	if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
		llss->check_dv2 = true;

	/* we cannot use lsl->sl_dvX directly because we may swap them */
	llss->dv1 = lsl->sl_dv1;
	llss->dv2 = lsl->sl_dv2;

	rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
	if (rc == 0) /* same file, done! */
		GOTO(free, rc = 0);

	if (rc < 0) { /* sequentialize it */
		swap(llss->inode1, llss->inode2);
		swap(file1, file2);
		swap(llss->dv1, llss->dv2);
		swap(llss->check_dv1, llss->check_dv2);
	}

	gid = lsl->sl_gid;
	if (gid != 0) { /* application asks to flush dirty cache */
		rc = ll_get_grouplock(llss->inode1, file1, gid);
		if (rc < 0)
			GOTO(free, rc);

		rc = ll_get_grouplock(llss->inode2, file2, gid);
		if (rc < 0) {
			ll_put_grouplock(llss->inode1, file1, gid);
			GOTO(free, rc);
		}
	}

	/* to be able to restore mtime and atime after swap
	 * we need to first save them */
	if (lsl->sl_flags &
	    (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
		llss->ia1.ia_mtime = llss->inode1->i_mtime;
		llss->ia1.ia_atime = llss->inode1->i_atime;
		llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
		llss->ia2.ia_mtime = llss->inode2->i_mtime;
		llss->ia2.ia_atime = llss->inode2->i_atime;
		llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
	}

	/* ultimate check, before swapping the layouts we check if
	 * dataversion has changed (if requested) */
	if (llss->check_dv1) {
		rc = ll_data_version(llss->inode1, &dv, 0);
		if (rc)
			GOTO(putgl, rc);
		if (dv != llss->dv1)
			GOTO(putgl, rc = -EAGAIN);
	}

	if (llss->check_dv2) {
		rc = ll_data_version(llss->inode2, &dv, 0);
		if (rc)
			GOTO(putgl, rc);
		if (dv != llss->dv2)
			GOTO(putgl, rc = -EAGAIN);
	}

	/* struct md_op_data is used to send the swap args to the mdt
	 * only flags is missing, so we use struct mdc_swap_layouts
	 * through the md_op_data->op_data */
	/* flags from user space have to be converted before they are send to
	 * server, no flag is sent today, they are only used on the client */
	msl.msl_flags = 0;
	rc = -ENOMEM;
	op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
				     0, LUSTRE_OPC_ANY, &msl);
	if (IS_ERR(op_data))
		GOTO(free, rc = PTR_ERR(op_data));

	rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
			   sizeof(*op_data), op_data, NULL);
	ll_finish_md_op_data(op_data);

putgl:
	/* release in reverse acquisition order */
	if (gid != 0) {
		ll_put_grouplock(llss->inode2, file2, gid);
		ll_put_grouplock(llss->inode1, file1, gid);
	}

	/* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
	if (rc != 0)
		GOTO(free, rc);

	/* clear useless flags */
	if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
		llss->ia1.ia_valid &= ~ATTR_MTIME;
		llss->ia2.ia_valid &= ~ATTR_MTIME;
	}

	if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
		llss->ia1.ia_valid &= ~ATTR_ATIME;
		llss->ia2.ia_valid &= ~ATTR_ATIME;
	}

	/* update time if requested; note the saved times follow their data,
	 * so ia2 is applied to inode1 and vice versa */
	rc = 0;
	if (llss->ia2.ia_valid != 0) {
		mutex_lock(&llss->inode1->i_mutex);
		rc = ll_setattr(file1->f_dentry, &llss->ia2);
		mutex_unlock(&llss->inode1->i_mutex);
	}

	if (llss->ia1.ia_valid != 0) {
		int rc1;

		mutex_lock(&llss->inode2->i_mutex);
		rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
		mutex_unlock(&llss->inode2->i_mutex);
		if (rc == 0)
			rc = rc1;
	}

free:
	if (llss != NULL)
		OBD_FREE_PTR(llss);

	return rc;
}
2052
2053 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2054 {
2055 struct md_op_data *op_data;
2056 int rc;
2057
2058 /* Non-root users are forbidden to set or clear flags which are
2059 * NOT defined in HSM_USER_MASK. */
2060 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2061 !capable(CFS_CAP_SYS_ADMIN))
2062 return -EPERM;
2063
2064 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2065 LUSTRE_OPC_ANY, hss);
2066 if (IS_ERR(op_data))
2067 return PTR_ERR(op_data);
2068
2069 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2070 sizeof(*op_data), op_data, NULL);
2071
2072 ll_finish_md_op_data(op_data);
2073
2074 return rc;
2075 }
2076
/**
 * Import a file into HSM: mark it archived+exists+released on the MDT,
 * then stamp the size, mode, ownership and timestamps supplied by the
 * copytool onto the (object-less) file via ll_setattr_raw().
 */
static int ll_hsm_import(struct inode *inode, struct file *file,
			 struct hsm_user_import *hui)
{
	struct hsm_state_set *hss = NULL;
	struct iattr *attr = NULL;
	int rc;


	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	/* set HSM flags */
	OBD_ALLOC_PTR(hss);
	if (hss == NULL)
		GOTO(out, rc = -ENOMEM);

	hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
	hss->hss_archive_id = hui->hui_archive_id;
	hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
	rc = ll_hsm_state_set(inode, hss);
	if (rc != 0)
		GOTO(out, rc);

	OBD_ALLOC_PTR(attr);
	if (attr == NULL)
		GOTO(out, rc = -ENOMEM);

	/* only permission bits from the copytool; force regular-file type */
	attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
	attr->ia_mode |= S_IFREG;
	attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
	attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
	attr->ia_size = hui->hui_size;
	attr->ia_mtime.tv_sec = hui->hui_mtime;
	attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
	attr->ia_atime.tv_sec = hui->hui_atime;
	attr->ia_atime.tv_nsec = hui->hui_atime_ns;

	attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
			 ATTR_UID | ATTR_GID |
			 ATTR_MTIME | ATTR_MTIME_SET |
			 ATTR_ATIME | ATTR_ATIME_SET;

	rc = ll_setattr_raw(file->f_dentry, attr, true);
	/* a released file has no data objects, so -ENODATA is expected */
	if (rc == -ENODATA)
		rc = 0;

out:
	if (hss != NULL)
		OBD_FREE_PTR(hss);

	if (attr != NULL)
		OBD_FREE_PTR(attr);

	return rc;
}
2132
/**
 * Ioctl entry point for regular files (unlocked_ioctl).
 *
 * Handles llite-private commands (striping, HSM, leases, layout swap,
 * group locks, FID translation, ...).  Unknown commands are first offered
 * to dynamically registered handlers (ll_iocontrol_call) and finally
 * passed through to the data export via obd_iocontrol().
 *
 * \retval 0 or a positive command-specific value on success,
 *	   negative errno on failure.
 */
static long
ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	int flags, rc;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
	       inode->i_generation, inode, cmd);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);

	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
	if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
		return -ENOTTY;

	switch(cmd) {
	case LL_IOC_GETFLAGS:
		/* Get the current value of the file flags */
		return put_user(fd->fd_flags, (int *)arg);
	case LL_IOC_SETFLAGS:
	case LL_IOC_CLRFLAGS:
		/* Set or clear specific file flags */
		/* XXX This probably needs checks to ensure the flags are
		 * not abused, and to handle any flag side effects.
		 */
		if (get_user(flags, (int *) arg))
			return -EFAULT;

		if (cmd == LL_IOC_SETFLAGS) {
			/* locking can only be disabled for O_DIRECT I/O */
			if ((flags & LL_FILE_IGNORE_LOCK) &&
			    !(file->f_flags & O_DIRECT)) {
				CERROR("%s: unable to disable locking on "
				       "non-O_DIRECT file\n", current->comm);
				return -EINVAL;
			}

			fd->fd_flags |= flags;
		} else {
			fd->fd_flags &= ~flags;
		}
		return 0;
	case LL_IOC_LOV_SETSTRIPE:
		return ll_lov_setstripe(inode, file, arg);
	case LL_IOC_LOV_SETEA:
		return ll_lov_setea(inode, file, arg);
	case LL_IOC_LOV_SWAP_LAYOUTS: {
		struct file *file2;
		struct lustre_swap_layouts lsl;

		if (copy_from_user(&lsl, (char *)arg,
				   sizeof(struct lustre_swap_layouts)))
			return -EFAULT;

		/* both files must be open for write to swap layouts */
		if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
			return -EPERM;

		file2 = fget(lsl.sl_fd);
		if (file2 == NULL)
			return -EBADF;

		rc = -EPERM;
		if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
			rc = ll_swap_layouts(file, file2, &lsl);
		fput(file2);
		return rc;
	}
	case LL_IOC_LOV_GETSTRIPE:
		return ll_lov_getstripe(inode, arg);
	case LL_IOC_RECREATE_OBJ:
		return ll_lov_recreate_obj(inode, arg);
	case LL_IOC_RECREATE_FID:
		return ll_lov_recreate_fid(inode, arg);
	case FSFILT_IOC_FIEMAP:
		return ll_ioctl_fiemap(inode, arg);
	case FSFILT_IOC_GETFLAGS:
	case FSFILT_IOC_SETFLAGS:
		return ll_iocontrol(inode, file, cmd, arg);
	case FSFILT_IOC_GETVERSION_OLD:
	case FSFILT_IOC_GETVERSION:
		return put_user(inode->i_generation, (int *)arg);
	case LL_IOC_GROUP_LOCK:
		return ll_get_grouplock(inode, file, arg);
	case LL_IOC_GROUP_UNLOCK:
		return ll_put_grouplock(inode, file, arg);
	case IOC_OBD_STATFS:
		return ll_obd_statfs(inode, (void *)arg);

	/* We need to special case any other ioctls we want to handle,
	 * to send them to the MDS/OST as appropriate and to properly
	 * network encode the arg field.
	case FSFILT_IOC_SETVERSION_OLD:
	case FSFILT_IOC_SETVERSION:
	*/
	case LL_IOC_FLUSHCTX:
		return ll_flush_ctx(inode);
	case LL_IOC_PATH2FID: {
		if (copy_to_user((void *)arg, ll_inode2fid(inode),
				 sizeof(struct lu_fid)))
			return -EFAULT;

		return 0;
	}
	case OBD_IOC_FID2PATH:
		return ll_fid2path(inode, (void *)arg);
	case LL_IOC_DATA_VERSION: {
		struct ioc_data_version idv;
		int rc;

		if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
			return -EFAULT;

		rc = ll_data_version(inode, &idv.idv_version,
				     !(idv.idv_flags & LL_DV_NOFLUSH));

		if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
			return -EFAULT;

		return rc;
	}

	case LL_IOC_GET_MDTIDX: {
		int mdtidx;

		mdtidx = ll_get_mdt_idx(inode);
		if (mdtidx < 0)
			return mdtidx;

		if (put_user((int)mdtidx, (int*)arg))
			return -EFAULT;

		return 0;
	}
	case OBD_IOC_GETDTNAME:
	case OBD_IOC_GETMDNAME:
		return ll_get_obd_name(inode, cmd, arg);
	case LL_IOC_HSM_STATE_GET: {
		struct md_op_data *op_data;
		struct hsm_user_state *hus;
		int rc;

		OBD_ALLOC_PTR(hus);
		if (hus == NULL)
			return -ENOMEM;

		/* hus is passed as op_data payload and filled by the MDT */
		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
					     LUSTRE_OPC_ANY, hus);
		if (IS_ERR(op_data)) {
			OBD_FREE_PTR(hus);
			return PTR_ERR(op_data);
		}

		rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
				   op_data, NULL);

		if (copy_to_user((void *)arg, hus, sizeof(*hus)))
			rc = -EFAULT;

		ll_finish_md_op_data(op_data);
		OBD_FREE_PTR(hus);
		return rc;
	}
	case LL_IOC_HSM_STATE_SET: {
		struct hsm_state_set *hss;
		int rc;

		OBD_ALLOC_PTR(hss);
		if (hss == NULL)
			return -ENOMEM;

		if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
			OBD_FREE_PTR(hss);
			return -EFAULT;
		}

		rc = ll_hsm_state_set(inode, hss);

		OBD_FREE_PTR(hss);
		return rc;
	}
	case LL_IOC_HSM_ACTION: {
		struct md_op_data *op_data;
		struct hsm_current_action *hca;
		int rc;

		OBD_ALLOC_PTR(hca);
		if (hca == NULL)
			return -ENOMEM;

		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
					     LUSTRE_OPC_ANY, hca);
		if (IS_ERR(op_data)) {
			OBD_FREE_PTR(hca);
			return PTR_ERR(op_data);
		}

		rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
				   op_data, NULL);

		if (copy_to_user((char *)arg, hca, sizeof(*hca)))
			rc = -EFAULT;

		ll_finish_md_op_data(op_data);
		OBD_FREE_PTR(hca);
		return rc;
	}
	case LL_IOC_SET_LEASE: {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct obd_client_handle *och = NULL;
		bool lease_broken;
		fmode_t mode = 0;

		switch (arg) {
		case F_WRLCK:
			if (!(file->f_mode & FMODE_WRITE))
				return -EPERM;
			mode = FMODE_WRITE;
			break;
		case F_RDLCK:
			if (!(file->f_mode & FMODE_READ))
				return -EPERM;
			mode = FMODE_READ;
			break;
		case F_UNLCK:
			/* detach the lease handle under the mutex, close
			 * it outside to avoid holding the lock over RPCs */
			mutex_lock(&lli->lli_och_mutex);
			if (fd->fd_lease_och != NULL) {
				och = fd->fd_lease_och;
				fd->fd_lease_och = NULL;
			}
			mutex_unlock(&lli->lli_och_mutex);

			if (och != NULL) {
				mode = och->och_flags &
				       (FMODE_READ|FMODE_WRITE);
				rc = ll_lease_close(och, inode, &lease_broken);
				if (rc == 0 && lease_broken)
					mode = 0;
			} else {
				rc = -ENOLCK;
			}

			/* return the type of lease or error */
			return rc < 0 ? rc : (int)mode;
		default:
			return -EINVAL;
		}

		CDEBUG(D_INODE, "Set lease with mode %d\n", mode);

		/* apply for lease */
		och = ll_lease_open(inode, file, mode, 0);
		if (IS_ERR(och))
			return PTR_ERR(och);

		rc = 0;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och == NULL) {
			fd->fd_lease_och = och;
			och = NULL;
		}
		mutex_unlock(&lli->lli_och_mutex);
		if (och != NULL) {
			/* impossible now that only excl is supported for now */
			ll_lease_close(och, inode, &lease_broken);
			rc = -EBUSY;
		}
		return rc;
	}
	case LL_IOC_GET_LEASE: {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct ldlm_lock *lock = NULL;

		rc = 0;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och != NULL) {
			struct obd_client_handle *och = fd->fd_lease_och;

			lock = ldlm_handle2lock(&och->och_lease_handle);
			if (lock != NULL) {
				lock_res_and_lock(lock);
				if (!ldlm_is_cancel(lock))
					rc = och->och_flags &
					     (FMODE_READ | FMODE_WRITE);
				unlock_res_and_lock(lock);
				/* NOTE(review): ll_have_md_lock() below drops
				 * its reference with LDLM_LOCK_PUT(); verify
				 * whether ldlm_lock_put() here is the same
				 * refcount drop or should be made consistent */
				ldlm_lock_put(lock);
			}
		}
		mutex_unlock(&lli->lli_och_mutex);
		return rc;
	}
	case LL_IOC_HSM_IMPORT: {
		struct hsm_user_import *hui;

		OBD_ALLOC_PTR(hui);
		if (hui == NULL)
			return -ENOMEM;

		if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
			OBD_FREE_PTR(hui);
			return -EFAULT;
		}

		rc = ll_hsm_import(inode, file, hui);

		OBD_FREE_PTR(hui);
		return rc;
	}
	default: {
		int err;

		/* offer the command to dynamically registered handlers
		 * first, then fall through to the data export */
		if (LLIOC_STOP ==
		    ll_iocontrol_call(inode, file, cmd, arg, &err))
			return err;

		return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
				     (void *)arg);
	}
	}
}
2451
2452
2453 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2454 {
2455 struct inode *inode = file->f_dentry->d_inode;
2456 loff_t retval, eof = 0;
2457
2458 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2459 (origin == SEEK_CUR) ? file->f_pos : 0);
2460 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2461 inode->i_ino, inode->i_generation, inode, retval, retval,
2462 origin);
2463 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2464
2465 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2466 retval = ll_glimpse_size(inode);
2467 if (retval != 0)
2468 return retval;
2469 eof = i_size_read(inode);
2470 }
2471
2472 retval = generic_file_llseek_size(file, offset, origin,
2473 ll_file_maxbytes(inode), eof);
2474 return retval;
2475 }
2476
2477 static int ll_flush(struct file *file, fl_owner_t id)
2478 {
2479 struct inode *inode = file->f_dentry->d_inode;
2480 struct ll_inode_info *lli = ll_i2info(inode);
2481 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2482 int rc, err;
2483
2484 LASSERT(!S_ISDIR(inode->i_mode));
2485
2486 /* catch async errors that were recorded back when async writeback
2487 * failed for pages in this mapping. */
2488 rc = lli->lli_async_rc;
2489 lli->lli_async_rc = 0;
2490 err = lov_read_and_clear_async_rc(lli->lli_clob);
2491 if (rc == 0)
2492 rc = err;
2493
2494 /* The application has been told write failure already.
2495 * Do not report failure again. */
2496 if (fd->fd_write_failed)
2497 return 0;
2498 return rc ? -EIO : 0;
2499 }
2500
2501 /**
2502 * Called to make sure a portion of file has been written out.
2503 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2504 *
2505 * Return how many pages have been written.
2506 */
2507 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2508 enum cl_fsync_mode mode, int ignore_layout)
2509 {
2510 struct cl_env_nest nest;
2511 struct lu_env *env;
2512 struct cl_io *io;
2513 struct obd_capa *capa = NULL;
2514 struct cl_fsync_io *fio;
2515 int result;
2516
2517 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2518 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2519 return -EINVAL;
2520
2521 env = cl_env_nested_get(&nest);
2522 if (IS_ERR(env))
2523 return PTR_ERR(env);
2524
2525 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2526
2527 io = ccc_env_thread_io(env);
2528 io->ci_obj = cl_i2info(inode)->lli_clob;
2529 io->ci_ignore_layout = ignore_layout;
2530
2531 /* initialize parameters for sync */
2532 fio = &io->u.ci_fsync;
2533 fio->fi_capa = capa;
2534 fio->fi_start = start;
2535 fio->fi_end = end;
2536 fio->fi_fid = ll_inode2fid(inode);
2537 fio->fi_mode = mode;
2538 fio->fi_nr_written = 0;
2539
2540 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2541 result = cl_io_loop(env, io);
2542 else
2543 result = io->ci_result;
2544 if (result == 0)
2545 result = fio->fi_nr_written;
2546 cl_io_fini(env, io);
2547 cl_env_nested_put(&nest, env);
2548
2549 capa_put(capa);
2550
2551 return result;
2552 }
2553
/*
 * Flush dirty pages in [start, end] and sync metadata with the MDS.
 * NOTE(review): the previous comment here described a separate "dentry"
 * argument ("when dentry is provided, *file->f_dentry may be null...")
 * that this signature no longer takes; the dentry is always obtained
 * from file->f_dentry below — confirm the comment is simply stale.
 */
2559
/**
 * fsync/fdatasync handler.
 *
 * Writes back the page cache range, syncs metadata with the MDS
 * (md_sync), and for regular files forces the data stable on the OSTs
 * via cl_sync_file_range(CL_FSYNC_ALL).  The first error encountered is
 * the one returned; later steps still run so all state is flushed.
 */
int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct dentry *dentry = file->f_dentry;
	struct inode *inode = dentry->d_inode;
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ptlrpc_request *req;
	struct obd_capa *oc;
	int rc, err;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
	       inode->i_generation, inode);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	mutex_lock(&inode->i_mutex);

	/* catch async errors that were recorded back when async writeback
	 * failed for pages in this mapping. */
	if (!S_ISDIR(inode->i_mode)) {
		err = lli->lli_async_rc;
		lli->lli_async_rc = 0;
		if (rc == 0)
			rc = err;
		err = lov_read_and_clear_async_rc(lli->lli_clob);
		if (rc == 0)
			rc = err;
	}

	/* sync metadata on the MDS */
	oc = ll_mdscapa_get(inode);
	err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
		      &req);
	capa_put(oc);
	if (!rc)
		rc = err;
	/* md_sync only produced a request to free when it succeeded */
	if (!err)
		ptlrpc_req_finished(req);

	if (S_ISREG(inode->i_mode)) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		/* force data stable on the OSTs; remember a failure so
		 * ll_flush() does not report it twice */
		err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
		if (rc == 0 && err < 0)
			rc = err;
		if (rc < 0)
			fd->fd_write_failed = true;
		else
			fd->fd_write_failed = false;
	}

	mutex_unlock(&inode->i_mutex);
	return rc;
}
2612
/**
 * flock()/fcntl() byte-range lock handler.
 *
 * Translates the VFS file_lock into an LDLM_FLOCK enqueue on the MDS and,
 * on success, mirrors the lock into the local VFS lock lists so the
 * kernel's deadlock/ownership bookkeeping stays coherent.  If the local
 * mirroring fails, the server lock is dropped again (re-enqueued as
 * LCK_NL, which acts as an unlock).
 */
static int
ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ldlm_enqueue_info einfo = {
		.ei_type	= LDLM_FLOCK,
		.ei_cb_cp	= ldlm_flock_completion_ast,
		.ei_cbdata	= file_lock,
	};
	struct md_op_data *op_data;
	struct lustre_handle lockh = {0};
	ldlm_policy_data_t flock = {{0}};
	int flags = 0;
	int rc;
	int rc2 = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
	       inode->i_ino, file_lock);

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);

	/* flock(2) only arrives as F_SETLK/F_SETLKW; anything else must
	 * be a POSIX (fcntl) lock */
	if (file_lock->fl_flags & FL_FLOCK)
		LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
	else if (!(file_lock->fl_flags & FL_POSIX))
		return -EINVAL;

	flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
	flock.l_flock.pid = file_lock->fl_pid;
	flock.l_flock.start = file_lock->fl_start;
	flock.l_flock.end = file_lock->fl_end;

	/* Somewhat ugly workaround for svc lockd.
	 * lockd installs custom fl_lmops->lm_compare_owner that checks
	 * for the fl_owner to be the same (which it always is on local node
	 * I guess between lockd processes) and then compares pid.
	 * As such we assign pid to the owner field to make it all work,
	 * conflict with normal locks is unlikely since pid space and
	 * pointer space for current->files are not intersecting */
	if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
		flock.l_flock.owner = (unsigned long)file_lock->fl_pid;

	switch (file_lock->fl_type) {
	case F_RDLCK:
		einfo.ei_mode = LCK_PR;
		break;
	case F_UNLCK:
		/* An unlock request may or may not have any relation to
		 * existing locks so we may not be able to pass a lock handle
		 * via a normal ldlm_lock_cancel() request. The request may even
		 * unlock a byte range in the middle of an existing lock. In
		 * order to process an unlock request we need all of the same
		 * information that is given with a normal read or write record
		 * lock request. To avoid creating another ldlm unlock (cancel)
		 * message we'll treat a LCK_NL flock request as an unlock. */
		einfo.ei_mode = LCK_NL;
		break;
	case F_WRLCK:
		einfo.ei_mode = LCK_PW;
		break;
	default:
		CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
		       file_lock->fl_type);
		return -ENOTSUPP;
	}

	switch (cmd) {
	case F_SETLKW:
#ifdef F_SETLKW64
	case F_SETLKW64:
#endif
		flags = 0;
		break;
	case F_SETLK:
#ifdef F_SETLK64
	case F_SETLK64:
#endif
		flags = LDLM_FL_BLOCK_NOWAIT;
		break;
	case F_GETLK:
#ifdef F_GETLK64
	case F_GETLK64:
#endif
		flags = LDLM_FL_TEST_LOCK;
		/* Save the old mode so that if the mode in the lock changes we
		 * can decrement the appropriate reader or writer refcount. */
		file_lock->fl_type = einfo.ei_mode;
		break;
	default:
		CERROR("unknown fcntl lock command: %d\n", cmd);
		return -EINVAL;
	}

	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
	       "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
	       flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);

	rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
			op_data, &lockh, &flock, 0, NULL /* req */, flags);

	/* mirror the result into the kernel's local lock lists; unlocks
	 * are applied locally even if the server enqueue failed */
	if ((file_lock->fl_flags & FL_FLOCK) &&
	    (rc == 0 || file_lock->fl_type == F_UNLCK))
		rc2  = flock_lock_file_wait(file, file_lock);
	if ((file_lock->fl_flags & FL_POSIX) &&
	    (rc == 0 || file_lock->fl_type == F_UNLCK) &&
	    !(flags & LDLM_FL_TEST_LOCK))
		rc2  = posix_lock_file_wait(file, file_lock);

	if (rc2 && file_lock->fl_type != F_UNLCK) {
		/* local bookkeeping failed: drop the server-side lock by
		 * re-enqueueing it as LCK_NL (an unlock) */
		einfo.ei_mode = LCK_NL;
		md_enqueue(sbi->ll_md_exp, &einfo, NULL,
			   op_data, &lockh, &flock, 0, NULL /* req */, flags);
		rc = rc2;
	}

	ll_finish_md_op_data(op_data);

	return rc;
}
2737
2738 static int
2739 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2740 {
2741 return -ENOSYS;
2742 }
2743
2744 /**
2745 * test if some locks matching bits and l_req_mode are acquired
2746 * - bits can be in different locks
2747 * - if found clear the common lock bits in *bits
2748 * - the bits not found, are kept in *bits
2749 * \param inode [IN]
2750 * \param bits [IN] searched lock bits [IN]
2751 * \param l_req_mode [IN] searched lock mode
2752 * \retval boolean, true iff all bits are found
2753 */
2754 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2755 {
2756 struct lustre_handle lockh;
2757 ldlm_policy_data_t policy;
2758 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2759 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2760 struct lu_fid *fid;
2761 __u64 flags;
2762 int i;
2763
2764 if (!inode)
2765 return 0;
2766
2767 fid = &ll_i2info(inode)->lli_fid;
2768 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2769 ldlm_lockname[mode]);
2770
2771 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2772 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2773 policy.l_inodebits.bits = *bits & (1 << i);
2774 if (policy.l_inodebits.bits == 0)
2775 continue;
2776
2777 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2778 &policy, mode, &lockh)) {
2779 struct ldlm_lock *lock;
2780
2781 lock = ldlm_handle2lock(&lockh);
2782 if (lock) {
2783 *bits &=
2784 ~(lock->l_policy_data.l_inodebits.bits);
2785 LDLM_LOCK_PUT(lock);
2786 } else {
2787 *bits &= ~policy.l_inodebits.bits;
2788 }
2789 }
2790 }
2791 return *bits == 0;
2792 }
2793
2794 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2795 struct lustre_handle *lockh, __u64 flags,
2796 ldlm_mode_t mode)
2797 {
2798 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2799 struct lu_fid *fid;
2800 ldlm_mode_t rc;
2801
2802 fid = &ll_i2info(inode)->lli_fid;
2803 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2804
2805 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
2806 fid, LDLM_IBITS, &policy, mode, lockh);
2807
2808 return rc;
2809 }
2810
2811 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
2812 {
2813 /* Already unlinked. Just update nlink and return success */
2814 if (rc == -ENOENT) {
2815 clear_nlink(inode);
2816 /* This path cannot be hit for regular files unless in
2817 * case of obscure races, so no need to validate size.
2818 */
2819 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
2820 return 0;
2821 } else if (rc != 0) {
2822 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
2823 "%s: revalidate FID "DFID" error: rc = %d\n",
2824 ll_get_fsname(inode->i_sb, NULL, 0),
2825 PFID(ll_inode2fid(inode)), rc);
2826 }
2827
2828 return rc;
2829 }
2830
/**
 * Refresh the attributes of \a dentry's inode from the MDS.
 *
 * Two paths: with OBD_CONNECT_ATTRFID the server supports an intent
 * getattr-by-FID (md_intent_lock); otherwise, if no suitable ibits lock
 * is already cached, a plain md_getattr RPC is issued and the reply is
 * applied via ll_prep_inode().
 */
static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
{
	struct inode *inode = dentry->d_inode;
	struct ptlrpc_request *req = NULL;
	struct obd_export *exp;
	int rc = 0;

	LASSERT(inode != NULL);

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
	       inode->i_ino, inode->i_generation, inode, dentry->d_name.name);

	exp = ll_i2mdexp(inode);

	/* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
	 * But under CMD case, it caused some lock issues, should be fixed
	 * with new CMD ibits lock. See bug 12718 */
	if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
		struct lookup_intent oit = { .it_op = IT_GETATTR };
		struct md_op_data *op_data;

		/* a pure LOOKUP revalidation can use the cheaper intent */
		if (ibits == MDS_INODELOCK_LOOKUP)
			oit.it_op = IT_LOOKUP;

		/* Call getattr by fid, so do not provide name at all. */
		op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
					     dentry->d_inode, NULL, 0, 0,
					     LUSTRE_OPC_ANY, NULL);
		if (IS_ERR(op_data))
			return PTR_ERR(op_data);

		oit.it_create_mode |= M_CHECK_STALE;
		rc = md_intent_lock(exp, op_data, NULL, 0,
				    /* we are not interested in name
				       based lookup */
				    &oit, 0, &req,
				    ll_md_blocking_ast, 0);
		ll_finish_md_op_data(op_data);
		oit.it_create_mode &= ~M_CHECK_STALE;
		if (rc < 0) {
			rc = ll_inode_revalidate_fini(inode, rc);
			GOTO (out, rc);
		}

		rc = ll_revalidate_it_finish(req, &oit, dentry);
		if (rc != 0) {
			ll_intent_release(&oit);
			GOTO(out, rc);
		}

		/* Unlinked? Unhash dentry, so it is not picked up later by
		   do_lookup() -> ll_revalidate_it(). We cannot use d_drop
		   here to preserve get_cwd functionality on 2.6.
		   Bug 10503 */
		if (!dentry->d_inode->i_nlink)
			d_lustre_invalidate(dentry, 0);

		ll_lookup_finish_locks(&oit, dentry);
	} else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
		/* no cached lock covers the requested bits: fetch fresh
		 * attributes with a plain getattr RPC */
		struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
		obd_valid valid = OBD_MD_FLGETATTR;
		struct md_op_data *op_data;
		int ealen = 0;

		if (S_ISREG(inode->i_mode)) {
			/* regular files also need the striping EA */
			rc = ll_get_default_mdsize(sbi, &ealen);
			if (rc)
				return rc;
			valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
		}

		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
					     0, ealen, LUSTRE_OPC_ANY,
					     NULL);
		if (IS_ERR(op_data))
			return PTR_ERR(op_data);

		op_data->op_valid = valid;
		/* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
		 * capa for this inode. Because we only keep capas of dirs
		 * fresh. */
		rc = md_getattr(sbi->ll_md_exp, op_data, &req);
		ll_finish_md_op_data(op_data);
		if (rc) {
			rc = ll_inode_revalidate_fini(inode, rc);
			return rc;
		}

		rc = ll_prep_inode(&inode, req, NULL, NULL);
	}
out:
	ptlrpc_req_finished(req);
	return rc;
}
2925
2926 static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
2927 {
2928 struct inode *inode = dentry->d_inode;
2929 int rc;
2930
2931 rc = __ll_inode_revalidate(dentry, ibits);
2932 if (rc != 0)
2933 return rc;
2934
2935 /* if object isn't regular file, don't validate size */
2936 if (!S_ISREG(inode->i_mode)) {
2937 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2938 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2939 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2940 } else {
2941 /* In case of restore, the MDT has the right size and has
2942 * already send it back without granting the layout lock,
2943 * inode is up-to-date so glimpse is useless.
2944 * Also to glimpse we need the layout, in case of a running
2945 * restore the MDT holds the layout lock so the glimpse will
2946 * block up to the end of restore (getattr will block)
2947 */
2948 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
2949 rc = ll_glimpse_size(inode);
2950 }
2951 return rc;
2952 }
2953
2954 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2955 {
2956 struct inode *inode = de->d_inode;
2957 struct ll_sb_info *sbi = ll_i2sbi(inode);
2958 struct ll_inode_info *lli = ll_i2info(inode);
2959 int res = 0;
2960
2961 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
2962 MDS_INODELOCK_LOOKUP);
2963 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
2964
2965 if (res)
2966 return res;
2967
2968 stat->dev = inode->i_sb->s_dev;
2969 if (ll_need_32bit_api(sbi))
2970 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
2971 else
2972 stat->ino = inode->i_ino;
2973 stat->mode = inode->i_mode;
2974 stat->nlink = inode->i_nlink;
2975 stat->uid = inode->i_uid;
2976 stat->gid = inode->i_gid;
2977 stat->rdev = inode->i_rdev;
2978 stat->atime = inode->i_atime;
2979 stat->mtime = inode->i_mtime;
2980 stat->ctime = inode->i_ctime;
2981 stat->blksize = 1 << inode->i_blkbits;
2982
2983 stat->size = i_size_read(inode);
2984 stat->blocks = inode->i_blocks;
2985
2986 return 0;
2987 }
2988
/**
 * VFS ->fiemap handler: marshal the kernel's fiemap_extent_info into a
 * ll_user_fiemap buffer, let ll_do_fiemap() fill it from the OSTs, and
 * copy the mapped extents back.
 */
static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		     __u64 start, __u64 len)
{
	int rc;
	size_t num_bytes;
	struct ll_user_fiemap *fiemap;
	unsigned int extent_count = fieinfo->fi_extents_max;

	/* header plus room for all requested extents; extent_count is
	 * bounded by the VFS fiemap ioctl — presumably no overflow here,
	 * TODO confirm against the ioctl_fiemap() caller */
	num_bytes = sizeof(*fiemap) + (extent_count *
				       sizeof(struct ll_fiemap_extent));
	OBD_ALLOC_LARGE(fiemap, num_bytes);

	if (fiemap == NULL)
		return -ENOMEM;

	fiemap->fm_flags = fieinfo->fi_flags;
	fiemap->fm_extent_count = fieinfo->fi_extents_max;
	fiemap->fm_start = start;
	fiemap->fm_length = len;
	/* NOTE(review): only the FIRST extent is copied in; it appears to
	 * seed continuation state for ll_do_fiemap() — confirm */
	if (extent_count > 0)
		memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
		       sizeof(struct ll_fiemap_extent));

	rc = ll_do_fiemap(inode, fiemap, num_bytes);

	/* copy flags and all mapped extents back to the VFS structure */
	fieinfo->fi_flags = fiemap->fm_flags;
	fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
	if (extent_count > 0)
		memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
		       fiemap->fm_mapped_extents *
		       sizeof(struct ll_fiemap_extent));

	OBD_FREE_LARGE(fiemap, num_bytes);
	return rc;
}
3024
3025 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3026 {
3027 struct ll_inode_info *lli = ll_i2info(inode);
3028 struct posix_acl *acl = NULL;
3029
3030 spin_lock(&lli->lli_lock);
3031 /* VFS' acl_permission_check->check_acl will release the refcount */
3032 acl = posix_acl_dup(lli->lli_posix_acl);
3033 spin_unlock(&lli->lli_lock);
3034
3035 return acl;
3036 }
3037
3038
3039 int ll_inode_permission(struct inode *inode, int mask)
3040 {
3041 int rc = 0;
3042
3043 #ifdef MAY_NOT_BLOCK
3044 if (mask & MAY_NOT_BLOCK)
3045 return -ECHILD;
3046 #endif
3047
3048 /* as root inode are NOT getting validated in lookup operation,
3049 * need to do it before permission check. */
3050
3051 if (inode == inode->i_sb->s_root->d_inode) {
3052 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3053 MDS_INODELOCK_LOOKUP);
3054 if (rc)
3055 return rc;
3056 }
3057
3058 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3059 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
3060
3061 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3062 return lustre_check_remote_perm(inode, mask);
3063
3064 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3065 rc = generic_permission(inode, mask);
3066
3067 return rc;
3068 }
3069
/* -o localflock - only provides locally consistent flock locks */
struct file_operations ll_file_operations = {
	.read = new_sync_read,
	.read_iter = ll_file_read_iter,
	.write = new_sync_write,
	.write_iter = ll_file_write_iter,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush
	/* no .flock/.lock here: the VFS falls back to node-local locking */
};
3085
/* default: cluster-coherent flock()/fcntl() locks via the MDS */
struct file_operations ll_file_operations_flock = {
	.read = new_sync_read,
	.read_iter = ll_file_read_iter,
	.write = new_sync_write,
	.write_iter = ll_file_write_iter,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_flock,
	.lock = ll_file_flock
};
3102
/* These are for -o noflock - to return ENOSYS on flock calls */
struct file_operations ll_file_operations_noflock = {
	.read = new_sync_read,
	.read_iter = ll_file_read_iter,
	.write = new_sync_write,
	.write_iter = ll_file_write_iter,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_noflock,
	.lock = ll_file_noflock
};
3120
/* inode operations shared by all three file_operations variants above */
struct inode_operations ll_file_inode_operations = {
	.setattr	= ll_setattr,
	.getattr	= ll_getattr,
	.permission	= ll_inode_permission,
	.setxattr	= ll_setxattr,
	.getxattr	= ll_getxattr,
	.listxattr	= ll_listxattr,
	.removexattr	= ll_removexattr,
	.fiemap		= ll_fiemap,
	.get_acl	= ll_get_acl,
};
3132
/* dynamic ioctl number support routines */
static struct llioc_ctl_data {
	struct rw_semaphore ioc_sem;	/* protects ioc_head */
	struct list_head ioc_head;	/* registered llioc_data entries */
} llioc = {
	__RWSEM_INITIALIZER(llioc.ioc_sem),
	LIST_HEAD_INIT(llioc.ioc_head)
};
3141
3142
/* one dynamically registered ioctl handler and the commands it serves */
struct llioc_data {
	struct list_head iocd_list;	/* linkage on llioc.ioc_head */
	unsigned int iocd_size;		/* total allocation size, for freeing */
	llioc_callback_t iocd_cb;	/* handler callback */
	unsigned int iocd_count;	/* number of entries in iocd_cmd[] */
	unsigned int iocd_cmd[0];	/* ioctl numbers handled by iocd_cb */
};
3150
3151 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3152 {
3153 unsigned int size;
3154 struct llioc_data *in_data = NULL;
3155
3156 if (cb == NULL || cmd == NULL ||
3157 count > LLIOC_MAX_CMD || count < 0)
3158 return NULL;
3159
3160 size = sizeof(*in_data) + count * sizeof(unsigned int);
3161 OBD_ALLOC(in_data, size);
3162 if (in_data == NULL)
3163 return NULL;
3164
3165 memset(in_data, 0, sizeof(*in_data));
3166 in_data->iocd_size = size;
3167 in_data->iocd_cb = cb;
3168 in_data->iocd_count = count;
3169 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3170
3171 down_write(&llioc.ioc_sem);
3172 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3173 up_write(&llioc.ioc_sem);
3174
3175 return in_data;
3176 }
3177
/**
 * Remove a handler previously installed by ll_iocontrol_register().
 *
 * \param magic  the cookie returned by ll_iocontrol_register(); NULL or
 *               an unknown cookie is tolerated (the latter only warns).
 */
void ll_iocontrol_unregister(void *magic)
{
	struct llioc_data *tmp;

	if (magic == NULL)
		return;

	down_write(&llioc.ioc_sem);
	list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
		if (tmp == magic) {
			/* iocd_size must be read before freeing; drop the
			 * semaphore first so OBD_FREE runs unlocked */
			unsigned int size = tmp->iocd_size;

			list_del(&tmp->iocd_list);
			up_write(&llioc.ioc_sem);

			OBD_FREE(tmp, size);
			return;
		}
	}
	up_write(&llioc.ioc_sem);

	CWARN("didn't find iocontrol register block with magic: %p\n", magic);
}
3201
/* exported so other Lustre modules can hook additional ioctl numbers */
EXPORT_SYMBOL(ll_iocontrol_register);
EXPORT_SYMBOL(ll_iocontrol_unregister);
3204
3205 static enum llioc_iter
3206 ll_iocontrol_call(struct inode *inode, struct file *file,
3207 unsigned int cmd, unsigned long arg, int *rcp)
3208 {
3209 enum llioc_iter ret = LLIOC_CONT;
3210 struct llioc_data *data;
3211 int rc = -EINVAL, i;
3212
3213 down_read(&llioc.ioc_sem);
3214 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3215 for (i = 0; i < data->iocd_count; i++) {
3216 if (cmd != data->iocd_cmd[i])
3217 continue;
3218
3219 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3220 break;
3221 }
3222
3223 if (ret == LLIOC_STOP)
3224 break;
3225 }
3226 up_read(&llioc.ioc_sem);
3227
3228 if (rcp)
3229 *rcp = rc;
3230 return ret;
3231 }
3232
3233 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3234 {
3235 struct ll_inode_info *lli = ll_i2info(inode);
3236 struct cl_env_nest nest;
3237 struct lu_env *env;
3238 int result;
3239
3240 if (lli->lli_clob == NULL)
3241 return 0;
3242
3243 env = cl_env_nested_get(&nest);
3244 if (IS_ERR(env))
3245 return PTR_ERR(env);
3246
3247 result = cl_conf_set(env, lli->lli_clob, conf);
3248 cl_env_nested_put(&nest, env);
3249
3250 if (conf->coc_opc == OBJECT_CONF_SET) {
3251 struct ldlm_lock *lock = conf->coc_lock;
3252
3253 LASSERT(lock != NULL);
3254 LASSERT(ldlm_has_layout(lock));
3255 if (result == 0) {
3256 /* it can only be allowed to match after layout is
3257 * applied to inode otherwise false layout would be
3258 * seen. Applying layout should happen before dropping
3259 * the intent lock. */
3260 ldlm_lock_allow_match(lock);
3261 }
3262 }
3263 return result;
3264 }
3265
/* Fetch layout from MDT with getxattr request, if it's not ready yet */

/**
 * Ensure \a lock carries the file's layout in its LVB.
 *
 * If the layout lock was granted immediately, the layout already arrived in
 * the DLM reply's LVB and nothing is done.  Otherwise (lock granted via a
 * completion AST whose buffer may be too small), the LOV EA is fetched from
 * the MDT with a getxattr RPC and installed as the lock's LVB.
 *
 * \retval 0 on success (including the empty-layout case); negative errno
 *	   on RPC or allocation failure.
 */
static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)

{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obd_capa *oc;
	struct ptlrpc_request *req;
	struct mdt_body *body;
	void *lvbdata;
	void *lmm;
	int lmmsize;
	int rc;

	CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
	       PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
	       lock->l_lvb_data, lock->l_lvb_len);

	/* layout already present and valid: nothing to fetch */
	if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
		return 0;

	/* if layout lock was granted right away, the layout is returned
	 * within DLM_LVB of dlm reply; otherwise if the lock was ever
	 * blocked and then granted via completion ast, we have to fetch
	 * layout here. Please note that we can't use the LVB buffer in
	 * completion AST because it doesn't have a large enough buffer */
	oc = ll_mdscapa_get(inode);
	rc = ll_get_default_mdsize(sbi, &lmmsize);
	if (rc == 0)
		rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
				OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
				lmmsize, 0, &req);
	capa_put(oc);
	if (rc < 0)
		return rc;

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	if (body == NULL)
		GOTO(out, rc = -EPROTO);

	/* actual EA size as reported by the server */
	lmmsize = body->eadatasize;
	if (lmmsize == 0) /* empty layout */
		GOTO(out, rc = 0);

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
	if (lmm == NULL)
		GOTO(out, rc = -EFAULT);

	/* copy out of the reply buffer: the LVB must outlive the request */
	OBD_ALLOC_LARGE(lvbdata, lmmsize);
	if (lvbdata == NULL)
		GOTO(out, rc = -ENOMEM);

	memcpy(lvbdata, lmm, lmmsize);
	lock_res_and_lock(lock);
	/* replace any stale LVB under the resource lock */
	if (lock->l_lvb_data != NULL)
		OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);

	lock->l_lvb_data = lvbdata;
	lock->l_lvb_len = lmmsize;
	unlock_res_and_lock(lock);

out:
	ptlrpc_req_finished(req);
	return rc;
}
3330
/**
 * Apply the layout to the inode. Layout lock is held and will be released
 * in this function.
 *
 * \param lockh	 handle of the held layout lock; its reference is dropped
 *		 here (ldlm_lock_decref()) on every path
 * \param mode	 mode the lock was taken in, needed for the decref
 * \param gen	 out: layout generation that was applied
 * \param reconf if false, only report the cached generation; do not
 *		 (re)configure the layout
 *
 * \retval 0		layout applied, *gen valid
 * \retval -ENODATA	LVB not ready and reconf not requested
 * \retval -EAGAIN	layout was busy; caller should retry the whole cycle
 */
static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
			      struct inode *inode, __u32 *gen, bool reconf)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ldlm_lock *lock;
	struct lustre_md md = { NULL };
	struct cl_object_conf conf;
	int rc = 0;
	bool lvb_ready;
	bool wait_layout = false;

	LASSERT(lustre_handle_is_used(lockh));

	lock = ldlm_handle2lock(lockh);
	LASSERT(lock != NULL);
	LASSERT(ldlm_has_layout(lock));

	LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
		   inode, PFID(&lli->lli_fid), reconf);

	/* in case this is a caching lock and reinstate with new inode */
	md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);

	lock_res_and_lock(lock);
	lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
	unlock_res_and_lock(lock);
	/* checking lvb_ready is racy but this is okay. The worst case is
	 * that multi processes may configure the file on the same time. */
	if (lvb_ready || !reconf) {
		rc = -ENODATA;
		if (lvb_ready) {
			/* layout_gen must be valid if layout lock is not
			 * cancelled and stripe has already set */
			*gen = ll_layout_version_get(lli);
			rc = 0;
		}
		GOTO(out, rc);
	}

	/* make sure the layout is in the lock's LVB before unpacking it */
	rc = ll_layout_fetch(inode, lock);
	if (rc < 0)
		GOTO(out, rc);

	/* for layout lock, lmm is returned in lock's lvb.
	 * lvb_data is immutable if the lock is held so it's safe to access it
	 * without res lock. See the description in ldlm_lock_decref_internal()
	 * for the condition to free lvb_data of layout lock */
	if (lock->l_lvb_data != NULL) {
		rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
				  lock->l_lvb_data, lock->l_lvb_len);
		if (rc >= 0) {
			/* a NULL lsm means an empty (stripeless) layout */
			*gen = LL_LAYOUT_GEN_EMPTY;
			if (md.lsm != NULL)
				*gen = md.lsm->lsm_layout_gen;
			rc = 0;
		} else {
			CERROR("%s: file "DFID" unpackmd error: %d\n",
			       ll_get_fsname(inode->i_sb, NULL, 0),
			       PFID(&lli->lli_fid), rc);
		}
	}
	if (rc < 0)
		GOTO(out, rc);

	/* set layout to file. Unlikely this will fail as old layout was
	 * surely eliminated */
	memset(&conf, 0, sizeof(conf));
	conf.coc_opc = OBJECT_CONF_SET;
	conf.coc_inode = inode;
	conf.coc_lock = lock;
	conf.u.coc_md = &md;
	rc = ll_layout_conf(inode, &conf);

	if (md.lsm != NULL)
		obd_free_memmd(sbi->ll_dt_exp, &md.lsm);

	/* refresh layout failed, need to wait */
	wait_layout = rc == -EBUSY;

out:
	/* release the lock before possibly waiting, on every path */
	LDLM_LOCK_PUT(lock);
	ldlm_lock_decref(lockh, mode);

	/* wait for IO to complete if it's still being used. */
	if (wait_layout) {
		CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
		       ll_get_fsname(inode->i_sb, NULL, 0),
		       inode, PFID(&lli->lli_fid));

		memset(&conf, 0, sizeof(conf));
		conf.coc_opc = OBJECT_CONF_WAIT;
		conf.coc_inode = inode;
		rc = ll_layout_conf(inode, &conf);
		if (rc == 0)
			rc = -EAGAIN;	/* tell the caller to retry */

		CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
		       PFID(&lli->lli_fid), rc);
	}
	return rc;
}
3437
/**
 * This function checks if there exists a LAYOUT lock on the client side,
 * or enqueues it if it doesn't have one in cache.
 *
 * This function will not hold layout lock so it may be revoked any time after
 * this function returns. Any operations depend on layout should be redone
 * in that case.
 *
 * This function should be called before lov_io_init() to get an uptodate
 * layout version, the caller should save the version number and after IO
 * is finished, this function should be called again to verify that layout
 * is not changed during IO time.
 *
 * \param gen	out: current layout generation for the inode
 * \retval 0 on success, negative errno on enqueue/setup failure
 */
int ll_layout_refresh(struct inode *inode, __u32 *gen)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct lookup_intent it;
	struct lustre_handle lockh;
	ldlm_mode_t mode;
	struct ldlm_enqueue_info einfo = {
		.ei_type = LDLM_IBITS,
		.ei_mode = LCK_CR,
		.ei_cb_bl = ll_md_blocking_ast,
		.ei_cb_cp = ldlm_completion_ast,
	};
	int rc;

	/* fast path: layout locking disabled, or a generation is cached */
	*gen = ll_layout_version_get(lli);
	if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
		return 0;

	/* sanity checks */
	LASSERT(fid_is_sane(ll_inode2fid(inode)));
	LASSERT(S_ISREG(inode->i_mode));

	/* take layout lock mutex to enqueue layout lock exclusively. */
	mutex_lock(&lli->lli_layout_mutex);

again:
	/* mostly layout lock is caching on the local side, so try to match
	 * it before grabbing layout lock mutex. */
	mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
			       LCK_CR | LCK_CW | LCK_PR | LCK_PW);
	if (mode != 0) { /* hit cached lock */
		/* ll_layout_lock_set() releases the lock reference */
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;

		mutex_unlock(&lli->lli_layout_mutex);
		return rc;
	}

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
				     0, 0, LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		mutex_unlock(&lli->lli_layout_mutex);
		return PTR_ERR(op_data);
	}

	/* have to enqueue one */
	memset(&it, 0, sizeof(it));
	it.it_op = IT_LAYOUT;
	lockh.cookie = 0ULL;

	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
			  ll_get_fsname(inode->i_sb, NULL, 0), inode,
			  PFID(&lli->lli_fid));

	rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
			NULL, 0, NULL, 0);
	/* drop the enqueue reply request; the lock handle is kept */
	if (it.d.lustre.it_data != NULL)
		ptlrpc_req_finished(it.d.lustre.it_data);
	it.d.lustre.it_data = NULL;

	ll_finish_md_op_data(op_data);

	/* take over the lock reference from the intent before dropping it */
	mode = it.d.lustre.it_lock_mode;
	it.d.lustre.it_lock_mode = 0;
	ll_intent_drop_lock(&it);

	if (rc == 0) {
		/* set lock data in case this is a new lock */
		ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;
	}
	mutex_unlock(&lli->lli_layout_mutex);

	return rc;
}
3531
3532 /**
3533 * This function send a restore request to the MDT
3534 */
3535 int ll_layout_restore(struct inode *inode)
3536 {
3537 struct hsm_user_request *hur;
3538 int len, rc;
3539
3540 len = sizeof(struct hsm_user_request) +
3541 sizeof(struct hsm_user_item);
3542 OBD_ALLOC(hur, len);
3543 if (hur == NULL)
3544 return -ENOMEM;
3545
3546 hur->hur_request.hr_action = HUA_RESTORE;
3547 hur->hur_request.hr_archive_id = 0;
3548 hur->hur_request.hr_flags = 0;
3549 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3550 sizeof(hur->hur_user_item[0].hui_fid));
3551 hur->hur_user_item[0].hui_extent.length = -1;
3552 hur->hur_request.hr_itemcount = 1;
3553 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
3554 len, hur, NULL);
3555 OBD_FREE(hur, len);
3556 return rc;
3557 }
This page took 0.107906 seconds and 6 git commands to generate.