1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/llite/file.c
37 *
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
41 */
42
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
50
51 #include "cl_object.h"
52
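/* Allocate and initialize per-file-descriptor data from the ll_file_data slab. */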
53 struct ll_file_data *ll_file_data_get(void)
54 {
55 struct ll_file_data *fd;
56
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
58 if (fd == NULL)
59 return NULL;
60 fd->fd_write_failed = false;
61 return fd;
62 }
63
64 static void ll_file_data_put(struct ll_file_data *fd)
65 {
66 if (fd != NULL)
67 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
68 }
69
70 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
71 struct lustre_handle *fh)
72 {
73 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
74 op_data->op_attr.ia_mode = inode->i_mode;
75 op_data->op_attr.ia_atime = inode->i_atime;
76 op_data->op_attr.ia_mtime = inode->i_mtime;
77 op_data->op_attr.ia_ctime = inode->i_ctime;
78 op_data->op_attr.ia_size = i_size_read(inode);
79 op_data->op_attr_blocks = inode->i_blocks;
80 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
81 ll_inode_to_ext_flags(inode->i_flags);
82 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
83 if (fh)
84 op_data->op_handle = *fh;
85 op_data->op_capa1 = ll_mdscapa_get(inode);
86
87 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
88 op_data->op_bias |= MDS_DATA_MODIFIED;
89 }
90
91 /**
92 * Closes the IO epoch and packs all the attributes into @op_data for
93 * the CLOSE RPC.
94 */
95 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
96 struct obd_client_handle *och)
97 {
98 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
99 ATTR_MTIME | ATTR_MTIME_SET |
100 ATTR_CTIME | ATTR_CTIME_SET;
101
102 if (!(och->och_flags & FMODE_WRITE))
103 goto out;
104
105 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
106 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
107 else
108 ll_ioepoch_close(inode, op_data, &och, 0);
109
110 out:
111 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
112 ll_prep_md_op_data(op_data, inode, NULL, NULL,
113 0, 0, LUSTRE_OPC_ANY, NULL);
114 }
115
116 static int ll_close_inode_openhandle(struct obd_export *md_exp,
117 struct inode *inode,
118 struct obd_client_handle *och,
119 const __u64 *data_version)
120 {
121 struct obd_export *exp = ll_i2mdexp(inode);
122 struct md_op_data *op_data;
123 struct ptlrpc_request *req = NULL;
124 struct obd_device *obd = class_exp2obd(exp);
125 int epoch_close = 1;
126 int rc;
127
128 if (obd == NULL) {
129 /*
130 * XXX: in case of LMV, is it correct to access
131 * ->exp_handle?
132 */
133 CERROR("Invalid MDC connection handle "LPX64"\n",
134 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 GOTO(out, rc = 0);
136 }
137
138 OBD_ALLOC_PTR(op_data);
139 if (op_data == NULL)
140 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
141
142 ll_prepare_close(inode, op_data, och);
143 if (data_version != NULL) {
144 /* Passing in data_version implies release. */
145 op_data->op_bias |= MDS_HSM_RELEASE;
146 op_data->op_data_version = *data_version;
147 op_data->op_lease_handle = och->och_lease_handle;
148 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
149 }
150 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
151 rc = md_close(md_exp, op_data, och->och_mod, &req);
152 if (rc == -EAGAIN) {
153 /* This close must have the epoch closed. */
154 LASSERT(epoch_close);
155 /* MDS has instructed us to obtain the Size-on-MDS attribute from
156 * the OSTs and send a setattr back to the MDS. */
157 rc = ll_som_update(inode, op_data);
158 if (rc) {
159 CERROR("inode %lu mdc Size-on-MDS update failed: "
160 "rc = %d\n", inode->i_ino, rc);
161 rc = 0;
162 }
163 } else if (rc) {
164 CERROR("inode %lu mdc close failed: rc = %d\n",
165 inode->i_ino, rc);
166 }
167
168 /* The DATA_MODIFIED flag was successfully sent on close, so cancel
169 * the data modification flag. */
170 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
171 struct ll_inode_info *lli = ll_i2info(inode);
172
173 spin_lock(&lli->lli_lock);
174 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
175 spin_unlock(&lli->lli_lock);
176 }
177
178 if (rc == 0) {
179 rc = ll_objects_destroy(req, inode);
180 if (rc)
181 CERROR("inode %lu ll_objects destroy: rc = %d\n",
182 inode->i_ino, rc);
183 }
184 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
185 struct mdt_body *body;
186 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
187 if (!(body->valid & OBD_MD_FLRELEASED))
188 rc = -EBUSY;
189 }
190
191 ll_finish_md_op_data(op_data);
192
193 out:
194 if (exp_connect_som(exp) && !epoch_close &&
195 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
196 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
197 } else {
198 md_clear_open_replay_data(md_exp, och);
199 /* Free @och if it is not waiting for DONE_WRITING. */
200 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
201 OBD_FREE_PTR(och);
202 }
203 if (req) /* This is close request */
204 ptlrpc_req_finished(req);
205 return rc;
206 }
207
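/*
 * Drop the MDS open handle matching the given open mode (read/write/exec)
 * once its last user is gone, sending the close to the MDS via
 * ll_close_inode_openhandle().
 */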
208 int ll_md_real_close(struct inode *inode, int flags)
209 {
210 struct ll_inode_info *lli = ll_i2info(inode);
211 struct obd_client_handle **och_p;
212 struct obd_client_handle *och;
213 __u64 *och_usecount;
214 int rc = 0;
215
216 if (flags & FMODE_WRITE) {
217 och_p = &lli->lli_mds_write_och;
218 och_usecount = &lli->lli_open_fd_write_count;
219 } else if (flags & FMODE_EXEC) {
220 och_p = &lli->lli_mds_exec_och;
221 och_usecount = &lli->lli_open_fd_exec_count;
222 } else {
223 LASSERT(flags & FMODE_READ);
224 och_p = &lli->lli_mds_read_och;
225 och_usecount = &lli->lli_open_fd_read_count;
226 }
227
228 mutex_lock(&lli->lli_och_mutex);
229 if (*och_usecount) { /* There are still users of this handle, so
230 skip freeing it. */
231 mutex_unlock(&lli->lli_och_mutex);
232 return 0;
233 }
234 och = *och_p;
235 *och_p = NULL;
236 mutex_unlock(&lli->lli_och_mutex);
237
238 if (och) { /* There might be a race and somebody may have freed
239 this och already */
240 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
241 inode, och, NULL);
242 }
243
244 return rc;
245 }
246
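/*
 * Per-file-descriptor close: drop any group lock and lease, decrement the
 * open count for this open mode and, unless a matching OPEN lock is still
 * cached, perform the real close against the MDS.
 */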
247 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
248 struct file *file)
249 {
250 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
251 struct ll_inode_info *lli = ll_i2info(inode);
252 int rc = 0;
253
254 /* clear group lock, if present */
255 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
256 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
257
258 if (fd->fd_lease_och != NULL) {
259 bool lease_broken;
260
261 /* Usually the lease is not released when the
262 * application crashes, so we need to release it here. */
263 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
264 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
265 PFID(&lli->lli_fid), rc, lease_broken);
266
267 fd->fd_lease_och = NULL;
268 }
269
270 if (fd->fd_och != NULL) {
271 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
272 fd->fd_och = NULL;
273 GOTO(out, rc);
274 }
275
276 /* Let's see if we have a good enough OPEN lock on the file and
277 whether we can skip talking to the MDS */
278 if (file->f_dentry->d_inode) { /* Can this ever be false? */
279 int lockmode;
280 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
281 struct lustre_handle lockh;
282 struct inode *inode = file->f_dentry->d_inode;
283 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
284
285 mutex_lock(&lli->lli_och_mutex);
286 if (fd->fd_omode & FMODE_WRITE) {
287 lockmode = LCK_CW;
288 LASSERT(lli->lli_open_fd_write_count);
289 lli->lli_open_fd_write_count--;
290 } else if (fd->fd_omode & FMODE_EXEC) {
291 lockmode = LCK_PR;
292 LASSERT(lli->lli_open_fd_exec_count);
293 lli->lli_open_fd_exec_count--;
294 } else {
295 lockmode = LCK_CR;
296 LASSERT(lli->lli_open_fd_read_count);
297 lli->lli_open_fd_read_count--;
298 }
299 mutex_unlock(&lli->lli_och_mutex);
300
301 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
302 LDLM_IBITS, &policy, lockmode,
303 &lockh)) {
304 rc = ll_md_real_close(file->f_dentry->d_inode,
305 fd->fd_omode);
306 }
307 } else {
308 CERROR("Releasing a file %p with negative dentry %p. Name %s",
309 file, file->f_dentry, file->f_dentry->d_name.name);
310 }
311
312 out:
313 LUSTRE_FPRIVATE(file) = NULL;
314 ll_file_data_put(fd);
315 ll_capa_close(inode);
316
317 return rc;
318 }
319
320 /* While this function returns an error code, the caller (fput()) ignores it,
321 * so we need to make every effort to clean up all of our state here. Also,
322 * applications rarely check close errors, and even if an error is returned
323 * they will not retry the close call.
324 */
325 int ll_file_release(struct inode *inode, struct file *file)
326 {
327 struct ll_file_data *fd;
328 struct ll_sb_info *sbi = ll_i2sbi(inode);
329 struct ll_inode_info *lli = ll_i2info(inode);
330 int rc;
331
332 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
333 inode->i_generation, inode);
334
335 #ifdef CONFIG_FS_POSIX_ACL
336 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
337 inode == inode->i_sb->s_root->d_inode) {
338 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
339
340 LASSERT(fd != NULL);
341 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
342 fd->fd_flags &= ~LL_FILE_RMTACL;
343 rct_del(&sbi->ll_rct, current_pid());
344 et_search_free(&sbi->ll_et, current_pid());
345 }
346 }
347 #endif
348
349 if (inode->i_sb->s_root != file->f_dentry)
350 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
351 fd = LUSTRE_FPRIVATE(file);
352 LASSERT(fd != NULL);
353
354 /* The last ref on @file, maybe not from the owner pid of statahead.
355 * Different processes can open the same dir; "ll_opendir_key" means
356 * it is this process that should stop the statahead thread. */
357 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
358 lli->lli_opendir_pid != 0)
359 ll_stop_statahead(inode, lli->lli_opendir_key);
360
361 if (inode->i_sb->s_root == file->f_dentry) {
362 LUSTRE_FPRIVATE(file) = NULL;
363 ll_file_data_put(fd);
364 return 0;
365 }
366
367 if (!S_ISDIR(inode->i_mode)) {
368 lov_read_and_clear_async_rc(lli->lli_clob);
369 lli->lli_async_rc = 0;
370 }
371
372 rc = ll_md_close(sbi->ll_md_exp, inode, file);
373
374 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
375 libcfs_debug_dumplog();
376
377 return rc;
378 }
379
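/*
 * Send an open intent to the MDS for this file and, on success, set up the
 * resulting inode and lock data.
 */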
380 static int ll_intent_file_open(struct file *file, void *lmm,
381 int lmmsize, struct lookup_intent *itp)
382 {
383 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
384 struct dentry *parent = file->f_dentry->d_parent;
385 const char *name = file->f_dentry->d_name.name;
386 const int len = file->f_dentry->d_name.len;
387 struct md_op_data *op_data;
388 struct ptlrpc_request *req;
389 __u32 opc = LUSTRE_OPC_ANY;
390 int rc;
391
392 if (!parent)
393 return -ENOENT;
394
395 /* Usually we come here only for NFSD, and we want an open lock.
396 But we can also get here with pre-2.6.15 patchless kernels, and in
397 that case that lock is also ok */
398 /* We can also get here if there was a cached open handle in revalidate_it
399 * but it disappeared while we were getting from there to ll_file_open.
400 * But this means this file was closed and immediately opened, which
401 * makes it a good candidate for using an OPEN lock */
402 /* If lmmsize & lmm are not 0, we are just setting stripe info
403 * parameters. No need for the open lock */
404 if (lmm == NULL && lmmsize == 0) {
405 itp->it_flags |= MDS_OPEN_LOCK;
406 if (itp->it_flags & FMODE_WRITE)
407 opc = LUSTRE_OPC_CREATE;
408 }
409
410 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
411 file->f_dentry->d_inode, name, len,
412 O_RDWR, opc, NULL);
413 if (IS_ERR(op_data))
414 return PTR_ERR(op_data);
415
416 itp->it_flags |= MDS_OPEN_BY_FID;
417 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
418 0 /*unused */, &req, ll_md_blocking_ast, 0);
419 ll_finish_md_op_data(op_data);
420 if (rc == -ESTALE) {
421 /* reason to keep our own exit path - don't flood the log
422 * with -ESTALE error messages.
423 */
424 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
425 it_open_error(DISP_OPEN_OPEN, itp))
426 GOTO(out, rc);
427 ll_release_openhandle(file->f_dentry, itp);
428 GOTO(out, rc);
429 }
430
431 if (it_disposition(itp, DISP_LOOKUP_NEG))
432 GOTO(out, rc = -ENOENT);
433
434 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
435 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
436 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
437 GOTO(out, rc);
438 }
439
440 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
441 if (!rc && itp->d.lustre.it_lock_mode)
442 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
443 itp, NULL);
444
445 out:
446 ptlrpc_req_finished(itp->d.lustre.it_data);
447 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
448 ll_intent_drop_lock(itp);
449
450 return rc;
451 }
452
453 /**
454 * Assign an obtained @ioepoch to the client's inode. No lock is needed: the
455 * MDS does not believe attributes if several ioepoch holders exist. The MDS
456 * also skips attributes for a previous ioepoch if a new one has been opened.
457 */
458 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
459 {
460 if (ioepoch && lli->lli_ioepoch != ioepoch) {
461 lli->lli_ioepoch = ioepoch;
462 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
463 ioepoch, PFID(&lli->lli_fid));
464 }
465 }
466
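/* Fill an obd_client_handle from the reply of an open intent and register it
 * for open replay. */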
467 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
468 struct obd_client_handle *och)
469 {
470 struct ptlrpc_request *req = it->d.lustre.it_data;
471 struct mdt_body *body;
472
473 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
474 och->och_fh = body->handle;
475 och->och_fid = body->fid1;
476 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
477 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
478 och->och_flags = it->it_flags;
479
480 return md_set_open_replay_data(md_exp, och, req);
481 }
482
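/* Record the open on the client side: attach @fd to the file, initialize
 * readahead state and, if @och is given, fill it from the open reply. */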
483 int ll_local_open(struct file *file, struct lookup_intent *it,
484 struct ll_file_data *fd, struct obd_client_handle *och)
485 {
486 struct inode *inode = file->f_dentry->d_inode;
487 struct ll_inode_info *lli = ll_i2info(inode);
488
489 LASSERT(!LUSTRE_FPRIVATE(file));
490
491 LASSERT(fd != NULL);
492
493 if (och) {
494 struct ptlrpc_request *req = it->d.lustre.it_data;
495 struct mdt_body *body;
496 int rc;
497
498 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
499 if (rc != 0)
500 return rc;
501
502 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
503 ll_ioepoch_open(lli, body->ioepoch);
504 }
505
506 LUSTRE_FPRIVATE(file) = fd;
507 ll_readahead_init(inode, &fd->fd_ras);
508 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
509 return 0;
510 }
511
512 /* Open a file, and (for the very first open) create objects on the OSTs at
513 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
514 * creation or open until ll_lov_setstripe() ioctl is called.
515 *
516 * If we already have the stripe MD locally then we don't request it in
517 * md_open(), by passing lmm_size = 0.
518 *
519 * It is up to the application to ensure no other processes open this file
520 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
521 * used. We might be able to avoid races of that sort by getting lli_open_sem
522 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
523 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
524 */
525 int ll_file_open(struct inode *inode, struct file *file)
526 {
527 struct ll_inode_info *lli = ll_i2info(inode);
528 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
529 .it_flags = file->f_flags };
530 struct obd_client_handle **och_p = NULL;
531 __u64 *och_usecount = NULL;
532 struct ll_file_data *fd;
533 int rc = 0, opendir_set = 0;
534
535 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
536 inode->i_generation, inode, file->f_flags);
537
538 it = file->private_data; /* XXX: compat macro */
539 file->private_data = NULL; /* prevent ll_local_open assertion */
540
541 fd = ll_file_data_get();
542 if (fd == NULL)
543 GOTO(out_openerr, rc = -ENOMEM);
544
545 fd->fd_file = file;
546 if (S_ISDIR(inode->i_mode)) {
547 spin_lock(&lli->lli_sa_lock);
548 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
549 lli->lli_opendir_pid == 0) {
550 lli->lli_opendir_key = fd;
551 lli->lli_opendir_pid = current_pid();
552 opendir_set = 1;
553 }
554 spin_unlock(&lli->lli_sa_lock);
555 }
556
557 if (inode->i_sb->s_root == file->f_dentry) {
558 LUSTRE_FPRIVATE(file) = fd;
559 return 0;
560 }
561
562 if (!it || !it->d.lustre.it_disposition) {
563 /* Convert f_flags into an access mode. We cannot use file->f_mode,
564 * because everything but the O_ACCMODE mask was stripped from
565 * it */
566 if ((oit.it_flags + 1) & O_ACCMODE)
567 oit.it_flags++;
568 if (file->f_flags & O_TRUNC)
569 oit.it_flags |= FMODE_WRITE;
570
571 /* The kernel only calls f_op->open in dentry_open. filp_open calls
572 * dentry_open after a call to open_namei that checks permissions.
573 * Only nfsd_open calls dentry_open directly without checking
574 * permissions, and because of that the code below is safe. */
575 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
576 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
577
578 /* We do not want O_EXCL here, presumably we opened the file
579 * already? XXX - NFS implications? */
580 oit.it_flags &= ~O_EXCL;
581
582 /* bug 20584: if "it_flags" contains O_CREAT, the file will be
583 * created if necessary, so "IT_CREAT" should be set to stay
584 * consistent with it */
585 if (oit.it_flags & O_CREAT)
586 oit.it_op |= IT_CREAT;
587
588 it = &oit;
589 }
590
591 restart:
592 /* Let's see if we have file open on MDS already. */
593 if (it->it_flags & FMODE_WRITE) {
594 och_p = &lli->lli_mds_write_och;
595 och_usecount = &lli->lli_open_fd_write_count;
596 } else if (it->it_flags & FMODE_EXEC) {
597 och_p = &lli->lli_mds_exec_och;
598 och_usecount = &lli->lli_open_fd_exec_count;
599 } else {
600 och_p = &lli->lli_mds_read_och;
601 och_usecount = &lli->lli_open_fd_read_count;
602 }
603
604 mutex_lock(&lli->lli_och_mutex);
605 if (*och_p) { /* Open handle is present */
606 if (it_disposition(it, DISP_OPEN_OPEN)) {
607 /* Well, there's an extra open request that we do not need,
608 let's close it somehow. This will decref the request. */
609 rc = it_open_error(DISP_OPEN_OPEN, it);
610 if (rc) {
611 mutex_unlock(&lli->lli_och_mutex);
612 GOTO(out_openerr, rc);
613 }
614
615 ll_release_openhandle(file->f_dentry, it);
616 }
617 (*och_usecount)++;
618
619 rc = ll_local_open(file, it, fd, NULL);
620 if (rc) {
621 (*och_usecount)--;
622 mutex_unlock(&lli->lli_och_mutex);
623 GOTO(out_openerr, rc);
624 }
625 } else {
626 LASSERT(*och_usecount == 0);
627 if (!it->d.lustre.it_disposition) {
628 /* We cannot just request a lock handle now; the new ELC code
629 means that one of the other OPEN locks for this file
630 could be cancelled, and since the blocking ast handler
631 would attempt to grab och_mutex as well, that would
632 result in a deadlock */
633 mutex_unlock(&lli->lli_och_mutex);
634 it->it_create_mode |= M_CHECK_STALE;
635 rc = ll_intent_file_open(file, NULL, 0, it);
636 it->it_create_mode &= ~M_CHECK_STALE;
637 if (rc)
638 GOTO(out_openerr, rc);
639
640 goto restart;
641 }
642 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
643 if (!*och_p)
644 GOTO(out_och_free, rc = -ENOMEM);
645
646 (*och_usecount)++;
647
648 /* md_intent_lock() didn't get a request ref if there was an
649 * open error, so don't do cleanup on the request here
650 * (bug 3430) */
651 /* XXX (green): Shouldn't we bail out on any error here, not
652 * just an open error? */
653 rc = it_open_error(DISP_OPEN_OPEN, it);
654 if (rc)
655 GOTO(out_och_free, rc);
656
657 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
658
659 rc = ll_local_open(file, it, fd, *och_p);
660 if (rc)
661 GOTO(out_och_free, rc);
662 }
663 mutex_unlock(&lli->lli_och_mutex);
664 fd = NULL;
665
666 /* Must do this outside the lli_och_mutex lock to prevent a deadlock where
667 a different kind of OPEN lock for this same inode gets cancelled
668 by ldlm_cancel_lru */
669 if (!S_ISREG(inode->i_mode))
670 GOTO(out_och_free, rc);
671
672 ll_capa_open(inode);
673
674 if (!lli->lli_has_smd) {
675 if (file->f_flags & O_LOV_DELAY_CREATE ||
676 !(file->f_mode & FMODE_WRITE)) {
677 CDEBUG(D_INODE, "object creation was delayed\n");
678 GOTO(out_och_free, rc);
679 }
680 }
681 file->f_flags &= ~O_LOV_DELAY_CREATE;
682 GOTO(out_och_free, rc);
683
684 out_och_free:
685 if (rc) {
686 if (och_p && *och_p) {
687 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
688 *och_p = NULL; /* OBD_FREE writes some magic there */
689 (*och_usecount)--;
690 }
691 mutex_unlock(&lli->lli_och_mutex);
692
693 out_openerr:
694 if (opendir_set != 0)
695 ll_stop_statahead(inode, lli->lli_opendir_key);
696 if (fd != NULL)
697 ll_file_data_put(fd);
698 } else {
699 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
700 }
701
702 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
703 ptlrpc_req_finished(it->d.lustre.it_data);
704 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
705 }
706
707 return rc;
708 }
709
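/* Blocking AST for the lease lock: cancel the lock when it blocks another
 * request, do nothing on the cancel callback itself. */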
710 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
711 struct ldlm_lock_desc *desc, void *data, int flag)
712 {
713 int rc;
714 struct lustre_handle lockh;
715
716 switch (flag) {
717 case LDLM_CB_BLOCKING:
718 ldlm_lock2handle(lock, &lockh);
719 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
720 if (rc < 0) {
721 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
722 return rc;
723 }
724 break;
725 case LDLM_CB_CANCELING:
726 /* do nothing */
727 break;
728 }
729 return 0;
730 }
731
732 /**
733 * Acquire a lease and open the file.
734 */
735 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
736 fmode_t fmode, __u64 open_flags)
737 {
738 struct lookup_intent it = { .it_op = IT_OPEN };
739 struct ll_sb_info *sbi = ll_i2sbi(inode);
740 struct md_op_data *op_data;
741 struct ptlrpc_request *req;
742 struct lustre_handle old_handle = { 0 };
743 struct obd_client_handle *och = NULL;
744 int rc;
745 int rc2;
746
747 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
748 return ERR_PTR(-EINVAL);
749
750 if (file != NULL) {
751 struct ll_inode_info *lli = ll_i2info(inode);
752 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
753 struct obd_client_handle **och_p;
754 __u64 *och_usecount;
755
756 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
757 return ERR_PTR(-EPERM);
758
759 /* Get the openhandle of the file */
760 rc = -EBUSY;
761 mutex_lock(&lli->lli_och_mutex);
762 if (fd->fd_lease_och != NULL) {
763 mutex_unlock(&lli->lli_och_mutex);
764 return ERR_PTR(rc);
765 }
766
767 if (fd->fd_och == NULL) {
768 if (file->f_mode & FMODE_WRITE) {
769 LASSERT(lli->lli_mds_write_och != NULL);
770 och_p = &lli->lli_mds_write_och;
771 och_usecount = &lli->lli_open_fd_write_count;
772 } else {
773 LASSERT(lli->lli_mds_read_och != NULL);
774 och_p = &lli->lli_mds_read_och;
775 och_usecount = &lli->lli_open_fd_read_count;
776 }
777 if (*och_usecount == 1) {
778 fd->fd_och = *och_p;
779 *och_p = NULL;
780 *och_usecount = 0;
781 rc = 0;
782 }
783 }
784 mutex_unlock(&lli->lli_och_mutex);
785 if (rc < 0) /* more than 1 opener */
786 return ERR_PTR(rc);
787
788 LASSERT(fd->fd_och != NULL);
789 old_handle = fd->fd_och->och_fh;
790 }
791
792 OBD_ALLOC_PTR(och);
793 if (och == NULL)
794 return ERR_PTR(-ENOMEM);
795
796 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
797 LUSTRE_OPC_ANY, NULL);
798 if (IS_ERR(op_data))
799 GOTO(out, rc = PTR_ERR(op_data));
800
801 /* To tell the MDT this openhandle is from the same owner */
802 op_data->op_handle = old_handle;
803
804 it.it_flags = fmode | open_flags;
805 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
806 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
807 ll_md_blocking_lease_ast,
808 /* LDLM_FL_NO_LRU: Do not put the lease lock into the LRU list, otherwise
809 * it can be cancelled, which may mislead applications into thinking the
810 * lease is broken;
811 * LDLM_FL_EXCL: Set this flag so that it won't be matched by a normal
812 * open in ll_md_blocking_ast(). Otherwise, as ll_md_blocking_lease_ast
813 * doesn't deal with the openhandle, a normal openhandle would be leaked. */
814 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
815 ll_finish_md_op_data(op_data);
816 if (req != NULL) {
817 ptlrpc_req_finished(req);
818 it_clear_disposition(&it, DISP_ENQ_COMPLETE);
819 }
820 if (rc < 0)
821 GOTO(out_release_it, rc);
822
823 if (it_disposition(&it, DISP_LOOKUP_NEG))
824 GOTO(out_release_it, rc = -ENOENT);
825
826 rc = it_open_error(DISP_OPEN_OPEN, &it);
827 if (rc)
828 GOTO(out_release_it, rc);
829
830 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
831 ll_och_fill(sbi->ll_md_exp, &it, och);
832
833 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
834 GOTO(out_close, rc = -EOPNOTSUPP);
835
836 /* lease already obtained, handle the lease lock */
837 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
838 if (it.d.lustre.it_lock_mode == 0 ||
839 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
840 /* an open lock must be returned for a lease */
841 CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
842 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
843 it.d.lustre.it_lock_bits);
844 GOTO(out_close, rc = -EPROTO);
845 }
846
847 ll_intent_release(&it);
848 return och;
849
850 out_close:
851 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
852 if (rc2)
853 CERROR("Close openhandle returned %d\n", rc2);
854
855 /* cancel open lock */
856 if (it.d.lustre.it_lock_mode != 0) {
857 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
858 it.d.lustre.it_lock_mode);
859 it.d.lustre.it_lock_mode = 0;
860 }
861 out_release_it:
862 ll_intent_release(&it);
863 out:
864 OBD_FREE_PTR(och);
865 return ERR_PTR(rc);
866 }
867 EXPORT_SYMBOL(ll_lease_open);
868
869 /**
870 * Release lease and close the file.
871 * It will check whether the lease has ever been broken.
872 */
873 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
874 bool *lease_broken)
875 {
876 struct ldlm_lock *lock;
877 bool cancelled = true;
878 int rc;
879
880 lock = ldlm_handle2lock(&och->och_lease_handle);
881 if (lock != NULL) {
882 lock_res_and_lock(lock);
883 cancelled = ldlm_is_cancel(lock);
884 unlock_res_and_lock(lock);
885 ldlm_lock_put(lock);
886 }
887
888 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
889 PFID(&ll_i2info(inode)->lli_fid), cancelled);
890
891 if (!cancelled)
892 ldlm_cli_cancel(&och->och_lease_handle, 0);
893 if (lease_broken != NULL)
894 *lease_broken = cancelled;
895
896 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
897 NULL);
898 return rc;
899 }
900 EXPORT_SYMBOL(ll_lease_close);
901
902 /* Fills the obdo with the attributes for the lsm */
903 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
904 struct obd_capa *capa, struct obdo *obdo,
905 __u64 ioepoch, int sync)
906 {
907 struct ptlrpc_request_set *set;
908 struct obd_info oinfo = { { { 0 } } };
909 int rc;
910
911 LASSERT(lsm != NULL);
912
913 oinfo.oi_md = lsm;
914 oinfo.oi_oa = obdo;
915 oinfo.oi_oa->o_oi = lsm->lsm_oi;
916 oinfo.oi_oa->o_mode = S_IFREG;
917 oinfo.oi_oa->o_ioepoch = ioepoch;
918 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
919 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
920 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
921 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
922 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
923 OBD_MD_FLDATAVERSION;
924 oinfo.oi_capa = capa;
925 if (sync) {
926 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
927 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
928 }
929
930 set = ptlrpc_prep_set();
931 if (set == NULL) {
932 CERROR("can't allocate ptlrpc set\n");
933 rc = -ENOMEM;
934 } else {
935 rc = obd_getattr_async(exp, &oinfo, set);
936 if (rc == 0)
937 rc = ptlrpc_set_wait(set);
938 ptlrpc_set_destroy(set);
939 }
940 if (rc == 0)
941 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
942 OBD_MD_FLATIME | OBD_MD_FLMTIME |
943 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
944 OBD_MD_FLDATAVERSION);
945 return rc;
946 }
947
948 /**
949 * Performs the getattr on the inode and updates its fields.
950 * If @sync != 0, perform the getattr under the server-side lock.
951 */
952 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
953 __u64 ioepoch, int sync)
954 {
955 struct obd_capa *capa = ll_mdscapa_get(inode);
956 struct lov_stripe_md *lsm;
957 int rc;
958
959 lsm = ccc_inode_lsm_get(inode);
960 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
961 capa, obdo, ioepoch, sync);
962 capa_put(capa);
963 if (rc == 0) {
964 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
965
966 obdo_refresh_inode(inode, obdo, obdo->o_valid);
967 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
968 " blksize %lu\n", POSTID(oi), i_size_read(inode),
969 (unsigned long long)inode->i_blocks,
970 (unsigned long)ll_inode_blksize(inode));
971 }
972 ccc_inode_lsm_put(inode, lsm);
973 return rc;
974 }
975
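/* Merge the size, blocks and timestamps kept by the cl_object (reflecting OST
 * state) into the VFS inode, under the inode size lock. */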
976 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
977 {
978 struct ll_inode_info *lli = ll_i2info(inode);
979 struct cl_object *obj = lli->lli_clob;
980 struct cl_attr *attr = ccc_env_thread_attr(env);
981 struct ost_lvb lvb;
982 int rc = 0;
983
984 ll_inode_size_lock(inode);
985 /* merge the timestamps most recently obtained from the MDS with
986 the timestamps obtained from the OSTs */
987 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
988 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
989 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
990 inode_init_lvb(inode, &lvb);
991
992 cl_object_attr_lock(obj);
993 rc = cl_object_attr_get(env, obj, attr);
994 cl_object_attr_unlock(obj);
995
996 if (rc == 0) {
997 if (lvb.lvb_atime < attr->cat_atime)
998 lvb.lvb_atime = attr->cat_atime;
999 if (lvb.lvb_ctime < attr->cat_ctime)
1000 lvb.lvb_ctime = attr->cat_ctime;
1001 if (lvb.lvb_mtime < attr->cat_mtime)
1002 lvb.lvb_mtime = attr->cat_mtime;
1003
1004 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1005 PFID(&lli->lli_fid), attr->cat_size);
1006 cl_isize_write_nolock(inode, attr->cat_size);
1007
1008 inode->i_blocks = attr->cat_blocks;
1009
1010 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1011 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1012 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1013 }
1014 ll_inode_size_unlock(inode);
1015
1016 return rc;
1017 }
1018
1019 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1020 lstat_t *st)
1021 {
1022 struct obdo obdo = { 0 };
1023 int rc;
1024
1025 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1026 if (rc == 0) {
1027 st->st_size = obdo.o_size;
1028 st->st_blocks = obdo.o_blocks;
1029 st->st_mtime = obdo.o_mtime;
1030 st->st_atime = obdo.o_atime;
1031 st->st_ctime = obdo.o_ctime;
1032 }
1033 return rc;
1034 }
1035
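/* Initialize a cl_io from the file flags: non-blocking and append/sync modes
 * for writes, plus the lock requirement policy. */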
1036 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1037 {
1038 struct inode *inode = file->f_dentry->d_inode;
1039
1040 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1041 if (write) {
1042 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1043 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1044 file->f_flags & O_DIRECT ||
1045 IS_SYNC(inode);
1046 }
1047 io->ci_obj = ll_i2info(inode)->lli_clob;
1048 io->ci_lockreq = CILR_MAYBE;
1049 if (ll_file_nolock(file)) {
1050 io->ci_lockreq = CILR_NEVER;
1051 io->ci_no_srvlock = 1;
1052 } else if (file->f_flags & O_APPEND) {
1053 io->ci_lockreq = CILR_MANDATORY;
1054 }
1055 }
1056
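/*
 * Common read/write path: set up the cl_io for the given IO type and subtype
 * (normal, sendfile or splice), run the IO loop, and restart it if the layout
 * changed before any data was transferred.
 */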
1057 static ssize_t
1058 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1059 struct file *file, enum cl_io_type iot,
1060 loff_t *ppos, size_t count)
1061 {
1062 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1063 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1064 struct cl_io *io;
1065 ssize_t result;
1066
1067 restart:
1068 io = ccc_env_thread_io(env);
1069 ll_io_init(io, file, iot == CIT_WRITE);
1070
1071 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1072 struct vvp_io *vio = vvp_env_io(env);
1073 struct ccc_io *cio = ccc_env_io(env);
1074 int write_mutex_locked = 0;
1075
1076 cio->cui_fd = LUSTRE_FPRIVATE(file);
1077 vio->cui_io_subtype = args->via_io_subtype;
1078
1079 switch (vio->cui_io_subtype) {
1080 case IO_NORMAL:
1081 cio->cui_iov = args->u.normal.via_iov;
1082 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1083 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1084 cio->cui_iocb = args->u.normal.via_iocb;
1085 if ((iot == CIT_WRITE) &&
1086 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1087 if (mutex_lock_interruptible(&lli->
1088 lli_write_mutex))
1089 GOTO(out, result = -ERESTARTSYS);
1090 write_mutex_locked = 1;
1091 } else if (iot == CIT_READ) {
1092 down_read(&lli->lli_trunc_sem);
1093 }
1094 break;
1095 case IO_SENDFILE:
1096 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1097 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1098 break;
1099 case IO_SPLICE:
1100 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1101 vio->u.splice.cui_flags = args->u.splice.via_flags;
1102 break;
1103 default:
1104 CERROR("Unknown IO type - %u\n", vio->cui_io_subtype);
1105 LBUG();
1106 }
1107 result = cl_io_loop(env, io);
1108 if (write_mutex_locked)
1109 mutex_unlock(&lli->lli_write_mutex);
1110 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1111 up_read(&lli->lli_trunc_sem);
1112 } else {
1113 /* cl_io_rw_init() handled IO */
1114 result = io->ci_result;
1115 }
1116
1117 if (io->ci_nob > 0) {
1118 result = io->ci_nob;
1119 *ppos = io->u.ci_wr.wr.crw_pos;
1120 }
1121 GOTO(out, result);
1122 out:
1123 cl_io_fini(env, io);
1124 /* If any data has been read/written (result != 0), we just return
1125 * a short read/write instead of restarting the io. */
1126 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1127 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1128 iot == CIT_READ ? "read" : "write",
1129 file->f_dentry->d_name.name, *ppos, count);
1130 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1131 goto restart;
1132 }
1133
1134 if (iot == CIT_READ) {
1135 if (result >= 0)
1136 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1137 LPROC_LL_READ_BYTES, result);
1138 } else if (iot == CIT_WRITE) {
1139 if (result >= 0) {
1140 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1141 LPROC_LL_WRITE_BYTES, result);
1142 fd->fd_write_failed = false;
1143 } else if (result != -ERESTARTSYS) {
1144 fd->fd_write_failed = true;
1145 }
1146 }
1147
1148 return result;
1149 }
1150
1151 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1152 unsigned long nr_segs, loff_t pos)
1153 {
1154 struct lu_env *env;
1155 struct vvp_io_args *args;
1156 size_t count = 0;
1157 ssize_t result;
1158 int refcheck;
1159
1160 result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1161 if (result)
1162 return result;
1163
1164 env = cl_env_get(&refcheck);
1165 if (IS_ERR(env))
1166 return PTR_ERR(env);
1167
1168 args = vvp_env_args(env, IO_NORMAL);
1169 args->u.normal.via_iov = (struct iovec *)iov;
1170 args->u.normal.via_nrsegs = nr_segs;
1171 args->u.normal.via_iocb = iocb;
1172
1173 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1174 &iocb->ki_pos, count);
1175 cl_env_put(env, &refcheck);
1176 return result;
1177 }
1178
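/* read(2) entry point: wrap the user buffer in a single iovec and a sync
 * kiocb, then go through the AIO read path. */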
1179 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1180 loff_t *ppos)
1181 {
1182 struct lu_env *env;
1183 struct iovec *local_iov;
1184 struct kiocb *kiocb;
1185 ssize_t result;
1186 int refcheck;
1187
1188 env = cl_env_get(&refcheck);
1189 if (IS_ERR(env))
1190 return PTR_ERR(env);
1191
1192 local_iov = &vvp_env_info(env)->vti_local_iov;
1193 kiocb = &vvp_env_info(env)->vti_kiocb;
1194 local_iov->iov_base = (void __user *)buf;
1195 local_iov->iov_len = count;
1196 init_sync_kiocb(kiocb, file);
1197 kiocb->ki_pos = *ppos;
1198 kiocb->ki_nbytes = count;
1199
1200 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1201 *ppos = kiocb->ki_pos;
1202
1203 cl_env_put(env, &refcheck);
1204 return result;
1205 }
1206
1207 /*
1208 * Write to a file (through the page cache).
1209 */
1210 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1211 unsigned long nr_segs, loff_t pos)
1212 {
1213 struct lu_env *env;
1214 struct vvp_io_args *args;
1215 size_t count = 0;
1216 ssize_t result;
1217 int refcheck;
1218
1219 result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
1220 if (result)
1221 return result;
1222
1223 env = cl_env_get(&refcheck);
1224 if (IS_ERR(env))
1225 return PTR_ERR(env);
1226
1227 args = vvp_env_args(env, IO_NORMAL);
1228 args->u.normal.via_iov = (struct iovec *)iov;
1229 args->u.normal.via_nrsegs = nr_segs;
1230 args->u.normal.via_iocb = iocb;
1231
1232 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1233 &iocb->ki_pos, count);
1234 cl_env_put(env, &refcheck);
1235 return result;
1236 }
1237
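/* write(2) entry point: wrap the user buffer in a single iovec and a sync
 * kiocb, then go through the AIO write path. */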
1238 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1239 loff_t *ppos)
1240 {
1241 struct lu_env *env;
1242 struct iovec *local_iov;
1243 struct kiocb *kiocb;
1244 ssize_t result;
1245 int refcheck;
1246
1247 env = cl_env_get(&refcheck);
1248 if (IS_ERR(env))
1249 return PTR_ERR(env);
1250
1251 local_iov = &vvp_env_info(env)->vti_local_iov;
1252 kiocb = &vvp_env_info(env)->vti_kiocb;
1253 local_iov->iov_base = (void __user *)buf;
1254 local_iov->iov_len = count;
1255 init_sync_kiocb(kiocb, file);
1256 kiocb->ki_pos = *ppos;
1257 kiocb->ki_nbytes = count;
1258
1259 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1260 *ppos = kiocb->ki_pos;
1261
1262 cl_env_put(env, &refcheck);
1263 return result;
1264 }
1265
1266
1267
1268 /*
1269 * Send file content (through pagecache) somewhere with helper
1270 */
1271 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1272 struct pipe_inode_info *pipe, size_t count,
1273 unsigned int flags)
1274 {
1275 struct lu_env *env;
1276 struct vvp_io_args *args;
1277 ssize_t result;
1278 int refcheck;
1279
1280 env = cl_env_get(&refcheck);
1281 if (IS_ERR(env))
1282 return PTR_ERR(env);
1283
1284 args = vvp_env_args(env, IO_SPLICE);
1285 args->u.splice.via_pipe = pipe;
1286 args->u.splice.via_flags = flags;
1287
1288 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1289 cl_env_put(env, &refcheck);
1290 return result;
1291 }
1292
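/* Recreate an OST object for this inode with the given object id and OST
 * index, using a copy of the current stripe MD. */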
1293 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1294 obd_count ost_idx)
1295 {
1296 struct obd_export *exp = ll_i2dtexp(inode);
1297 struct obd_trans_info oti = { 0 };
1298 struct obdo *oa = NULL;
1299 int lsm_size;
1300 int rc = 0;
1301 struct lov_stripe_md *lsm = NULL, *lsm2;
1302
1303 OBDO_ALLOC(oa);
1304 if (oa == NULL)
1305 return -ENOMEM;
1306
1307 lsm = ccc_inode_lsm_get(inode);
1308 if (!lsm_has_objects(lsm))
1309 GOTO(out, rc = -ENOENT);
1310
1311 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1312 (lsm->lsm_stripe_count));
1313
1314 OBD_ALLOC_LARGE(lsm2, lsm_size);
1315 if (lsm2 == NULL)
1316 GOTO(out, rc = -ENOMEM);
1317
1318 oa->o_oi = *oi;
1319 oa->o_nlink = ost_idx;
1320 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1321 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1322 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1323 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1324 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1325 memcpy(lsm2, lsm, lsm_size);
1326 ll_inode_size_lock(inode);
1327 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1328 ll_inode_size_unlock(inode);
1329
1330 OBD_FREE_LARGE(lsm2, lsm_size);
1331 GOTO(out, rc);
1332 out:
1333 ccc_inode_lsm_put(inode, lsm);
1334 OBDO_FREE(oa);
1335 return rc;
1336 }
1337
1338 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1339 {
1340 struct ll_recreate_obj ucreat;
1341 struct ost_id oi;
1342
1343 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1344 return -EPERM;
1345
1346 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1347 sizeof(ucreat)))
1348 return -EFAULT;
1349
1350 ostid_set_seq_mdt0(&oi);
1351 ostid_set_id(&oi, ucreat.lrc_id);
1352 return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
1353 }
1354
1355 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1356 {
1357 struct lu_fid fid;
1358 struct ost_id oi;
1359 obd_count ost_idx;
1360
1361 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1362 return -EPERM;
1363
1364 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1365 return -EFAULT;
1366
1367 fid_to_ostid(&fid, &oi);
1368 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1369 return ll_lov_recreate(inode, &oi, ost_idx);
1370 }
1371
1372 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1373 int flags, struct lov_user_md *lum, int lum_size)
1374 {
1375 struct lov_stripe_md *lsm = NULL;
1376 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1377 int rc = 0;
1378
1379 lsm = ccc_inode_lsm_get(inode);
1380 if (lsm != NULL) {
1381 ccc_inode_lsm_put(inode, lsm);
1382 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1383 inode->i_ino);
1384 return -EEXIST;
1385 }
1386
1387 ll_inode_size_lock(inode);
1388 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1389 if (rc)
1390 GOTO(out, rc);
1391 rc = oit.d.lustre.it_status;
1392 if (rc < 0)
1393 GOTO(out_req_free, rc);
1394
1395 ll_release_openhandle(file->f_dentry, &oit);
1396
1397 out:
1398 ll_inode_size_unlock(inode);
1399 ll_intent_release(&oit);
1400 ccc_inode_lsm_put(inode, lsm);
1401 return rc;
1402 out_req_free:
1403 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1404 goto out;
1405 }
1406
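/* Fetch the LOV EA (striping information) of @filename from the MDS and
 * return it, swabbed to host endianness where necessary, together with the
 * request that holds it. */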
1407 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1408 struct lov_mds_md **lmmp, int *lmm_size,
1409 struct ptlrpc_request **request)
1410 {
1411 struct ll_sb_info *sbi = ll_i2sbi(inode);
1412 struct mdt_body *body;
1413 struct lov_mds_md *lmm = NULL;
1414 struct ptlrpc_request *req = NULL;
1415 struct md_op_data *op_data;
1416 int rc, lmmsize;
1417
1418 rc = ll_get_max_mdsize(sbi, &lmmsize);
1419 if (rc)
1420 return rc;
1421
1422 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1423 strlen(filename), lmmsize,
1424 LUSTRE_OPC_ANY, NULL);
1425 if (IS_ERR(op_data))
1426 return PTR_ERR(op_data);
1427
1428 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1429 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1430 ll_finish_md_op_data(op_data);
1431 if (rc < 0) {
1432 CDEBUG(D_INFO, "md_getattr_name failed "
1433 "on %s: rc %d\n", filename, rc);
1434 GOTO(out, rc);
1435 }
1436
1437 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1438 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1439
1440 lmmsize = body->eadatasize;
1441
1442 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1443 lmmsize == 0) {
1444 GOTO(out, rc = -ENODATA);
1445 }
1446
1447 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1448 LASSERT(lmm != NULL);
1449
1450 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1451 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1452 GOTO(out, rc = -EPROTO);
1453 }
1454
1455 /*
1456 * This is coming from the MDS, so it is probably in
1457 * little endian. We convert it to host endian before
1458 * passing it to userspace.
1459 */
1460 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1461 int stripe_count;
1462
1463 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1464 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1465 stripe_count = 0;
1466
1467 /* if the function was called for a directory, we should
1468 * avoid swabbing non-existent lsm objects */
1469 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1470 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1471 if (S_ISREG(body->mode))
1472 lustre_swab_lov_user_md_objects(
1473 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1474 stripe_count);
1475 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1476 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1477 if (S_ISREG(body->mode))
1478 lustre_swab_lov_user_md_objects(
1479 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1480 stripe_count);
1481 }
1482 }
1483
1484 out:
1485 *lmmp = lmm;
1486 *lmm_size = lmmsize;
1487 *request = req;
1488 return rc;
1489 }
1490
1491 static int ll_lov_setea(struct inode *inode, struct file *file,
1492 unsigned long arg)
1493 {
1494 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1495 struct lov_user_md *lump;
1496 int lum_size = sizeof(struct lov_user_md) +
1497 sizeof(struct lov_user_ost_data);
1498 int rc;
1499
1500 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1501 return -EPERM;
1502
1503 OBD_ALLOC_LARGE(lump, lum_size);
1504 if (lump == NULL)
1505 return -ENOMEM;
1506
1507 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1508 OBD_FREE_LARGE(lump, lum_size);
1509 return -EFAULT;
1510 }
1511
1512 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1513
1514 OBD_FREE_LARGE(lump, lum_size);
1515 return rc;
1516 }
1517
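/* LL_IOC_LOV_SETSTRIPE: set the file's striping from a user-supplied
 * lov_user_md (v1 or v3), then copy the resulting layout back to userspace. */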
1518 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1519 unsigned long arg)
1520 {
1521 struct lov_user_md_v3 lumv3;
1522 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1523 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1524 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1525 int lum_size, rc;
1526 int flags = FMODE_WRITE;
1527
1528 /* first try with v1 which is smaller than v3 */
1529 lum_size = sizeof(struct lov_user_md_v1);
1530 if (copy_from_user(lumv1, lumv1p, lum_size))
1531 return -EFAULT;
1532
1533 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1534 lum_size = sizeof(struct lov_user_md_v3);
1535 if (copy_from_user(&lumv3, lumv3p, lum_size))
1536 return -EFAULT;
1537 }
1538
1539 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1540 if (rc == 0) {
1541 struct lov_stripe_md *lsm;
1542 __u32 gen;
1543
1544 put_user(0, &lumv1p->lmm_stripe_count);
1545
1546 ll_layout_refresh(inode, &gen);
1547 lsm = ccc_inode_lsm_get(inode);
1548 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1549 0, lsm, (void *)arg);
1550 ccc_inode_lsm_put(inode, lsm);
1551 }
1552 return rc;
1553 }
1554
1555 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1556 {
1557 struct lov_stripe_md *lsm;
1558 int rc = -ENODATA;
1559
1560 lsm = ccc_inode_lsm_get(inode);
1561 if (lsm != NULL)
1562 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1563 lsm, (void *)arg);
1564 ccc_inode_lsm_put(inode, lsm);
1565 return rc;
1566 }
1567
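/* Take a group lock with group id @arg on the whole file and remember it in
 * the file descriptor data. */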
1568 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1569 {
1570 struct ll_inode_info *lli = ll_i2info(inode);
1571 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1572 struct ccc_grouplock grouplock;
1573 int rc;
1574
1575 if (ll_file_nolock(file))
1576 return -EOPNOTSUPP;
1577
1578 spin_lock(&lli->lli_lock);
1579 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1580 CWARN("group lock already exists with gid %lu\n",
1581 fd->fd_grouplock.cg_gid);
1582 spin_unlock(&lli->lli_lock);
1583 return -EINVAL;
1584 }
1585 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1586 spin_unlock(&lli->lli_lock);
1587
1588 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1589 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1590 if (rc)
1591 return rc;
1592
1593 spin_lock(&lli->lli_lock);
1594 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1595 spin_unlock(&lli->lli_lock);
1596 CERROR("another thread just won the race\n");
1597 cl_put_grouplock(&grouplock);
1598 return -EINVAL;
1599 }
1600
1601 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1602 fd->fd_grouplock = grouplock;
1603 spin_unlock(&lli->lli_lock);
1604
1605 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
1606 return 0;
1607 }
1608
1609 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1610 {
1611 struct ll_inode_info *lli = ll_i2info(inode);
1612 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1613 struct ccc_grouplock grouplock;
1614
1615 spin_lock(&lli->lli_lock);
1616 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1617 spin_unlock(&lli->lli_lock);
1618 CWARN("no group lock held\n");
1619 return -EINVAL;
1620 }
1621 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1622
1623 if (fd->fd_grouplock.cg_gid != arg) {
1624 CWARN("group lock %lu doesn't match current id %lu\n",
1625 arg, fd->fd_grouplock.cg_gid);
1626 spin_unlock(&lli->lli_lock);
1627 return -EINVAL;
1628 }
1629
1630 grouplock = fd->fd_grouplock;
1631 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1632 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1633 spin_unlock(&lli->lli_lock);
1634
1635 cl_put_grouplock(&grouplock);
1636 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1637 return 0;
1638 }
1639
1640 /**
1641 * Close inode open handle
1642 *
1643 * \param dentry [in] dentry which contains the inode
1644 * \param it [in,out] intent which contains open info and result
1645 *
1646 * \retval 0 success
1647 * \retval <0 failure
1648 */
1649 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1650 {
1651 struct inode *inode = dentry->d_inode;
1652 struct obd_client_handle *och;
1653 int rc;
1654
1655 LASSERT(inode);
1656
1657 /* Root? Do nothing. */
1658 if (dentry->d_inode->i_sb->s_root == dentry)
1659 return 0;
1660
1661 /* No open handle to close? Move away */
1662 if (!it_disposition(it, DISP_OPEN_OPEN))
1663 return 0;
1664
1665 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1666
1667 OBD_ALLOC(och, sizeof(*och));
1668 if (!och)
1669 GOTO(out, rc = -ENOMEM);
1670
1671 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1672
1673 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1674 inode, och, NULL);
1675 out:
1676 /* this one is in place of ll_file_open */
1677 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1678 ptlrpc_req_finished(it->d.lustre.it_data);
1679 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1680 }
1681 return rc;
1682 }
1683
1684 /**
1685 * Get the size of the inode for which the FIEMAP mapping is requested.
1686 * Make the FIEMAP get_info call and return the result.
1687 */
1688 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1689 int num_bytes)
1690 {
1691 struct obd_export *exp = ll_i2dtexp(inode);
1692 struct lov_stripe_md *lsm = NULL;
1693 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1694 int vallen = num_bytes;
1695 int rc;
1696
1697 /* Checks for fiemap flags */
1698 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1699 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1700 return -EBADR;
1701 }
1702
1703 /* Check for FIEMAP_FLAG_SYNC */
1704 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1705 rc = filemap_fdatawrite(inode->i_mapping);
1706 if (rc)
1707 return rc;
1708 }
1709
1710 lsm = ccc_inode_lsm_get(inode);
1711 if (lsm == NULL)
1712 return -ENOENT;
1713
1714 /* If the stripe_count > 1 and the application does not understand
1715 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1716 */
1717 if (lsm->lsm_stripe_count > 1 &&
1718 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1719 GOTO(out, rc = -EOPNOTSUPP);
1720
1721 fm_key.oa.o_oi = lsm->lsm_oi;
1722 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1723
1724 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1725 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1726 /* If filesize is 0, then there would be no objects for mapping */
1727 if (fm_key.oa.o_size == 0) {
1728 fiemap->fm_mapped_extents = 0;
1729 GOTO(out, rc = 0);
1730 }
1731
1732 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1733
1734 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1735 fiemap, lsm);
1736 if (rc)
1737 CERROR("obd_get_info failed: rc = %d\n", rc);
1738
1739 out:
1740 ccc_inode_lsm_put(inode, lsm);
1741 return rc;
1742 }
1743
1744 int ll_fid2path(struct inode *inode, void *arg)
1745 {
1746 struct obd_export *exp = ll_i2mdexp(inode);
1747 struct getinfo_fid2path *gfout, *gfin;
1748 int outsize, rc;
1749
1750 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1751 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1752 return -EPERM;
1753
1754 /* Need to get the buflen */
1755 OBD_ALLOC_PTR(gfin);
1756 if (gfin == NULL)
1757 return -ENOMEM;
1758 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1759 OBD_FREE_PTR(gfin);
1760 return -EFAULT;
1761 }
1762
1763 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1764 OBD_ALLOC(gfout, outsize);
1765 if (gfout == NULL) {
1766 OBD_FREE_PTR(gfin);
1767 return -ENOMEM;
1768 }
1769 memcpy(gfout, gfin, sizeof(*gfout));
1770 OBD_FREE_PTR(gfin);
1771
1772 /* Call mdc_iocontrol */
1773 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1774 if (rc)
1775 GOTO(gf_free, rc);
1776
1777 if (copy_to_user(arg, gfout, outsize))
1778 rc = -EFAULT;
1779
1780 gf_free:
1781 OBD_FREE(gfout, outsize);
1782 return rc;
1783 }
1784
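/* FIEMAP ioctl handler: allocate a buffer large enough for the requested
 * number of extents, run ll_do_fiemap() and copy the mapping back to
 * userspace. */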
1785 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1786 {
1787 struct ll_user_fiemap *fiemap_s;
1788 size_t num_bytes, ret_bytes;
1789 unsigned int extent_count;
1790 int rc = 0;
1791
1792 /* Get the extent count so we can calculate the size of
1793 * required fiemap buffer */
1794 if (get_user(extent_count,
1795 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1796 return -EFAULT;
1797 num_bytes = sizeof(*fiemap_s) + (extent_count *
1798 sizeof(struct ll_fiemap_extent));
1799
1800 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1801 if (fiemap_s == NULL)
1802 return -ENOMEM;
1803
1804 /* get the fiemap value */
1805 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1806 sizeof(*fiemap_s)))
1807 GOTO(error, rc = -EFAULT);
1808
1809 /* If fm_extent_count is non-zero, read the first extent since
1810 * it is used to calculate end_offset and device from previous
1811 * fiemap call. */
1812 if (extent_count) {
1813 if (copy_from_user(&fiemap_s->fm_extents[0],
1814 (char __user *)arg + sizeof(*fiemap_s),
1815 sizeof(struct ll_fiemap_extent)))
1816 GOTO(error, rc = -EFAULT);
1817 }
1818
1819 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1820 if (rc)
1821 GOTO(error, rc);
1822
1823 ret_bytes = sizeof(struct ll_user_fiemap);
1824
1825 if (extent_count != 0)
1826 ret_bytes += (fiemap_s->fm_mapped_extents *
1827 sizeof(struct ll_fiemap_extent));
1828
1829 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1830 rc = -EFAULT;
1831
1832 error:
1833 OBD_FREE_LARGE(fiemap_s, num_bytes);
1834 return rc;
1835 }
1836
1837 /*
1838 * Read the data_version for the inode.
1839 *
1840 * This value is computed using the stripe object version on the OST.
1841 * The version is computed using server-side locking.
1842 *
1843 * @param extent_lock Take extent lock. Not needed if a process is already
1844 * holding the OST object group locks.
1845 */
1846 int ll_data_version(struct inode *inode, __u64 *data_version,
1847 int extent_lock)
1848 {
1849 struct lov_stripe_md *lsm = NULL;
1850 struct ll_sb_info *sbi = ll_i2sbi(inode);
1851 struct obdo *obdo = NULL;
1852 int rc;
1853
1854 /* If there is no stripe, we consider the version to be 0. */
1855 lsm = ccc_inode_lsm_get(inode);
1856 if (!lsm_has_objects(lsm)) {
1857 *data_version = 0;
1858 CDEBUG(D_INODE, "No object for inode\n");
1859 GOTO(out, rc = 0);
1860 }
1861
1862 OBD_ALLOC_PTR(obdo);
1863 if (obdo == NULL)
1864 GOTO(out, rc = -ENOMEM);
1865
1866 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
1867 if (rc == 0) {
1868 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1869 rc = -EOPNOTSUPP;
1870 else
1871 *data_version = obdo->o_data_version;
1872 }
1873
1874 OBD_FREE_PTR(obdo);
1875 out:
1876 ccc_inode_lsm_put(inode, lsm);
1877 return rc;
1878 }
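/*
 * Illustrative user-space counterpart: the value computed by
 * ll_data_version() is exposed through the LL_IOC_DATA_VERSION case of
 * ll_file_ioctl() further down.  struct ioc_data_version and LL_DV_NOFLUSH
 * come from the Lustre user headers; this is a sketch only:
 *
 *	struct ioc_data_version idv = { 0 };
 *
 *	idv.idv_flags = 0;		// 0 takes the OST extent locks;
 *					// LL_DV_NOFLUSH skips that step
 *	if (ioctl(fd, LL_IOC_DATA_VERSION, &idv) == 0)
 *		printf("data version %llu\n",
 *		       (unsigned long long)idv.idv_version);
 */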
1879
1880 /*
1881 * Trigger a HSM release request for the provided inode.
1882 */
1883 int ll_hsm_release(struct inode *inode)
1884 {
1885 struct cl_env_nest nest;
1886 struct lu_env *env;
1887 struct obd_client_handle *och = NULL;
1888 __u64 data_version = 0;
1889 int rc;
1890
1891
1892 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1893 ll_get_fsname(inode->i_sb, NULL, 0),
1894 PFID(&ll_i2info(inode)->lli_fid));
1895
1896 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1897 if (IS_ERR(och))
1898 GOTO(out, rc = PTR_ERR(och));
1899
1900 /* Grab latest data_version and [am]time values */
1901 rc = ll_data_version(inode, &data_version, 1);
1902 if (rc != 0)
1903 GOTO(out, rc);
1904
1905 env = cl_env_nested_get(&nest);
1906 if (IS_ERR(env))
1907 GOTO(out, rc = PTR_ERR(env));
1908
1909 ll_merge_lvb(env, inode);
1910 cl_env_nested_put(&nest, env);
1911
1912 /* Release the file.
1913 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1914 * we still need it to pack l_remote_handle to the MDT. */
1915 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
1916 &data_version);
1917 och = NULL;
1918
1919
1920 out:
1921 if (och != NULL && !IS_ERR(och)) /* close the file */
1922 ll_lease_close(och, inode, NULL);
1923
1924 return rc;
1925 }
1926
1927 struct ll_swap_stack {
1928 struct iattr ia1, ia2;
1929 __u64 dv1, dv2;
1930 struct inode *inode1, *inode2;
1931 bool check_dv1, check_dv2;
1932 };
1933
1934 static int ll_swap_layouts(struct file *file1, struct file *file2,
1935 struct lustre_swap_layouts *lsl)
1936 {
1937 struct mdc_swap_layouts msl;
1938 struct md_op_data *op_data;
1939 __u32 gid;
1940 __u64 dv;
1941 struct ll_swap_stack *llss = NULL;
1942 int rc;
1943
1944 OBD_ALLOC_PTR(llss);
1945 if (llss == NULL)
1946 return -ENOMEM;
1947
1948 llss->inode1 = file1->f_dentry->d_inode;
1949 llss->inode2 = file2->f_dentry->d_inode;
1950
1951 if (!S_ISREG(llss->inode2->i_mode))
1952 GOTO(free, rc = -EINVAL);
1953
1954 if (inode_permission(llss->inode1, MAY_WRITE) ||
1955 inode_permission(llss->inode2, MAY_WRITE))
1956 GOTO(free, rc = -EPERM);
1957
1958 if (llss->inode2->i_sb != llss->inode1->i_sb)
1959 GOTO(free, rc = -EXDEV);
1960
1961 /* we use two bools because they are easier to swap than two bits */
1962 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1963 llss->check_dv1 = true;
1964
1965 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1966 llss->check_dv2 = true;
1967
1968 /* we cannot use lsl->sl_dvX directly because we may swap them */
1969 llss->dv1 = lsl->sl_dv1;
1970 llss->dv2 = lsl->sl_dv2;
1971
1972 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1973 if (rc == 0) /* same file, done! */
1974 GOTO(free, rc = 0);
1975
1976 if (rc < 0) { /* order the two files by FID so locking is consistent */
1977 swap(llss->inode1, llss->inode2);
1978 swap(file1, file2);
1979 swap(llss->dv1, llss->dv2);
1980 swap(llss->check_dv1, llss->check_dv2);
1981 }
1982
1983 gid = lsl->sl_gid;
1984 if (gid != 0) { /* application asks to flush dirty cache */
1985 rc = ll_get_grouplock(llss->inode1, file1, gid);
1986 if (rc < 0)
1987 GOTO(free, rc);
1988
1989 rc = ll_get_grouplock(llss->inode2, file2, gid);
1990 if (rc < 0) {
1991 ll_put_grouplock(llss->inode1, file1, gid);
1992 GOTO(free, rc);
1993 }
1994 }
1995
1996 /* to be able to restore mtime and atime after the swap
1997 * we need to save them first */
1998 if (lsl->sl_flags &
1999 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2000 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2001 llss->ia1.ia_atime = llss->inode1->i_atime;
2002 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2003 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2004 llss->ia2.ia_atime = llss->inode2->i_atime;
2005 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2006 }
2007
2008 /* ultimate check: before swapping the layouts we check whether
2009 * the data version has changed (if requested) */
2010 if (llss->check_dv1) {
2011 rc = ll_data_version(llss->inode1, &dv, 0);
2012 if (rc)
2013 GOTO(putgl, rc);
2014 if (dv != llss->dv1)
2015 GOTO(putgl, rc = -EAGAIN);
2016 }
2017
2018 if (llss->check_dv2) {
2019 rc = ll_data_version(llss->inode2, &dv, 0);
2020 if (rc)
2021 GOTO(putgl, rc);
2022 if (dv != llss->dv2)
2023 GOTO(putgl, rc = -EAGAIN);
2024 }
2025
2026 /* struct md_op_data is used to send the swap args to the mdt;
2027 * only the flags are missing, so we pass struct mdc_swap_layouts
2028 * through md_op_data->op_data */
2029 /* flags from user space have to be converted before they are sent to
2030 * the server; no flags are sent today, they are only used on the client */
2031 msl.msl_flags = 0;
2032 rc = -ENOMEM;
2033 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2034 0, LUSTRE_OPC_ANY, &msl);
2035 if (IS_ERR(op_data))
2036 GOTO(free, rc = PTR_ERR(op_data));
2037
2038 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2039 sizeof(*op_data), op_data, NULL);
2040 ll_finish_md_op_data(op_data);
2041
2042 putgl:
2043 if (gid != 0) {
2044 ll_put_grouplock(llss->inode2, file2, gid);
2045 ll_put_grouplock(llss->inode1, file1, gid);
2046 }
2047
2048 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2049 if (rc != 0)
2050 GOTO(free, rc);
2051
2052 /* clear useless flags */
2053 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2054 llss->ia1.ia_valid &= ~ATTR_MTIME;
2055 llss->ia2.ia_valid &= ~ATTR_MTIME;
2056 }
2057
2058 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2059 llss->ia1.ia_valid &= ~ATTR_ATIME;
2060 llss->ia2.ia_valid &= ~ATTR_ATIME;
2061 }
2062
2063 /* update time if requested */
2064 rc = 0;
2065 if (llss->ia2.ia_valid != 0) {
2066 mutex_lock(&llss->inode1->i_mutex);
2067 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2068 mutex_unlock(&llss->inode1->i_mutex);
2069 }
2070
2071 if (llss->ia1.ia_valid != 0) {
2072 int rc1;
2073
2074 mutex_lock(&llss->inode2->i_mutex);
2075 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2076 mutex_unlock(&llss->inode2->i_mutex);
2077 if (rc == 0)
2078 rc = rc1;
2079 }
2080
2081 free:
2082 if (llss != NULL)
2083 OBD_FREE_PTR(llss);
2084
2085 return rc;
2086 }
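/*
 * Illustrative user-space sketch of driving ll_swap_layouts() through the
 * LL_IOC_LOV_SWAP_LAYOUTS case of ll_file_ioctl() below.  Both descriptors
 * must be writable; the field and flag names (sl_*, SWAP_LAYOUTS_*) are the
 * ones consumed above and are declared in the Lustre user headers:
 *
 *	struct lustre_swap_layouts lsl = { 0 };
 *
 *	lsl.sl_fd = fd2;		// the second file of the swap
 *	lsl.sl_gid = 1234;		// non-zero: flush caches via group lock
 *	lsl.sl_dv1 = dv1;		// expected data version of fd1
 *	lsl.sl_flags = SWAP_LAYOUTS_CHECK_DV1 | SWAP_LAYOUTS_KEEP_MTIME;
 *	if (ioctl(fd1, LL_IOC_LOV_SWAP_LAYOUTS, &lsl) < 0)
 *		perror("swap layouts");
 */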
2087
2088 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2089 {
2090 struct md_op_data *op_data;
2091 int rc;
2092
2093 /* Non-root users are forbidden to set or clear flags which are
2094 * NOT defined in HSM_USER_MASK. */
2095 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2096 !cfs_capable(CFS_CAP_SYS_ADMIN))
2097 return -EPERM;
2098
2099 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2100 LUSTRE_OPC_ANY, hss);
2101 if (IS_ERR(op_data))
2102 return PTR_ERR(op_data);
2103
2104 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2105 sizeof(*op_data), op_data, NULL);
2106
2107 ll_finish_md_op_data(op_data);
2108
2109 return rc;
2110 }
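/*
 * Illustrative user-space sketch of ll_hsm_state_set() via the
 * LL_IOC_HSM_STATE_SET case of ll_file_ioctl() below.  Flags outside
 * HSM_USER_MASK require CAP_SYS_ADMIN, as enforced above; HS_DIRTY is an
 * assumed user-settable flag from the HSM user API:
 *
 *	struct hsm_state_set hss = { 0 };
 *
 *	hss.hss_valid = HSS_SETMASK;
 *	hss.hss_setmask = HS_DIRTY;	// tell the copytool the copy is stale
 *	if (ioctl(fd, LL_IOC_HSM_STATE_SET, &hss) < 0)
 *		perror("hsm state set");
 */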
2111
2112 static int ll_hsm_import(struct inode *inode, struct file *file,
2113 struct hsm_user_import *hui)
2114 {
2115 struct hsm_state_set *hss = NULL;
2116 struct iattr *attr = NULL;
2117 int rc;
2118
2119
2120 if (!S_ISREG(inode->i_mode))
2121 return -EINVAL;
2122
2123 /* set HSM flags */
2124 OBD_ALLOC_PTR(hss);
2125 if (hss == NULL)
2126 GOTO(out, rc = -ENOMEM);
2127
2128 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2129 hss->hss_archive_id = hui->hui_archive_id;
2130 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2131 rc = ll_hsm_state_set(inode, hss);
2132 if (rc != 0)
2133 GOTO(out, rc);
2134
2135 OBD_ALLOC_PTR(attr);
2136 if (attr == NULL)
2137 GOTO(out, rc = -ENOMEM);
2138
2139 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2140 attr->ia_mode |= S_IFREG;
2141 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2142 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2143 attr->ia_size = hui->hui_size;
2144 attr->ia_mtime.tv_sec = hui->hui_mtime;
2145 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2146 attr->ia_atime.tv_sec = hui->hui_atime;
2147 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2148
2149 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2150 ATTR_UID | ATTR_GID |
2151 ATTR_MTIME | ATTR_MTIME_SET |
2152 ATTR_ATIME | ATTR_ATIME_SET;
2153
2154 rc = ll_setattr_raw(file->f_dentry, attr, true);
2155 if (rc == -ENODATA)
2156 rc = 0;
2157
2158 out:
2159 if (hss != NULL)
2160 OBD_FREE_PTR(hss);
2161
2162 if (attr != NULL)
2163 OBD_FREE_PTR(attr);
2164
2165 return rc;
2166 }
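/*
 * Illustrative user-space sketch of the LL_IOC_HSM_IMPORT case handled in
 * ll_file_ioctl() below: it registers an already-archived file as released,
 * restoring the saved metadata.  The hui_* fields are the ones consumed
 * above; st is assumed to be a struct stat describing the archived copy:
 *
 *	struct hsm_user_import hui = { 0 };
 *
 *	hui.hui_archive_id = 1;
 *	hui.hui_mode = st.st_mode & 0777;
 *	hui.hui_uid = st.st_uid;
 *	hui.hui_gid = st.st_gid;
 *	hui.hui_size = st.st_size;
 *	hui.hui_mtime = st.st_mtime;
 *	if (ioctl(fd, LL_IOC_HSM_IMPORT, &hui) < 0)
 *		perror("hsm import");
 */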
2167
2168 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2169 {
2170 struct inode *inode = file->f_dentry->d_inode;
2171 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2172 int flags, rc;
2173
2174 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
2175 inode->i_generation, inode, cmd);
2176 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2177
2178 /* asm-ppc{,64} declares TCGETS, et al. as type 't', not 'T' */
2179 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2180 return -ENOTTY;
2181
2182 switch (cmd) {
2183 case LL_IOC_GETFLAGS:
2184 /* Get the current value of the file flags */
2185 return put_user(fd->fd_flags, (int *)arg);
2186 case LL_IOC_SETFLAGS:
2187 case LL_IOC_CLRFLAGS:
2188 /* Set or clear specific file flags */
2189 /* XXX This probably needs checks to ensure the flags are
2190 * not abused, and to handle any flag side effects.
2191 */
2192 if (get_user(flags, (int *) arg))
2193 return -EFAULT;
2194
2195 if (cmd == LL_IOC_SETFLAGS) {
2196 if ((flags & LL_FILE_IGNORE_LOCK) &&
2197 !(file->f_flags & O_DIRECT)) {
2198 CERROR("%s: unable to disable locking on "
2199 "non-O_DIRECT file\n", current->comm);
2200 return -EINVAL;
2201 }
2202
2203 fd->fd_flags |= flags;
2204 } else {
2205 fd->fd_flags &= ~flags;
2206 }
2207 return 0;
2208 case LL_IOC_LOV_SETSTRIPE:
2209 return ll_lov_setstripe(inode, file, arg);
2210 case LL_IOC_LOV_SETEA:
2211 return ll_lov_setea(inode, file, arg);
2212 case LL_IOC_LOV_SWAP_LAYOUTS: {
2213 struct file *file2;
2214 struct lustre_swap_layouts lsl;
2215
2216 if (copy_from_user(&lsl, (char *)arg,
2217 sizeof(struct lustre_swap_layouts)))
2218 return -EFAULT;
2219
2220 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2221 return -EPERM;
2222
2223 file2 = fget(lsl.sl_fd);
2224 if (file2 == NULL)
2225 return -EBADF;
2226
2227 rc = -EPERM;
2228 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2229 rc = ll_swap_layouts(file, file2, &lsl);
2230 fput(file2);
2231 return rc;
2232 }
2233 case LL_IOC_LOV_GETSTRIPE:
2234 return ll_lov_getstripe(inode, arg);
2235 case LL_IOC_RECREATE_OBJ:
2236 return ll_lov_recreate_obj(inode, arg);
2237 case LL_IOC_RECREATE_FID:
2238 return ll_lov_recreate_fid(inode, arg);
2239 case FSFILT_IOC_FIEMAP:
2240 return ll_ioctl_fiemap(inode, arg);
2241 case FSFILT_IOC_GETFLAGS:
2242 case FSFILT_IOC_SETFLAGS:
2243 return ll_iocontrol(inode, file, cmd, arg);
2244 case FSFILT_IOC_GETVERSION_OLD:
2245 case FSFILT_IOC_GETVERSION:
2246 return put_user(inode->i_generation, (int *)arg);
2247 case LL_IOC_GROUP_LOCK:
2248 return ll_get_grouplock(inode, file, arg);
2249 case LL_IOC_GROUP_UNLOCK:
2250 return ll_put_grouplock(inode, file, arg);
2251 case IOC_OBD_STATFS:
2252 return ll_obd_statfs(inode, (void *)arg);
2253
2254 /* We need to special case any other ioctls we want to handle,
2255 * to send them to the MDS/OST as appropriate and to properly
2256 * network encode the arg field.
2257 case FSFILT_IOC_SETVERSION_OLD:
2258 case FSFILT_IOC_SETVERSION:
2259 */
2260 case LL_IOC_FLUSHCTX:
2261 return ll_flush_ctx(inode);
2262 case LL_IOC_PATH2FID: {
2263 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2264 sizeof(struct lu_fid)))
2265 return -EFAULT;
2266
2267 return 0;
2268 }
2269 case OBD_IOC_FID2PATH:
2270 return ll_fid2path(inode, (void *)arg);
2271 case LL_IOC_DATA_VERSION: {
2272 struct ioc_data_version idv;
2273 int rc;
2274
2275 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
2276 return -EFAULT;
2277
2278 rc = ll_data_version(inode, &idv.idv_version,
2279 !(idv.idv_flags & LL_DV_NOFLUSH));
2280
2281 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2282 return -EFAULT;
2283
2284 return rc;
2285 }
2286
2287 case LL_IOC_GET_MDTIDX: {
2288 int mdtidx;
2289
2290 mdtidx = ll_get_mdt_idx(inode);
2291 if (mdtidx < 0)
2292 return mdtidx;
2293
2294 if (put_user((int)mdtidx, (int*)arg))
2295 return -EFAULT;
2296
2297 return 0;
2298 }
2299 case OBD_IOC_GETDTNAME:
2300 case OBD_IOC_GETMDNAME:
2301 return ll_get_obd_name(inode, cmd, arg);
2302 case LL_IOC_HSM_STATE_GET: {
2303 struct md_op_data *op_data;
2304 struct hsm_user_state *hus;
2305 int rc;
2306
2307 OBD_ALLOC_PTR(hus);
2308 if (hus == NULL)
2309 return -ENOMEM;
2310
2311 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2312 LUSTRE_OPC_ANY, hus);
2313 if (IS_ERR(op_data)) {
2314 OBD_FREE_PTR(hus);
2315 return PTR_ERR(op_data);
2316 }
2317
2318 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2319 op_data, NULL);
2320
2321 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2322 rc = -EFAULT;
2323
2324 ll_finish_md_op_data(op_data);
2325 OBD_FREE_PTR(hus);
2326 return rc;
2327 }
2328 case LL_IOC_HSM_STATE_SET: {
2329 struct hsm_state_set *hss;
2330 int rc;
2331
2332 OBD_ALLOC_PTR(hss);
2333 if (hss == NULL)
2334 return -ENOMEM;
2335
2336 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2337 OBD_FREE_PTR(hss);
2338 return -EFAULT;
2339 }
2340
2341 rc = ll_hsm_state_set(inode, hss);
2342
2343 OBD_FREE_PTR(hss);
2344 return rc;
2345 }
2346 case LL_IOC_HSM_ACTION: {
2347 struct md_op_data *op_data;
2348 struct hsm_current_action *hca;
2349 int rc;
2350
2351 OBD_ALLOC_PTR(hca);
2352 if (hca == NULL)
2353 return -ENOMEM;
2354
2355 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2356 LUSTRE_OPC_ANY, hca);
2357 if (IS_ERR(op_data)) {
2358 OBD_FREE_PTR(hca);
2359 return PTR_ERR(op_data);
2360 }
2361
2362 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2363 op_data, NULL);
2364
2365 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2366 rc = -EFAULT;
2367
2368 ll_finish_md_op_data(op_data);
2369 OBD_FREE_PTR(hca);
2370 return rc;
2371 }
2372 case LL_IOC_SET_LEASE: {
2373 struct ll_inode_info *lli = ll_i2info(inode);
2374 struct obd_client_handle *och = NULL;
2375 bool lease_broken;
2376 fmode_t mode = 0;
2377
2378 switch (arg) {
2379 case F_WRLCK:
2380 if (!(file->f_mode & FMODE_WRITE))
2381 return -EPERM;
2382 mode = FMODE_WRITE;
2383 break;
2384 case F_RDLCK:
2385 if (!(file->f_mode & FMODE_READ))
2386 return -EPERM;
2387 mode = FMODE_READ;
2388 break;
2389 case F_UNLCK:
2390 mutex_lock(&lli->lli_och_mutex);
2391 if (fd->fd_lease_och != NULL) {
2392 och = fd->fd_lease_och;
2393 fd->fd_lease_och = NULL;
2394 }
2395 mutex_unlock(&lli->lli_och_mutex);
2396
2397 if (och != NULL) {
2398 mode = och->och_flags &
2399 (FMODE_READ|FMODE_WRITE);
2400 rc = ll_lease_close(och, inode, &lease_broken);
2401 if (rc == 0 && lease_broken)
2402 mode = 0;
2403 } else {
2404 rc = -ENOLCK;
2405 }
2406
2407 /* return the type of lease or error */
2408 return rc < 0 ? rc : (int)mode;
2409 default:
2410 return -EINVAL;
2411 }
2412
2413 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2414
2415 /* apply for lease */
2416 och = ll_lease_open(inode, file, mode, 0);
2417 if (IS_ERR(och))
2418 return PTR_ERR(och);
2419
2420 rc = 0;
2421 mutex_lock(&lli->lli_och_mutex);
2422 if (fd->fd_lease_och == NULL) {
2423 fd->fd_lease_och = och;
2424 och = NULL;
2425 }
2426 mutex_unlock(&lli->lli_och_mutex);
2427 if (och != NULL) {
2428 /* should not happen, since only exclusive leases are supported for now */
2429 ll_lease_close(och, inode, &lease_broken);
2430 rc = -EBUSY;
2431 }
2432 return rc;
2433 }
2434 case LL_IOC_GET_LEASE: {
2435 struct ll_inode_info *lli = ll_i2info(inode);
2436 struct ldlm_lock *lock = NULL;
2437
2438 rc = 0;
2439 mutex_lock(&lli->lli_och_mutex);
2440 if (fd->fd_lease_och != NULL) {
2441 struct obd_client_handle *och = fd->fd_lease_och;
2442
2443 lock = ldlm_handle2lock(&och->och_lease_handle);
2444 if (lock != NULL) {
2445 lock_res_and_lock(lock);
2446 if (!ldlm_is_cancel(lock))
2447 rc = och->och_flags &
2448 (FMODE_READ | FMODE_WRITE);
2449 unlock_res_and_lock(lock);
2450 ldlm_lock_put(lock);
2451 }
2452 }
2453 mutex_unlock(&lli->lli_och_mutex);
2454 return rc;
2455 }
2456 case LL_IOC_HSM_IMPORT: {
2457 struct hsm_user_import *hui;
2458
2459 OBD_ALLOC_PTR(hui);
2460 if (hui == NULL)
2461 return -ENOMEM;
2462
2463 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2464 OBD_FREE_PTR(hui);
2465 return -EFAULT;
2466 }
2467
2468 rc = ll_hsm_import(inode, file, hui);
2469
2470 OBD_FREE_PTR(hui);
2471 return rc;
2472 }
2473 default: {
2474 int err;
2475
2476 if (LLIOC_STOP ==
2477 ll_iocontrol_call(inode, file, cmd, arg, &err))
2478 return err;
2479
2480 return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2481 (void *)arg);
2482 }
2483 }
2484 }
2485
2486
2487 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2488 {
2489 struct inode *inode = file->f_dentry->d_inode;
2490 loff_t retval, eof = 0;
2491
2492 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2493 (origin == SEEK_CUR) ? file->f_pos : 0);
2494 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2495 inode->i_ino, inode->i_generation, inode, retval, retval,
2496 origin);
2497 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2498
2499 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2500 retval = ll_glimpse_size(inode);
2501 if (retval != 0)
2502 return retval;
2503 eof = i_size_read(inode);
2504 }
2505
2506 retval = generic_file_llseek_size(file, offset, origin,
2507 ll_file_maxbytes(inode), eof);
2508 return retval;
2509 }
2510
2511 int ll_flush(struct file *file, fl_owner_t id)
2512 {
2513 struct inode *inode = file->f_dentry->d_inode;
2514 struct ll_inode_info *lli = ll_i2info(inode);
2515 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2516 int rc, err;
2517
2518 LASSERT(!S_ISDIR(inode->i_mode));
2519
2520 /* catch async errors that were recorded back when async writeback
2521 * failed for pages in this mapping. */
2522 rc = lli->lli_async_rc;
2523 lli->lli_async_rc = 0;
2524 err = lov_read_and_clear_async_rc(lli->lli_clob);
2525 if (rc == 0)
2526 rc = err;
2527
2528 /* The application has already been told about the write failure.
2529 * Do not report the failure again. */
2530 if (fd->fd_write_failed)
2531 return 0;
2532 return rc ? -EIO : 0;
2533 }
2534
2535 /**
2536 * Called to make sure a portion of the file has been written out.
2537 * Depending on @mode, it may also send OST_SYNC RPCs to the OSTs.
2538 *
2539 * Return how many pages have been written.
2540 */
2541 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2542 enum cl_fsync_mode mode, int ignore_layout)
2543 {
2544 struct cl_env_nest nest;
2545 struct lu_env *env;
2546 struct cl_io *io;
2547 struct obd_capa *capa = NULL;
2548 struct cl_fsync_io *fio;
2549 int result;
2550
2551 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2552 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2553 return -EINVAL;
2554
2555 env = cl_env_nested_get(&nest);
2556 if (IS_ERR(env))
2557 return PTR_ERR(env);
2558
2559 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2560
2561 io = ccc_env_thread_io(env);
2562 io->ci_obj = cl_i2info(inode)->lli_clob;
2563 io->ci_ignore_layout = ignore_layout;
2564
2565 /* initialize parameters for sync */
2566 fio = &io->u.ci_fsync;
2567 fio->fi_capa = capa;
2568 fio->fi_start = start;
2569 fio->fi_end = end;
2570 fio->fi_fid = ll_inode2fid(inode);
2571 fio->fi_mode = mode;
2572 fio->fi_nr_written = 0;
2573
2574 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2575 result = cl_io_loop(env, io);
2576 else
2577 result = io->ci_result;
2578 if (result == 0)
2579 result = fio->fi_nr_written;
2580 cl_io_fini(env, io);
2581 cl_env_nested_put(&nest, env);
2582
2583 capa_put(capa);
2584
2585 return result;
2586 }
2587
2588 /*
2589 * When dentry is provided (the 'else' case), *file->f_dentry may be
2590 * null and dentry must be used directly rather than pulled from
2591 * *file->f_dentry as is done otherwise.
2592 */
2593
2594 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2595 {
2596 struct dentry *dentry = file->f_dentry;
2597 struct inode *inode = dentry->d_inode;
2598 struct ll_inode_info *lli = ll_i2info(inode);
2599 struct ptlrpc_request *req;
2600 struct obd_capa *oc;
2601 int rc, err;
2602
2603 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2604 inode->i_generation, inode);
2605 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2606
2607 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2608 mutex_lock(&inode->i_mutex);
2609
2610 /* catch async errors that were recorded back when async writeback
2611 * failed for pages in this mapping. */
2612 if (!S_ISDIR(inode->i_mode)) {
2613 err = lli->lli_async_rc;
2614 lli->lli_async_rc = 0;
2615 if (rc == 0)
2616 rc = err;
2617 err = lov_read_and_clear_async_rc(lli->lli_clob);
2618 if (rc == 0)
2619 rc = err;
2620 }
2621
2622 oc = ll_mdscapa_get(inode);
2623 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2624 &req);
2625 capa_put(oc);
2626 if (!rc)
2627 rc = err;
2628 if (!err)
2629 ptlrpc_req_finished(req);
2630
2631 if (datasync && S_ISREG(inode->i_mode)) {
2632 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2633
2634 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2635 CL_FSYNC_ALL, 0);
2636 if (rc == 0 && err < 0)
2637 rc = err;
2638 if (rc < 0)
2639 fd->fd_write_failed = true;
2640 else
2641 fd->fd_write_failed = false;
2642 }
2643
2644 mutex_unlock(&inode->i_mutex);
2645 return rc;
2646 }
2647
2648 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2649 {
2650 struct inode *inode = file->f_dentry->d_inode;
2651 struct ll_sb_info *sbi = ll_i2sbi(inode);
2652 struct ldlm_enqueue_info einfo = {
2653 .ei_type = LDLM_FLOCK,
2654 .ei_cb_cp = ldlm_flock_completion_ast,
2655 .ei_cbdata = file_lock,
2656 };
2657 struct md_op_data *op_data;
2658 struct lustre_handle lockh = {0};
2659 ldlm_policy_data_t flock = {{0}};
2660 int flags = 0;
2661 int rc;
2662 int rc2 = 0;
2663
2664 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2665 inode->i_ino, file_lock);
2666
2667 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2668
2669 if (file_lock->fl_flags & FL_FLOCK) {
2670 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2671 /* flocks are whole-file locks */
2672 flock.l_flock.end = OFFSET_MAX;
2673 /* For flocks, the owner is determined by the local file descriptor */
2674 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2675 } else if (file_lock->fl_flags & FL_POSIX) {
2676 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2677 flock.l_flock.start = file_lock->fl_start;
2678 flock.l_flock.end = file_lock->fl_end;
2679 } else {
2680 return -EINVAL;
2681 }
2682 flock.l_flock.pid = file_lock->fl_pid;
2683
2684 /* Somewhat ugly workaround for svc lockd.
2685 * lockd installs a custom fl_lmops->lm_compare_owner that checks
2686 * whether the fl_owner is the same (which it presumably always is
2687 * between lockd processes on the local node) and then compares the pid.
2688 * As such we assign the pid to the owner field to make it all work;
2689 * a conflict with normal locks is unlikely since the pid space and the
2690 * pointer space for current->files do not intersect. */
2691 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2692 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2693
2694 switch (file_lock->fl_type) {
2695 case F_RDLCK:
2696 einfo.ei_mode = LCK_PR;
2697 break;
2698 case F_UNLCK:
2699 /* An unlock request may or may not have any relation to
2700 * existing locks so we may not be able to pass a lock handle
2701 * via a normal ldlm_lock_cancel() request. The request may even
2702 * unlock a byte range in the middle of an existing lock. In
2703 * order to process an unlock request we need all of the same
2704 * information that is given with a normal read or write record
2705 * lock request. To avoid creating another ldlm unlock (cancel)
2706 * message we'll treat a LCK_NL flock request as an unlock. */
2707 einfo.ei_mode = LCK_NL;
2708 break;
2709 case F_WRLCK:
2710 einfo.ei_mode = LCK_PW;
2711 break;
2712 default:
2713 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2714 file_lock->fl_type);
2715 return -ENOTSUPP;
2716 }
2717
2718 switch (cmd) {
2719 case F_SETLKW:
2720 #ifdef F_SETLKW64
2721 case F_SETLKW64:
2722 #endif
2723 flags = 0;
2724 break;
2725 case F_SETLK:
2726 #ifdef F_SETLK64
2727 case F_SETLK64:
2728 #endif
2729 flags = LDLM_FL_BLOCK_NOWAIT;
2730 break;
2731 case F_GETLK:
2732 #ifdef F_GETLK64
2733 case F_GETLK64:
2734 #endif
2735 flags = LDLM_FL_TEST_LOCK;
2736 /* Save the old mode so that if the mode in the lock changes we
2737 * can decrement the appropriate reader or writer refcount. */
2738 file_lock->fl_type = einfo.ei_mode;
2739 break;
2740 default:
2741 CERROR("unknown fcntl lock command: %d\n", cmd);
2742 return -EINVAL;
2743 }
2744
2745 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2746 LUSTRE_OPC_ANY, NULL);
2747 if (IS_ERR(op_data))
2748 return PTR_ERR(op_data);
2749
2750 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2751 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2752 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2753
2754 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2755 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2756
2757 if ((file_lock->fl_flags & FL_FLOCK) &&
2758 (rc == 0 || file_lock->fl_type == F_UNLCK))
2759 rc2 = flock_lock_file_wait(file, file_lock);
2760 if ((file_lock->fl_flags & FL_POSIX) &&
2761 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2762 !(flags & LDLM_FL_TEST_LOCK))
2763 rc2 = posix_lock_file_wait(file, file_lock);
2764
2765 if (rc2 && file_lock->fl_type != F_UNLCK) {
2766 einfo.ei_mode = LCK_NL;
2767 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2768 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2769 rc = rc2;
2770 }
2771
2772 ll_finish_md_op_data(op_data);
2773
2774 return rc;
2775 }
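/*
 * Illustrative user-space view of ll_file_flock(): both BSD flock() and
 * POSIX fcntl() byte-range locks end up here (FL_FLOCK vs FL_POSIX above),
 * with the request forwarded to the MDT as an LDLM_FLOCK enqueue.  Plain
 * POSIX sketch, nothing Lustre-specific:
 *
 *	struct flock fl = {
 *		.l_type = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start = 0,
 *		.l_len = 4096,			// lock the first 4 KiB only
 *	};
 *
 *	if (fcntl(fd, F_SETLKW, &fl) < 0)	// blocks until granted
 *		perror("fcntl");
 */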
2776
2777 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2778 {
2779 return -ENOSYS;
2780 }
2781
2782 /**
2783 * Test whether some locks matching @bits and @l_req_mode are acquired
2784 * - the bits may be spread across different locks
2785 * - bits that are found are cleared in *bits
2786 * - bits that are not found are kept in *bits
2787 * \param inode [IN]
2788 * \param bits [IN] searched lock bits
2789 * \param l_req_mode [IN] searched lock mode
2790 * \retval boolean, true iff all bits are found
2791 */
2792 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2793 {
2794 struct lustre_handle lockh;
2795 ldlm_policy_data_t policy;
2796 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2797 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2798 struct lu_fid *fid;
2799 __u64 flags;
2800 int i;
2801
2802 if (!inode)
2803 return 0;
2804
2805 fid = &ll_i2info(inode)->lli_fid;
2806 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2807 ldlm_lockname[mode]);
2808
2809 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2810 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2811 policy.l_inodebits.bits = *bits & (1 << i);
2812 if (policy.l_inodebits.bits == 0)
2813 continue;
2814
2815 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2816 &policy, mode, &lockh)) {
2817 struct ldlm_lock *lock;
2818
2819 lock = ldlm_handle2lock(&lockh);
2820 if (lock) {
2821 *bits &=
2822 ~(lock->l_policy_data.l_inodebits.bits);
2823 LDLM_LOCK_PUT(lock);
2824 } else {
2825 *bits &= ~policy.l_inodebits.bits;
2826 }
2827 }
2828 }
2829 return *bits == 0;
2830 }
2831
2832 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2833 struct lustre_handle *lockh, __u64 flags,
2834 ldlm_mode_t mode)
2835 {
2836 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2837 struct lu_fid *fid;
2838 ldlm_mode_t rc;
2839
2840 fid = &ll_i2info(inode)->lli_fid;
2841 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2842
2843 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
2844 fid, LDLM_IBITS, &policy, mode, lockh);
2845
2846 return rc;
2847 }
2848
2849 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
2850 {
2851 /* Already unlinked. Just update nlink and return success */
2852 if (rc == -ENOENT) {
2853 clear_nlink(inode);
2854 /* This path cannot be hit for regular files except in
2855 * the case of obscure races, so there is no need to validate the size.
2856 */
2857 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
2858 return 0;
2859 } else if (rc != 0) {
2860 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
2861 ll_get_fsname(inode->i_sb, NULL, 0),
2862 PFID(ll_inode2fid(inode)), rc);
2863 }
2864
2865 return rc;
2866 }
2867
2868 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2869 __u64 ibits)
2870 {
2871 struct inode *inode = dentry->d_inode;
2872 struct ptlrpc_request *req = NULL;
2873 struct obd_export *exp;
2874 int rc = 0;
2875
2876 LASSERT(inode != NULL);
2877
2878 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2879 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2880
2881 exp = ll_i2mdexp(inode);
2882
2883 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2884 * But in the CMD case it caused some lock issues; this should be fixed
2885 * with the new CMD ibits lock. See bug 12718 */
2886 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
2887 struct lookup_intent oit = { .it_op = IT_GETATTR };
2888 struct md_op_data *op_data;
2889
2890 if (ibits == MDS_INODELOCK_LOOKUP)
2891 oit.it_op = IT_LOOKUP;
2892
2893 /* Call getattr by fid, so do not provide name at all. */
2894 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2895 dentry->d_inode, NULL, 0, 0,
2896 LUSTRE_OPC_ANY, NULL);
2897 if (IS_ERR(op_data))
2898 return PTR_ERR(op_data);
2899
2900 oit.it_create_mode |= M_CHECK_STALE;
2901 rc = md_intent_lock(exp, op_data, NULL, 0,
2902 /* we are not interested in name
2903 based lookup */
2904 &oit, 0, &req,
2905 ll_md_blocking_ast, 0);
2906 ll_finish_md_op_data(op_data);
2907 oit.it_create_mode &= ~M_CHECK_STALE;
2908 if (rc < 0) {
2909 rc = ll_inode_revalidate_fini(inode, rc);
2910 GOTO (out, rc);
2911 }
2912
2913 rc = ll_revalidate_it_finish(req, &oit, dentry);
2914 if (rc != 0) {
2915 ll_intent_release(&oit);
2916 GOTO(out, rc);
2917 }
2918
2919 /* Unlinked? Unhash the dentry so it is not picked up later by
2920 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2921 here because we need to preserve getcwd functionality on 2.6.
2922 Bug 10503 */
2923 if (!dentry->d_inode->i_nlink)
2924 d_lustre_invalidate(dentry, 0);
2925
2926 ll_lookup_finish_locks(&oit, dentry);
2927 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
2928 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2929 obd_valid valid = OBD_MD_FLGETATTR;
2930 struct md_op_data *op_data;
2931 int ealen = 0;
2932
2933 if (S_ISREG(inode->i_mode)) {
2934 rc = ll_get_max_mdsize(sbi, &ealen);
2935 if (rc)
2936 return rc;
2937 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2938 }
2939
2940 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2941 0, ealen, LUSTRE_OPC_ANY,
2942 NULL);
2943 if (IS_ERR(op_data))
2944 return PTR_ERR(op_data);
2945
2946 op_data->op_valid = valid;
2947 /* Since OBD_CONNECT_ATTRFID is not supported, we can't find a
2948 * capa for this inode, because we only keep the capas of
2949 * directories fresh. */
2950 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2951 ll_finish_md_op_data(op_data);
2952 if (rc) {
2953 rc = ll_inode_revalidate_fini(inode, rc);
2954 return rc;
2955 }
2956
2957 rc = ll_prep_inode(&inode, req, NULL, NULL);
2958 }
2959 out:
2960 ptlrpc_req_finished(req);
2961 return rc;
2962 }
2963
2964 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2965 __u64 ibits)
2966 {
2967 struct inode *inode = dentry->d_inode;
2968 int rc;
2969
2970 rc = __ll_inode_revalidate_it(dentry, it, ibits);
2971 if (rc != 0)
2972 return rc;
2973
2974 /* if object isn't regular file, don't validate size */
2975 if (!S_ISREG(inode->i_mode)) {
2976 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2977 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2978 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2979 } else {
2980 /* In case of restore, the MDT has the right size and has
2981 * already sent it back without granting the layout lock;
2982 * the inode is up-to-date, so a glimpse is useless.
2983 * Also, to glimpse we need the layout: while a restore is
2984 * running, the MDT holds the layout lock so the glimpse will
2985 * block until the end of the restore (getattr will block)
2986 */
2987 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
2988 rc = ll_glimpse_size(inode);
2989 }
2990 return rc;
2991 }
2992
2993 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2994 struct lookup_intent *it, struct kstat *stat)
2995 {
2996 struct inode *inode = de->d_inode;
2997 struct ll_sb_info *sbi = ll_i2sbi(inode);
2998 struct ll_inode_info *lli = ll_i2info(inode);
2999 int res = 0;
3000
3001 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3002 MDS_INODELOCK_LOOKUP);
3003 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3004
3005 if (res)
3006 return res;
3007
3008 stat->dev = inode->i_sb->s_dev;
3009 if (ll_need_32bit_api(sbi))
3010 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3011 else
3012 stat->ino = inode->i_ino;
3013 stat->mode = inode->i_mode;
3014 stat->nlink = inode->i_nlink;
3015 stat->uid = inode->i_uid;
3016 stat->gid = inode->i_gid;
3017 stat->rdev = inode->i_rdev;
3018 stat->atime = inode->i_atime;
3019 stat->mtime = inode->i_mtime;
3020 stat->ctime = inode->i_ctime;
3021 stat->blksize = 1 << inode->i_blkbits;
3022
3023 stat->size = i_size_read(inode);
3024 stat->blocks = inode->i_blocks;
3025
3026 return 0;
3027 }
3028 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3029 {
3030 struct lookup_intent it = { .it_op = IT_GETATTR };
3031
3032 return ll_getattr_it(mnt, de, &it, stat);
3033 }
3034
3035 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3036 __u64 start, __u64 len)
3037 {
3038 int rc;
3039 size_t num_bytes;
3040 struct ll_user_fiemap *fiemap;
3041 unsigned int extent_count = fieinfo->fi_extents_max;
3042
3043 num_bytes = sizeof(*fiemap) + (extent_count *
3044 sizeof(struct ll_fiemap_extent));
3045 OBD_ALLOC_LARGE(fiemap, num_bytes);
3046
3047 if (fiemap == NULL)
3048 return -ENOMEM;
3049
3050 fiemap->fm_flags = fieinfo->fi_flags;
3051 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3052 fiemap->fm_start = start;
3053 fiemap->fm_length = len;
3054 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3055 sizeof(struct ll_fiemap_extent));
3056
3057 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3058
3059 fieinfo->fi_flags = fiemap->fm_flags;
3060 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3061 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3062 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3063
3064 OBD_FREE_LARGE(fiemap, num_bytes);
3065 return rc;
3066 }
3067
3068 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3069 {
3070 struct ll_inode_info *lli = ll_i2info(inode);
3071 struct posix_acl *acl = NULL;
3072
3073 spin_lock(&lli->lli_lock);
3074 /* VFS' acl_permission_check->check_acl will release the refcount */
3075 acl = posix_acl_dup(lli->lli_posix_acl);
3076 spin_unlock(&lli->lli_lock);
3077
3078 return acl;
3079 }
3080
3081
3082 int ll_inode_permission(struct inode *inode, int mask)
3083 {
3084 int rc = 0;
3085
3086 #ifdef MAY_NOT_BLOCK
3087 if (mask & MAY_NOT_BLOCK)
3088 return -ECHILD;
3089 #endif
3090
3091 /* as the root inode does not get validated by the lookup operation,
3092 * we need to do it before the permission check. */
3093
3094 if (inode == inode->i_sb->s_root->d_inode) {
3095 struct lookup_intent it = { .it_op = IT_LOOKUP };
3096
3097 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3098 MDS_INODELOCK_LOOKUP);
3099 if (rc)
3100 return rc;
3101 }
3102
3103 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3104 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
3105
3106 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3107 return lustre_check_remote_perm(inode, mask);
3108
3109 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3110 rc = generic_permission(inode, mask);
3111
3112 return rc;
3113 }
3114
3115 /* -o localflock - only provides locally consistent flock locks */
3116 struct file_operations ll_file_operations = {
3117 .read = ll_file_read,
3118 .aio_read = ll_file_aio_read,
3119 .write = ll_file_write,
3120 .aio_write = ll_file_aio_write,
3121 .unlocked_ioctl = ll_file_ioctl,
3122 .open = ll_file_open,
3123 .release = ll_file_release,
3124 .mmap = ll_file_mmap,
3125 .llseek = ll_file_seek,
3126 .splice_read = ll_file_splice_read,
3127 .fsync = ll_fsync,
3128 .flush = ll_flush
3129 };
3130
3131 struct file_operations ll_file_operations_flock = {
3132 .read = ll_file_read,
3133 .aio_read = ll_file_aio_read,
3134 .write = ll_file_write,
3135 .aio_write = ll_file_aio_write,
3136 .unlocked_ioctl = ll_file_ioctl,
3137 .open = ll_file_open,
3138 .release = ll_file_release,
3139 .mmap = ll_file_mmap,
3140 .llseek = ll_file_seek,
3141 .splice_read = ll_file_splice_read,
3142 .fsync = ll_fsync,
3143 .flush = ll_flush,
3144 .flock = ll_file_flock,
3145 .lock = ll_file_flock
3146 };
3147
3148 /* These are for -o noflock - to return ENOSYS on flock calls */
3149 struct file_operations ll_file_operations_noflock = {
3150 .read = ll_file_read,
3151 .aio_read = ll_file_aio_read,
3152 .write = ll_file_write,
3153 .aio_write = ll_file_aio_write,
3154 .unlocked_ioctl = ll_file_ioctl,
3155 .open = ll_file_open,
3156 .release = ll_file_release,
3157 .mmap = ll_file_mmap,
3158 .llseek = ll_file_seek,
3159 .splice_read = ll_file_splice_read,
3160 .fsync = ll_fsync,
3161 .flush = ll_flush,
3162 .flock = ll_file_noflock,
3163 .lock = ll_file_noflock
3164 };
3165
3166 struct inode_operations ll_file_inode_operations = {
3167 .setattr = ll_setattr,
3168 .getattr = ll_getattr,
3169 .permission = ll_inode_permission,
3170 .setxattr = ll_setxattr,
3171 .getxattr = ll_getxattr,
3172 .listxattr = ll_listxattr,
3173 .removexattr = ll_removexattr,
3174 .fiemap = ll_fiemap,
3175 .get_acl = ll_get_acl,
3176 };
3177
3178 /* dynamic ioctl number support routines */
3179 static struct llioc_ctl_data {
3180 struct rw_semaphore ioc_sem;
3181 struct list_head ioc_head;
3182 } llioc = {
3183 __RWSEM_INITIALIZER(llioc.ioc_sem),
3184 LIST_HEAD_INIT(llioc.ioc_head)
3185 };
3186
3187
3188 struct llioc_data {
3189 struct list_head iocd_list;
3190 unsigned int iocd_size;
3191 llioc_callback_t iocd_cb;
3192 unsigned int iocd_count;
3193 unsigned int iocd_cmd[0];
3194 };
3195
3196 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3197 {
3198 unsigned int size;
3199 struct llioc_data *in_data = NULL;
3200
3201 if (cb == NULL || cmd == NULL ||
3202 count > LLIOC_MAX_CMD || count < 0)
3203 return NULL;
3204
3205 size = sizeof(*in_data) + count * sizeof(unsigned int);
3206 OBD_ALLOC(in_data, size);
3207 if (in_data == NULL)
3208 return NULL;
3209
3210 memset(in_data, 0, sizeof(*in_data));
3211 in_data->iocd_size = size;
3212 in_data->iocd_cb = cb;
3213 in_data->iocd_count = count;
3214 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3215
3216 down_write(&llioc.ioc_sem);
3217 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3218 up_write(&llioc.ioc_sem);
3219
3220 return in_data;
3221 }
3222
3223 void ll_iocontrol_unregister(void *magic)
3224 {
3225 struct llioc_data *tmp;
3226
3227 if (magic == NULL)
3228 return;
3229
3230 down_write(&llioc.ioc_sem);
3231 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3232 if (tmp == magic) {
3233 unsigned int size = tmp->iocd_size;
3234
3235 list_del(&tmp->iocd_list);
3236 up_write(&llioc.ioc_sem);
3237
3238 OBD_FREE(tmp, size);
3239 return;
3240 }
3241 }
3242 up_write(&llioc.ioc_sem);
3243
3244 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3245 }
3246
3247 EXPORT_SYMBOL(ll_iocontrol_register);
3248 EXPORT_SYMBOL(ll_iocontrol_unregister);
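/*
 * Sketch of how another module is expected to use the two exports above.
 * The callback signature is inferred from the call site in
 * ll_iocontrol_call() below (and llioc_callback_t in llite_internal.h), so
 * treat it as an assumption; MY_PRIVATE_IOC and the my_* names are
 * hypothetical:
 *
 *	static enum llioc_iter my_ioc_cb(struct inode *inode, struct file *file,
 *					 unsigned int cmd, unsigned long arg,
 *					 void *magic, int *rcp)
 *	{
 *		*rcp = 0;		// handle cmd here
 *		return LLIOC_STOP;	// stop iterating, *rcp is the result
 *	}
 *
 *	static unsigned int my_cmds[] = { MY_PRIVATE_IOC };
 *	static void *my_magic;
 *
 *	my_magic = ll_iocontrol_register(my_ioc_cb, ARRAY_SIZE(my_cmds),
 *					 my_cmds);
 *	...
 *	ll_iocontrol_unregister(my_magic);
 */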
3249
3250 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3251 unsigned int cmd, unsigned long arg, int *rcp)
3252 {
3253 enum llioc_iter ret = LLIOC_CONT;
3254 struct llioc_data *data;
3255 int rc = -EINVAL, i;
3256
3257 down_read(&llioc.ioc_sem);
3258 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3259 for (i = 0; i < data->iocd_count; i++) {
3260 if (cmd != data->iocd_cmd[i])
3261 continue;
3262
3263 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3264 break;
3265 }
3266
3267 if (ret == LLIOC_STOP)
3268 break;
3269 }
3270 up_read(&llioc.ioc_sem);
3271
3272 if (rcp)
3273 *rcp = rc;
3274 return ret;
3275 }
3276
3277 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3278 {
3279 struct ll_inode_info *lli = ll_i2info(inode);
3280 struct cl_env_nest nest;
3281 struct lu_env *env;
3282 int result;
3283
3284 if (lli->lli_clob == NULL)
3285 return 0;
3286
3287 env = cl_env_nested_get(&nest);
3288 if (IS_ERR(env))
3289 return PTR_ERR(env);
3290
3291 result = cl_conf_set(env, lli->lli_clob, conf);
3292 cl_env_nested_put(&nest, env);
3293
3294 if (conf->coc_opc == OBJECT_CONF_SET) {
3295 struct ldlm_lock *lock = conf->coc_lock;
3296
3297 LASSERT(lock != NULL);
3298 LASSERT(ldlm_has_layout(lock));
3299 if (result == 0) {
3300 /* it can only be allowed to match after the layout is
3301 * applied to the inode, otherwise a false layout would be
3302 * seen. Applying the layout should happen before dropping
3303 * the intent lock. */
3304 ldlm_lock_allow_match(lock);
3305 }
3306 }
3307 return result;
3308 }
3309
3310 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3311 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3313 {
3314 struct ll_sb_info *sbi = ll_i2sbi(inode);
3315 struct obd_capa *oc;
3316 struct ptlrpc_request *req;
3317 struct mdt_body *body;
3318 void *lvbdata;
3319 void *lmm;
3320 int lmmsize;
3321 int rc;
3322
3323 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3324 PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
3325 lock->l_lvb_data, lock->l_lvb_len);
3326
3327 if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
3328 return 0;
3329
3330 /* if the layout lock was granted right away, the layout is returned
3331 * within the DLM_LVB of the dlm reply; otherwise, if the lock was ever
3332 * blocked and then granted via a completion ast, we have to fetch the
3333 * layout here. Please note that we can't use the LVB buffer in the
3334 * completion AST because it isn't large enough */
3335 oc = ll_mdscapa_get(inode);
3336 rc = ll_get_max_mdsize(sbi, &lmmsize);
3337 if (rc == 0)
3338 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3339 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3340 lmmsize, 0, &req);
3341 capa_put(oc);
3342 if (rc < 0)
3343 return rc;
3344
3345 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3346 if (body == NULL || body->eadatasize > lmmsize)
3347 GOTO(out, rc = -EPROTO);
3348
3349 lmmsize = body->eadatasize;
3350 if (lmmsize == 0) /* empty layout */
3351 GOTO(out, rc = 0);
3352
3353 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3354 if (lmm == NULL)
3355 GOTO(out, rc = -EFAULT);
3356
3357 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3358 if (lvbdata == NULL)
3359 GOTO(out, rc = -ENOMEM);
3360
3361 memcpy(lvbdata, lmm, lmmsize);
3362 lock_res_and_lock(lock);
3363 if (lock->l_lvb_data != NULL)
3364 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3365
3366 lock->l_lvb_data = lvbdata;
3367 lock->l_lvb_len = lmmsize;
3368 unlock_res_and_lock(lock);
3369
3370 out:
3371 ptlrpc_req_finished(req);
3372 return rc;
3373 }
3374
3375 /**
3376 * Apply the layout to the inode. Layout lock is held and will be released
3377 * in this function.
3378 */
3379 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3380 struct inode *inode, __u32 *gen, bool reconf)
3381 {
3382 struct ll_inode_info *lli = ll_i2info(inode);
3383 struct ll_sb_info *sbi = ll_i2sbi(inode);
3384 struct ldlm_lock *lock;
3385 struct lustre_md md = { NULL };
3386 struct cl_object_conf conf;
3387 int rc = 0;
3388 bool lvb_ready;
3389 bool wait_layout = false;
3390
3391 LASSERT(lustre_handle_is_used(lockh));
3392
3393 lock = ldlm_handle2lock(lockh);
3394 LASSERT(lock != NULL);
3395 LASSERT(ldlm_has_layout(lock));
3396
3397 LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
3398 inode, PFID(&lli->lli_fid), reconf);
3399
3400 /* in case this is a cached lock, reinstate it with the new inode */
3401 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3402
3403 lock_res_and_lock(lock);
3404 lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
3405 unlock_res_and_lock(lock);
3406 /* checking lvb_ready is racy but this is okay. The worst case is
3407 * that multiple processes may configure the file at the same time. */
3408 if (lvb_ready || !reconf) {
3409 rc = -ENODATA;
3410 if (lvb_ready) {
3411 /* layout_gen must be valid if the layout lock is not
3412 * cancelled and the stripe has already been set */
3413 *gen = lli->lli_layout_gen;
3414 rc = 0;
3415 }
3416 GOTO(out, rc);
3417 }
3418
3419 rc = ll_layout_fetch(inode, lock);
3420 if (rc < 0)
3421 GOTO(out, rc);
3422
3423 /* for layout lock, lmm is returned in lock's lvb.
3424 * lvb_data is immutable if the lock is held so it's safe to access it
3425 * without res lock. See the description in ldlm_lock_decref_internal()
3426 * for the condition to free lvb_data of layout lock */
3427 if (lock->l_lvb_data != NULL) {
3428 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3429 lock->l_lvb_data, lock->l_lvb_len);
3430 if (rc >= 0) {
3431 *gen = LL_LAYOUT_GEN_EMPTY;
3432 if (md.lsm != NULL)
3433 *gen = md.lsm->lsm_layout_gen;
3434 rc = 0;
3435 } else {
3436 CERROR("%s: file "DFID" unpackmd error: %d\n",
3437 ll_get_fsname(inode->i_sb, NULL, 0),
3438 PFID(&lli->lli_fid), rc);
3439 }
3440 }
3441 if (rc < 0)
3442 GOTO(out, rc);
3443
3444 /* set the layout on the file. This is unlikely to fail as the old
3445 * layout has surely been eliminated */
3446 memset(&conf, 0, sizeof(conf));
3447 conf.coc_opc = OBJECT_CONF_SET;
3448 conf.coc_inode = inode;
3449 conf.coc_lock = lock;
3450 conf.u.coc_md = &md;
3451 rc = ll_layout_conf(inode, &conf);
3452
3453 if (md.lsm != NULL)
3454 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3455
3456 /* refresh layout failed, need to wait */
3457 wait_layout = rc == -EBUSY;
3458
3459 out:
3460 LDLM_LOCK_PUT(lock);
3461 ldlm_lock_decref(lockh, mode);
3462
3463 /* wait for the IO to complete if the layout is still being used. */
3464 if (wait_layout) {
3465 CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
3466 ll_get_fsname(inode->i_sb, NULL, 0),
3467 inode, PFID(&lli->lli_fid));
3468
3469 memset(&conf, 0, sizeof(conf));
3470 conf.coc_opc = OBJECT_CONF_WAIT;
3471 conf.coc_inode = inode;
3472 rc = ll_layout_conf(inode, &conf);
3473 if (rc == 0)
3474 rc = -EAGAIN;
3475
3476 CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
3477 PFID(&lli->lli_fid), rc);
3478 }
3479 return rc;
3480 }
3481
3482 /**
3483 * This function checks if there exists a LAYOUT lock on the client side,
3484 * or enqueues it if it doesn't have one in cache.
3485 *
3486 * This function does not hold the layout lock, so it may be revoked any
3487 * time after this function returns. Any operation that depends on the
3488 * layout should be redone in that case.
3489 *
3490 * This function should be called before lov_io_init() to get an up-to-date
3491 * layout version; the caller should save the version number, and after the
3492 * IO is finished this function should be called again to verify that the
3493 * layout did not change during the IO.
3494 */
3495 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3496 {
3497 struct ll_inode_info *lli = ll_i2info(inode);
3498 struct ll_sb_info *sbi = ll_i2sbi(inode);
3499 struct md_op_data *op_data;
3500 struct lookup_intent it;
3501 struct lustre_handle lockh;
3502 ldlm_mode_t mode;
3503 struct ldlm_enqueue_info einfo = {
3504 .ei_type = LDLM_IBITS,
3505 .ei_mode = LCK_CR,
3506 .ei_cb_bl = ll_md_blocking_ast,
3507 .ei_cb_cp = ldlm_completion_ast,
3508 };
3509 int rc;
3510
3511 *gen = lli->lli_layout_gen;
3512 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3513 return 0;
3514
3515 /* sanity checks */
3516 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3517 LASSERT(S_ISREG(inode->i_mode));
3518
3519 /* the layout lock is mostly cached on the local side, so try to match
3520 * it before grabbing the layout lock mutex. */
3521 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3522 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3523 if (mode != 0) { /* hit cached lock */
3524 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3525 if (rc == 0)
3526 return 0;
3527
3528 /* better to hold lli_layout_mutex and try again, otherwise
3529 * there is a starvation problem. */
3530 }
3531
3532 /* take layout lock mutex to enqueue layout lock exclusively. */
3533 mutex_lock(&lli->lli_layout_mutex);
3534
3535 again:
3536 /* try again. Maybe somebody else has done this. */
3537 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3538 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3539 if (mode != 0) { /* hit cached lock */
3540 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3541 if (rc == -EAGAIN)
3542 goto again;
3543
3544 mutex_unlock(&lli->lli_layout_mutex);
3545 return rc;
3546 }
3547
3548 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3549 0, 0, LUSTRE_OPC_ANY, NULL);
3550 if (IS_ERR(op_data)) {
3551 mutex_unlock(&lli->lli_layout_mutex);
3552 return PTR_ERR(op_data);
3553 }
3554
3555 /* have to enqueue one */
3556 memset(&it, 0, sizeof(it));
3557 it.it_op = IT_LAYOUT;
3558 lockh.cookie = 0ULL;
3559
3560 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
3561 ll_get_fsname(inode->i_sb, NULL, 0), inode,
3562 PFID(&lli->lli_fid));
3563
3564 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
3565 NULL, 0, NULL, 0);
3566 if (it.d.lustre.it_data != NULL)
3567 ptlrpc_req_finished(it.d.lustre.it_data);
3568 it.d.lustre.it_data = NULL;
3569
3570 ll_finish_md_op_data(op_data);
3571
3572 mode = it.d.lustre.it_lock_mode;
3573 it.d.lustre.it_lock_mode = 0;
3574 ll_intent_drop_lock(&it);
3575
3576 if (rc == 0) {
3577 /* set lock data in case this is a new lock */
3578 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3579 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3580 if (rc == -EAGAIN)
3581 goto again;
3582 }
3583 mutex_unlock(&lli->lli_layout_mutex);
3584
3585 return rc;
3586 }
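/*
 * Sketch of the calling pattern described above ll_layout_refresh(): fetch
 * the layout generation before starting IO and check it again afterwards,
 * redoing the IO if the layout changed underneath it.  This is an
 * illustrative outline (do_the_io is a placeholder), not a verbatim copy of
 * the IO path:
 *
 *	__u32 gen_before, gen_after;
 *	int rc;
 *
 *	rc = ll_layout_refresh(inode, &gen_before);
 *	if (rc == 0)
 *		rc = do_the_io(inode);		// e.g. cl_io_loop()-based IO
 *	if (rc == 0 && ll_layout_refresh(inode, &gen_after) == 0 &&
 *	    gen_after != gen_before)
 *		rc = -EAGAIN;			// layout changed: restart the IO
 */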
3587
3588 /**
3589 * This function sends a restore request to the MDT.
3590 */
3591 int ll_layout_restore(struct inode *inode)
3592 {
3593 struct hsm_user_request *hur;
3594 int len, rc;
3595
3596 len = sizeof(struct hsm_user_request) +
3597 sizeof(struct hsm_user_item);
3598 OBD_ALLOC(hur, len);
3599 if (hur == NULL)
3600 return -ENOMEM;
3601
3602 hur->hur_request.hr_action = HUA_RESTORE;
3603 hur->hur_request.hr_archive_id = 0;
3604 hur->hur_request.hr_flags = 0;
3605 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3606 sizeof(hur->hur_user_item[0].hui_fid));
3607 hur->hur_user_item[0].hui_extent.length = -1;
3608 hur->hur_request.hr_itemcount = 1;
3609 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
3610 len, hur, NULL);
3611 OBD_FREE(hur, len);
3612 return rc;
3613 }