staging/lustre: fix 'program hangs' errors
[deliverable/linux.git] / drivers / staging / lustre / lustre / llite / dir.c
CommitLineData
d7e09d03
PT
1/*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26/*
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32/*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/llite/dir.c
37 *
38 * Directory code for lustre client.
39 */
40
41#include <linux/fs.h>
42#include <linux/pagemap.h>
43#include <linux/mm.h>
44#include <linux/version.h>
45#include <asm/uaccess.h>
46#include <linux/buffer_head.h> // for wait_on_buffer
47#include <linux/pagevec.h>
2870cd10 48#include <linux/prefetch.h>
d7e09d03
PT
49
50#define DEBUG_SUBSYSTEM S_LLITE
51
52#include <lustre/lustre_idl.h>
53#include <obd_support.h>
54#include <obd_class.h>
55#include <lustre_lib.h>
56#include <lustre/lustre_idl.h>
57#include <lustre_lite.h>
58#include <lustre_dlm.h>
59#include <lustre_fid.h>
60#include "llite_internal.h"
61
62/*
63 * (new) readdir implementation overview.
64 *
65 * Original lustre readdir implementation cached exact copy of raw directory
66 * pages on the client. These pages were indexed in client page cache by
67 * logical offset in the directory file. This design, while very simple and
68 * intuitive had some inherent problems:
69 *
70 * . it implies that byte offset to the directory entry serves as a
71 * telldir(3)/seekdir(3) cookie, but that offset is not stable: in
72 * ext3/htree directory entries may move due to splits, and more
73 * importantly,
74 *
75 * . it is incompatible with the design of split directories for cmd3,
76 * that assumes that names are distributed across nodes based on their
77 * hash, and so readdir should be done in hash order.
78 *
79 * New readdir implementation does readdir in hash order, and uses hash of a
 80 * file name as a telldir/seekdir cookie. This led to a number of complications:
81 *
82 * . hash is not unique, so it cannot be used to index cached directory
83 * pages on the client (note, that it requires a whole pageful of hash
84 * collided entries to cause two pages to have identical hashes);
85 *
86 * . hash is not unique, so it cannot, strictly speaking, be used as an
87 * entry cookie. ext3/htree has the same problem and lustre implementation
88 * mimics their solution: seekdir(hash) positions directory at the first
89 * entry with the given hash.
90 *
91 * Client side.
92 *
93 * 0. caching
94 *
95 * Client caches directory pages using hash of the first entry as an index. As
96 * noted above hash is not unique, so this solution doesn't work as is:
97 * special processing is needed for "page hash chains" (i.e., sequences of
98 * pages filled with entries all having the same hash value).
99 *
100 * First, such chains have to be detected. To this end, server returns to the
101 * client the hash of the first entry on the page next to one returned. When
102 * client detects that this hash is the same as hash of the first entry on the
103 * returned page, page hash collision has to be handled. Pages in the
104 * hash chain, except first one, are termed "overflow pages".
105 *
106 * Solution to index uniqueness problem is to not cache overflow
107 * pages. Instead, when page hash collision is detected, all overflow pages
108 * from emerging chain are immediately requested from the server and placed in
109 * a special data structure (struct ll_dir_chain). This data structure is used
110 * by ll_readdir() to process entries from overflow pages. When readdir
111 * invocation finishes, overflow pages are discarded. If page hash collision
112 * chain weren't completely processed, next call to readdir will again detect
113 * page hash collision, again read overflow pages in, process next portion of
114 * entries and again discard the pages. This is not as wasteful as it looks,
115 * because, given reasonable hash, page hash collisions are extremely rare.
116 *
117 * 1. directory positioning
118 *
119 * When seekdir(hash) is called, original
120 *
121 *
122 *
123 *
124 *
125 *
126 *
127 *
128 * Server.
129 *
130 * identification of and access to overflow pages
131 *
132 * page format
133 *
134 * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains
135 * a header lu_dirpage which describes the start/end hash, and whether this
136 * page is empty (contains no dir entry) or hash collide with next page.
137 * After client receives reply, several pages will be integrated into dir page
138 * in PAGE_CACHE_SIZE (if PAGE_CACHE_SIZE greater than LU_PAGE_SIZE), and the
139 * lu_dirpage for this integrated page will be adjusted. See
140 * lmv_adjust_dirpages().
141 *
142 */
143
144/* returns the page unlocked, but with a reference */
145static int ll_dir_filler(void *_hash, struct page *page0)
146{
147 struct inode *inode = page0->mapping->host;
148 int hash64 = ll_i2sbi(inode)->ll_flags & LL_SBI_64BIT_HASH;
149 struct obd_export *exp = ll_i2sbi(inode)->ll_md_exp;
150 struct ptlrpc_request *request;
151 struct mdt_body *body;
152 struct md_op_data *op_data;
153 __u64 hash = *((__u64 *)_hash);
154 struct page **page_pool;
155 struct page *page;
156 struct lu_dirpage *dp;
157 int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_CACHE_SHIFT;
158 int nrdpgs = 0; /* number of pages read actually */
159 int npages;
160 int i;
161 int rc;
162 ENTRY;
163
164 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash "LPU64"\n",
165 inode->i_ino, inode->i_generation, inode, hash);
166
167 LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
168
169 OBD_ALLOC(page_pool, sizeof(page) * max_pages);
170 if (page_pool != NULL) {
171 page_pool[0] = page0;
172 } else {
173 page_pool = &page0;
174 max_pages = 1;
175 }
176 for (npages = 1; npages < max_pages; npages++) {
177 page = page_cache_alloc_cold(inode->i_mapping);
178 if (!page)
179 break;
180 page_pool[npages] = page;
181 }
182
183 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
184 LUSTRE_OPC_ANY, NULL);
185 op_data->op_npages = npages;
186 op_data->op_offset = hash;
187 rc = md_readpage(exp, op_data, page_pool, &request);
188 ll_finish_md_op_data(op_data);
189 if (rc == 0) {
190 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
191 /* Checked by mdc_readpage() */
192 LASSERT(body != NULL);
193
194 if (body->valid & OBD_MD_FLSIZE)
195 cl_isize_write(inode, body->size);
196
197 nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_CACHE_SIZE-1)
198 >> PAGE_CACHE_SHIFT;
199 SetPageUptodate(page0);
200 }
201 unlock_page(page0);
202 ptlrpc_req_finished(request);
203
204 CDEBUG(D_VFSTRACE, "read %d/%d pages\n", nrdpgs, npages);
205
206 ll_pagevec_init(&lru_pvec, 0);
207 for (i = 1; i < npages; i++) {
208 unsigned long offset;
209 int ret;
210
211 page = page_pool[i];
212
213 if (rc < 0 || i >= nrdpgs) {
214 page_cache_release(page);
215 continue;
216 }
217
218 SetPageUptodate(page);
219
220 dp = kmap(page);
221 hash = le64_to_cpu(dp->ldp_hash_start);
222 kunmap(page);
223
224 offset = hash_x_index(hash, hash64);
225
226 prefetchw(&page->flags);
227 ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
228 GFP_KERNEL);
229 if (ret == 0) {
230 unlock_page(page);
231 if (ll_pagevec_add(&lru_pvec, page) == 0)
232 ll_pagevec_lru_add_file(&lru_pvec);
233 } else {
234 CDEBUG(D_VFSTRACE, "page %lu add to page cache failed:"
235 " %d\n", offset, ret);
236 }
237 page_cache_release(page);
238 }
239 ll_pagevec_lru_add_file(&lru_pvec);
240
241 if (page_pool != &page0)
242 OBD_FREE(page_pool, sizeof(struct page *) * max_pages);
243 EXIT;
244 return rc;
245}
246
/* Validate the on-disk format of a directory page just fetched from the MDS.
 * Real format checking is not implemented yet; we only set PG_checked so
 * ll_get_dir_page() does not re-run this on every lookup.  @dir is currently
 * unused, kept for the future format check. */
static void ll_check_page(struct inode *dir, struct page *page)
{
	/* XXX: check page format later */
	SetPageChecked(page);
}
252
253void ll_release_page(struct page *page, int remove)
254{
255 kunmap(page);
256 if (remove) {
257 lock_page(page);
258 if (likely(page->mapping != NULL))
259 truncate_complete_page(page->mapping, page);
260 unlock_page(page);
261 }
262 page_cache_release(page);
263}
264
/*
 * Find, kmap and return the cached directory page that contains the given
 * hash.
 *
 * On success returns the page kmapped (caller releases via ll_release_page())
 * and updates *start/*end to the page's hash range and possibly *hash (both
 * shifted down by 32 bits on a 32-bit client of a 64-bit-hash server).
 * Returns NULL when no usable cached page covers *hash, or ERR_PTR(-EIO)
 * when a matching page exists but is not uptodate.
 */
static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
				       __u64 *start, __u64 *end)
{
	int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
	struct address_space *mapping = dir->i_mapping;
	/*
	 * Complement of hash is used as an index so that
	 * radix_tree_gang_lookup() can be used to find a page with starting
	 * hash _smaller_ than one we are looking for.
	 */
	unsigned long offset = hash_x_index(*hash, hash64);
	struct page *page;
	int found;

	TREE_READ_LOCK_IRQ(mapping);
	found = radix_tree_gang_lookup(&mapping->page_tree,
				       (void **)&page, offset, 1);
	if (found > 0) {
		struct lu_dirpage *dp;

		/* pin the page before dropping the tree lock */
		page_cache_get(page);
		TREE_READ_UNLOCK_IRQ(mapping);
		/*
		 * In contrast to find_lock_page() we are sure that directory
		 * page cannot be truncated (while DLM lock is held) and,
		 * hence, can avoid restart.
		 *
		 * In fact, page cannot be locked here at all, because
		 * ll_dir_filler() does synchronous io.
		 */
		wait_on_page_locked(page);
		if (PageUptodate(page)) {
			dp = kmap(page);
			if (BITS_PER_LONG == 32 && hash64) {
				/* 32-bit client, 64-bit server hash: work in
				 * the truncated 32-bit hash space */
				*start = le64_to_cpu(dp->ldp_hash_start) >> 32;
				*end = le64_to_cpu(dp->ldp_hash_end) >> 32;
				*hash = *hash >> 32;
			} else {
				*start = le64_to_cpu(dp->ldp_hash_start);
				*end = le64_to_cpu(dp->ldp_hash_end);
			}
			LASSERTF(*start <= *hash, "start = "LPX64",end = "
				 LPX64",hash = "LPX64"\n", *start, *end, *hash);
			CDEBUG(D_VFSTRACE, "page %lu [%llu %llu], hash "LPU64"\n",
			       offset, *start, *end, *hash);
			if (*hash > *end) {
				/* page found but wanted hash lies past it */
				ll_release_page(page, 0);
				page = NULL;
			} else if (*end != *start && *hash == *end) {
				/*
				 * upon hash collision, remove this page,
				 * otherwise put page reference, and
				 * ll_get_dir_page() will issue RPC to fetch
				 * the page we want.
				 */
				ll_release_page(page,
				    le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
				page = NULL;
			}
		} else {
			/* synchronous read failed earlier */
			page_cache_release(page);
			page = ERR_PTR(-EIO);
		}

	} else {
		TREE_READ_UNLOCK_IRQ(mapping);
		page = NULL;
	}
	return page;
}
338
/*
 * Return the directory page of @dir containing entry with the given @hash,
 * kmapped; the caller must release it with ll_release_page().
 *
 * Takes (or matches an already-granted) LDLM PR lock on the directory's
 * UPDATE inodebits to keep cached pages valid, looks the page up in the
 * cache under lli_readdir_mutex, and falls back to read_cache_page() /
 * ll_dir_filler() to fetch it from the MDS.  Returns ERR_PTR() on failure.
 * @chain is accepted for the overflow-page design (see the file-top
 * comment) but is not consulted here.
 */
struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
			     struct ll_dir_chain *chain)
{
	ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
	struct address_space *mapping = dir->i_mapping;
	struct lustre_handle lockh;
	struct lu_dirpage *dp;
	struct page *page;
	ldlm_mode_t mode;
	int rc;
	__u64 start = 0;
	__u64 end = 0;
	__u64 lhash = hash;
	struct ll_inode_info *lli = ll_i2info(dir);
	int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;

	mode = LCK_PR;
	rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
			   ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh);
	if (!rc) {
		/* no cached lock: enqueue a fresh IT_READDIR lock */
		struct ldlm_enqueue_info einfo = {.ei_type = LDLM_IBITS,
						  .ei_mode = mode,
						  .ei_cb_bl =
						  ll_md_blocking_ast,
						  .ei_cb_cp =
						  ldlm_completion_ast,
						  .ei_cb_gl = NULL,
						  .ei_cb_wg = NULL,
						  .ei_cbdata = NULL};
		struct lookup_intent it = { .it_op = IT_READDIR };
		struct ptlrpc_request *request;
		struct md_op_data *op_data;

		op_data = ll_prep_md_op_data(NULL, dir, NULL, NULL, 0, 0,
					     LUSTRE_OPC_ANY, NULL);
		if (IS_ERR(op_data))
			return (void *)op_data;

		rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it,
				op_data, &lockh, NULL, 0, NULL, 0);

		ll_finish_md_op_data(op_data);

		request = (struct ptlrpc_request *)it.d.lustre.it_data;
		if (request)
			ptlrpc_req_finished(request);
		if (rc < 0) {
			CERROR("lock enqueue: "DFID" at "LPU64": rc %d\n",
			       PFID(ll_inode2fid(dir)), hash, rc);
			return ERR_PTR(rc);
		}

		CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n",
		       dir, dir->i_ino, dir->i_generation);
		md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
				 &it.d.lustre.it_lock_handle, dir, NULL);
	} else {
		/* for cross-ref object, l_ast_data of the lock may not be set,
		 * we reset it here */
		md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &lockh.cookie,
				 dir, NULL);
	}
	ldlm_lock_dump_handle(D_OTHER, &lockh);

	/* serialize against concurrent readdirs of this directory */
	mutex_lock(&lli->lli_readdir_mutex);
	page = ll_dir_page_locate(dir, &lhash, &start, &end);
	if (IS_ERR(page)) {
		CERROR("dir page locate: "DFID" at "LPU64": rc %ld\n",
		       PFID(ll_inode2fid(dir)), lhash, PTR_ERR(page));
		GOTO(out_unlock, page);
	} else if (page != NULL) {
		/*
		 * XXX nikita: not entirely correct handling of a corner case:
		 * suppose hash chain of entries with hash value HASH crosses
		 * border between pages P0 and P1. First both P0 and P1 are
		 * cached, seekdir() is called for some entry from the P0 part
		 * of the chain. Later P0 goes out of cache. telldir(HASH)
		 * happens and finds P1, as it starts with matching hash
		 * value. Remaining entries from P0 part of the chain are
		 * skipped. (Is that really a bug?)
		 *
		 * Possible solutions: 0. don't cache P1 is such case, handle
		 * it as an "overflow" page. 1. invalidate all pages at
		 * once. 2. use HASH|1 as an index for P1.
		 */
		GOTO(hash_collision, page);
	}

	/* cache miss: synchronous fetch via ll_dir_filler() */
	page = read_cache_page(mapping, hash_x_index(hash, hash64),
			       ll_dir_filler, &lhash);
	if (IS_ERR(page)) {
		CERROR("read cache page: "DFID" at "LPU64": rc %ld\n",
		       PFID(ll_inode2fid(dir)), hash, PTR_ERR(page));
		GOTO(out_unlock, page);
	}

	wait_on_page_locked(page);
	(void)kmap(page);
	if (!PageUptodate(page)) {
		CERROR("page not updated: "DFID" at "LPU64": rc %d\n",
		       PFID(ll_inode2fid(dir)), hash, -5);
		goto fail;
	}
	if (!PageChecked(page))
		ll_check_page(dir, page);
	if (PageError(page)) {
		CERROR("page error: "DFID" at "LPU64": rc %d\n",
		       PFID(ll_inode2fid(dir)), hash, -5);
		goto fail;
	}
hash_collision:
	dp = page_address(page);
	if (BITS_PER_LONG == 32 && hash64) {
		start = le64_to_cpu(dp->ldp_hash_start) >> 32;
		end = le64_to_cpu(dp->ldp_hash_end) >> 32;
		lhash = hash >> 32;
	} else {
		start = le64_to_cpu(dp->ldp_hash_start);
		end = le64_to_cpu(dp->ldp_hash_end);
		lhash = hash;
	}
	if (end == start) {
		/* a whole page of entries shares one hash value; overflow
		 * chain handling is not implemented yet (see XXX below) */
		LASSERT(start == lhash);
		CWARN("Page-wide hash collision: "LPU64"\n", end);
		if (BITS_PER_LONG == 32 && hash64)
			CWARN("Real page-wide hash collision at ["LPU64" "LPU64
			      "] with hash "LPU64"\n",
			      le64_to_cpu(dp->ldp_hash_start),
			      le64_to_cpu(dp->ldp_hash_end), hash);
		/*
		 * Fetch whole overflow chain...
		 *
		 * XXX not yet.
		 */
		goto fail;
	}
out_unlock:
	mutex_unlock(&lli->lli_readdir_mutex);
	ldlm_lock_decref(&lockh, mode);
	return page;

fail:
	ll_release_page(page, 1);
	page = ERR_PTR(-EIO);
	goto out_unlock;
}
485
/*
 * Walk the directory in hash order starting at ctx->pos, emitting each
 * entry through dir_emit() until the actor says stop or the end of the
 * directory (MDS_DIR_END_OFF) is reached.  ctx->pos is left at the hash
 * cookie to resume from.  Returns 0 on success or a negative errno from
 * page fetching.
 */
int ll_dir_read(struct inode *inode, struct dir_context *ctx)
{
	struct ll_inode_info *info = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	__u64 pos = ctx->pos;
	int api32 = ll_need_32bit_api(sbi);
	int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
	struct page *page;
	struct ll_dir_chain chain;
	int done = 0;
	int rc = 0;
	ENTRY;

	ll_dir_chain_init(&chain);

	page = ll_get_dir_page(inode, pos, &chain);

	while (rc == 0 && !done) {
		struct lu_dirpage *dp;
		struct lu_dirent *ent;

		if (!IS_ERR(page)) {
			/*
			 * If page is empty (end of directory is reached),
			 * use this value.
			 */
			__u64 hash = MDS_DIR_END_OFF;
			__u64 next;

			dp = page_address(page);
			for (ent = lu_dirent_start(dp); ent != NULL && !done;
			     ent = lu_dirent_next(ent)) {
				__u16 type;
				int namelen;
				struct lu_fid fid;
				__u64 lhash;
				__u64 ino;

				/*
				 * XXX: implement correct swabbing here.
				 */

				hash = le64_to_cpu(ent->lde_hash);
				if (hash < pos)
					/*
					 * Skip until we find target hash
					 * value.
					 */
					continue;

				namelen = le16_to_cpu(ent->lde_namelen);
				if (namelen == 0)
					/*
					 * Skip dummy record.
					 */
					continue;

				if (api32 && hash64)
					lhash = hash >> 32;
				else
					lhash = hash;
				fid_le_to_cpu(&fid, &ent->lde_fid);
				ino = cl_fid_build_ino(&fid, api32);
				type = ll_dirent_type_get(ent);
				ctx->pos = lhash;
				/* For 'll_nfs_get_name_filldir()', it will try
				 * to access the 'ent' through its 'lde_name',
				 * so the parameter 'name' for 'ctx->actor()'
				 * must be part of the 'ent'.
				 */
				done = !dir_emit(ctx, ent->lde_name,
						 namelen, ino, type);
			}
			next = le64_to_cpu(dp->ldp_hash_end);
			if (!done) {
				pos = next;
				if (pos == MDS_DIR_END_OFF) {
					/*
					 * End of directory reached.
					 */
					done = 1;
					ll_release_page(page, 0);
				} else if (1 /* chain is exhausted*/) {
					/*
					 * Normal case: continue to the next
					 * page.
					 */
					ll_release_page(page,
					    le32_to_cpu(dp->ldp_flags) &
							LDF_COLLIDE);
					next = pos;
					page = ll_get_dir_page(inode, pos,
							       &chain);
				} else {
					/*
					 * go into overflow page.
					 */
					LASSERT(le32_to_cpu(dp->ldp_flags) &
						LDF_COLLIDE);
					ll_release_page(page, 1);
				}
			} else {
				/* actor stopped us mid-page: remember the hash
				 * of the entry we stopped at for resumption */
				pos = hash;
				ll_release_page(page, 0);
			}
		} else {
			rc = PTR_ERR(page);
			CERROR("error reading dir "DFID" at %lu: rc %d\n",
			       PFID(&info->lli_fid), (unsigned long)pos, rc);
		}
	}

	ctx->pos = pos;
	ll_dir_chain_fini(&chain);
	RETURN(rc);
}
602
/*
 * VFS ->iterate() entry point for lustre directories.
 *
 * Resumes from the per-file-descriptor position lfd->lfd_pos, delegates to
 * ll_dir_read(), then translates the internal MDS_DIR_END_OFF cookie into
 * the userspace-visible end-of-dir offset (32- or 64-bit flavor depending
 * on the caller's API) and truncates 64-bit hashes for 32-bit callers.
 */
static int ll_readdir(struct file *filp, struct dir_context *ctx)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
	int api32 = ll_need_32bit_api(sbi);
	int rc;
	ENTRY;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu "
	       " 32bit_api %d\n", inode->i_ino, inode->i_generation,
	       inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32);

	if (lfd->lfd_pos == MDS_DIR_END_OFF)
		/*
		 * end-of-file.
		 */
		GOTO(out, rc = 0);

	ctx->pos = lfd->lfd_pos;
	rc = ll_dir_read(inode, ctx);
	/* remember where to resume on the next call */
	lfd->lfd_pos = ctx->pos;
	if (ctx->pos == MDS_DIR_END_OFF) {
		if (api32)
			ctx->pos = LL_DIR_END_OFF_32BIT;
		else
			ctx->pos = LL_DIR_END_OFF;
	} else {
		if (api32 && hash64)
			ctx->pos >>= 32;
	}
	filp->f_version = inode->i_version;

out:
	if (!rc)
		ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1);

	RETURN(rc);
}
643
644int ll_send_mgc_param(struct obd_export *mgc, char *string)
645{
646 struct mgs_send_param *msp;
647 int rc = 0;
648
649 OBD_ALLOC_PTR(msp);
650 if (!msp)
651 return -ENOMEM;
652
653 strncpy(msp->mgs_param, string, MGS_PARAM_MAXLEN);
654 rc = obd_set_info_async(NULL, mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO,
655 sizeof(struct mgs_send_param), msp, NULL);
656 if (rc)
657 CERROR("Failed to set parameter: %d\n", rc);
658 OBD_FREE_PTR(msp);
659
660 return rc;
661}
662
/*
 * Create directory @filename under @dir with the striping described by the
 * user-supplied @lump, sent to the MDS via md_create() with CLI_SET_MEA.
 * Returns 0 on success or a negative errno.
 */
int ll_dir_setdirstripe(struct inode *dir, struct lmv_user_md *lump,
			char *filename)
{
	struct ptlrpc_request *request = NULL;
	struct md_op_data *op_data;
	struct ll_sb_info *sbi = ll_i2sbi(dir);
	int mode;
	int err;

	ENTRY;

	/* mkdir(2)-like default mode, adjusted by the caller's umask */
	mode = (0755 & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
	op_data = ll_prep_md_op_data(NULL, dir, NULL, filename,
				     strlen(filename), mode, LUSTRE_OPC_MKDIR,
				     lump);
	if (IS_ERR(op_data))
		GOTO(err_exit, err = PTR_ERR(op_data));

	op_data->op_cli_flags |= CLI_SET_MEA;
	err = md_create(sbi->ll_md_exp, op_data, lump, sizeof(*lump), mode,
			from_kuid(&init_user_ns, current_fsuid()),
			from_kgid(&init_user_ns, current_fsgid()),
			cfs_curproc_cap_pack(), 0, &request);
	ll_finish_md_op_data(op_data);
	if (err)
		GOTO(err_exit, err);
err_exit:
	/* ptlrpc_req_finished(NULL) is a no-op, so this is safe on all paths */
	ptlrpc_req_finished(request);
	return err;
}
693
/*
 * Set the (default) stripe layout of directory @inode from the user-supplied
 * @lump (NULL means a V1-sized request with server defaults).  When
 * @set_default is true and this client holds the MGS export, the stripe
 * size/count/offset are additionally pushed to the MGS as filesystem-wide
 * default parameters.  Returns 0 or a negative errno.
 */
int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
		     int set_default)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	int rc = 0;
	struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
	struct obd_device *mgc = lsi->lsi_mgc;
	int lum_size;
	ENTRY;

	if (lump != NULL) {
		/*
		 * This is coming from userspace, so should be in
		 * local endian. But the MDS would like it in little
		 * endian, so we swab it before we send it.
		 */
		switch (lump->lmm_magic) {
		case LOV_USER_MAGIC_V1: {
			if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
				lustre_swab_lov_user_md_v1(lump);
			lum_size = sizeof(struct lov_user_md_v1);
			break;
		}
		case LOV_USER_MAGIC_V3: {
			if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
				lustre_swab_lov_user_md_v3(
					(struct lov_user_md_v3 *)lump);
			lum_size = sizeof(struct lov_user_md_v3);
			break;
		}
		default: {
			CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
			       " %#08x != %#08x nor %#08x\n",
			       lump->lmm_magic, LOV_USER_MAGIC_V1,
			       LOV_USER_MAGIC_V3);
			RETURN(-EINVAL);
		}
		}
	} else {
		lum_size = sizeof(struct lov_user_md_v1);
	}

	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		RETURN(PTR_ERR(op_data));

	if (lump != NULL && lump->lmm_magic == cpu_to_le32(LMV_USER_MAGIC))
		op_data->op_cli_flags |= CLI_SET_MEA;

	/* swabbing is done in lov_setstripe() on server side */
	rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size,
			NULL, 0, &req, NULL);
	ll_finish_md_op_data(op_data);
	ptlrpc_req_finished(req);
	if (rc) {
		/* EPERM/EACCES are expected for unprivileged callers; do not
		 * spam the log for those */
		if (rc != -EPERM && rc != -EACCES)
			CERROR("mdc_setattr fails: rc = %d\n", rc);
	}

	/* In the following we use the fact that LOV_USER_MAGIC_V1 and
	   LOV_USER_MAGIC_V3 have the same initial fields so we do not
	   need the make the distiction between the 2 versions */
	if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
		char *param = NULL;
		char *buf;

		OBD_ALLOC(param, MGS_PARAM_MAXLEN);
		if (param == NULL)
			GOTO(end, rc = -ENOMEM);

		buf = param;
		/* Get fsname and assume devname to be -MDT0000. */
		ll_get_fsname(inode->i_sb, buf, MTI_NAME_MAXLEN);
		strcat(buf, "-MDT0000.lov");
		buf += strlen(buf);

		/* Set root stripesize */
		sprintf(buf, ".stripesize=%u",
			lump ? le32_to_cpu(lump->lmm_stripe_size) : 0);
		rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
		if (rc)
			GOTO(end, rc);

		/* Set root stripecount */
		sprintf(buf, ".stripecount=%hd",
			lump ? le16_to_cpu(lump->lmm_stripe_count) : 0);
		rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
		if (rc)
			GOTO(end, rc);

		/* Set root stripeoffset */
		sprintf(buf, ".stripeoffset=%hd",
			lump ? le16_to_cpu(lump->lmm_stripe_offset) :
			(typeof(lump->lmm_stripe_offset))(-1));
		rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);

end:
		if (param != NULL)
			OBD_FREE(param, MGS_PARAM_MAXLEN);
	}
	RETURN(rc);
}
799
/*
 * Fetch the stripe layout EA of directory @inode from the MDS.
 *
 * On success *lmmp points into the reply buffer of *request (swabbed to
 * host endian), *lmm_size is its size, and the caller owns *request and
 * must ptlrpc_req_finished() it when done with the layout.  Returns 0,
 * -ENODATA when no layout EA exists, or another negative errno.
 */
int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
		     int *lmm_size, struct ptlrpc_request **request)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct mdt_body *body;
	struct lov_mds_md *lmm = NULL;
	struct ptlrpc_request *req = NULL;
	int rc, lmmsize;
	struct md_op_data *op_data;

	/* ask for a reply buffer big enough for the largest possible EA */
	rc = ll_get_max_mdsize(sbi, &lmmsize);
	if (rc)
		RETURN(rc);

	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
				     0, lmmsize, LUSTRE_OPC_ANY,
				     NULL);
	if (IS_ERR(op_data))
		RETURN(PTR_ERR(op_data));

	op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
	rc = md_getattr(sbi->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
	if (rc < 0) {
		CDEBUG(D_INFO, "md_getattr failed on inode "
		       "%lu/%u: rc %d\n", inode->i_ino,
		       inode->i_generation, rc);
		GOTO(out, rc);
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	LASSERT(body != NULL);

	lmmsize = body->eadatasize;

	if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
	    lmmsize == 0) {
		GOTO(out, rc = -ENODATA);
	}

	lmm = req_capsule_server_sized_get(&req->rq_pill,
					   &RMF_MDT_MD, lmmsize);
	LASSERT(lmm != NULL);

	/*
	 * This is coming from the MDS, so is probably in
	 * little endian. We convert it to host endian before
	 * passing it to userspace.
	 */
	/* We don't swab objects for directories */
	switch (le32_to_cpu(lmm->lmm_magic)) {
	case LOV_MAGIC_V1:
		if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))
			lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
		break;
	case LOV_MAGIC_V3:
		if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))
			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
		break;
	default:
		CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
		rc = -EPROTO;
	}
out:
	/* out-params are set even on failure; req may carry a partial reply */
	*lmmp = lmm;
	*lmm_size = lmmsize;
	*request = req;
	return rc;
}
869
/*
 * Get MDT index for the inode.
 *
 * Issues a getattr carrying MF_GET_MDT_IDX; the MD layer answers through
 * op_data->op_mds.  Returns the non-negative MDT index, or a negative
 * errno on failure.
 */
int ll_get_mdt_idx(struct inode *inode)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	int rc, mdtidx;
	ENTRY;

	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0,
				     0, LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		RETURN(PTR_ERR(op_data));

	op_data->op_flags |= MF_GET_MDT_IDX;
	/* NULL request pointer: no reply body is needed, only op_mds */
	rc = md_getattr(sbi->ll_md_exp, op_data, NULL);
	mdtidx = op_data->op_mds;
	ll_finish_md_op_data(op_data);
	if (rc < 0) {
		CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
		RETURN(rc);
	}
	return mdtidx;
}
895
/**
 * Generic handler to do any pre-copy work.
 *
 * It send a first hsm_progress (with extent length == 0) to coordinator as a
 * first information for it that real work has started.
 *
 * Moreover, for a ARCHIVE request, it will sample the file data version and
 * store it in \a copy.
 *
 * \return 0 on success.
 */
static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
{
	struct ll_sb_info *sbi = ll_s2sbi(sb);
	struct hsm_progress_kernel hpk;
	int rc;
	ENTRY;

	/* Forge a hsm_progress based on data from copy. */
	hpk.hpk_fid = copy->hc_hai.hai_fid;
	hpk.hpk_cookie = copy->hc_hai.hai_cookie;
	hpk.hpk_extent.offset = copy->hc_hai.hai_extent.offset;
	hpk.hpk_extent.length = 0;	/* zero length == "work has started" */
	hpk.hpk_flags = 0;
	hpk.hpk_errval = 0;
	hpk.hpk_data_version = 0;


	/* For archive request, we need to read the current file version. */
	if (copy->hc_hai.hai_action == HSMA_ARCHIVE) {
		struct inode *inode;
		__u64 data_version = 0;

		/* Get inode for this fid */
		inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
		if (IS_ERR(inode)) {
			hpk.hpk_flags |= HP_FLAG_RETRY;
			/* hpk_errval is >= 0 */
			hpk.hpk_errval = -PTR_ERR(inode);
			GOTO(progress, rc = PTR_ERR(inode));
		}

		/* Read current file data version */
		rc = ll_data_version(inode, &data_version, 1);
		iput(inode);
		if (rc != 0) {
			CDEBUG(D_HSM, "Could not read file data version of "
				      DFID" (rc = %d). Archive request ("
				      LPX64") could not be done.\n",
				      PFID(&copy->hc_hai.hai_fid), rc,
				      copy->hc_hai.hai_cookie);
			hpk.hpk_flags |= HP_FLAG_RETRY;
			/* hpk_errval must be >= 0 */
			hpk.hpk_errval = -rc;
			GOTO(progress, rc);
		}

		/* Store it the hsm_copy for later copytool use.
		 * Always modified even if no lsm. */
		copy->hc_data_version = data_version;
	}

progress:
	/* report start-of-copy (or the failure above) to the coordinator */
	rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
			   &hpk, NULL);

	RETURN(rc);
}
964
/**
 * Generic handler to do any post-copy work.
 *
 * It will send the last hsm_progress update to coordinator to inform it
 * that copy is finished and whether it was successful or not.
 *
 * Moreover,
 * - for ARCHIVE request, it will sample the file data version and compare it
 *   with the version saved in ll_ioc_copy_start(). If they do not match, copy
 *   will be considered as failed.
 * - for RESTORE request, it will sample the file data version and send it to
 *   coordinator which is useful if the file was imported as 'released'.
 *
 * \return 0 on success.
 */
static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
{
	struct ll_sb_info *sbi = ll_s2sbi(sb);
	struct hsm_progress_kernel hpk;
	int rc;
	ENTRY;

	/* If you modify the logic here, also check llapi_hsm_copy_end(). */
	/* Take care: copy->hc_hai.hai_action, len, gid and data are not
	 * initialized if copy_end was called with copy == NULL.
	 */

	/* Forge a hsm_progress based on data from copy. */
	hpk.hpk_fid = copy->hc_hai.hai_fid;
	hpk.hpk_cookie = copy->hc_hai.hai_cookie;
	hpk.hpk_extent = copy->hc_hai.hai_extent;
	hpk.hpk_flags = copy->hc_flags | HP_FLAG_COMPLETED;
	hpk.hpk_errval = copy->hc_errval;
	hpk.hpk_data_version = 0;

	/* For archive request, we need to check the file data was not changed.
	 *
	 * For restore request, we need to send the file data version, this is
	 * useful when the file was created using hsm_import.
	 */
	if (((copy->hc_hai.hai_action == HSMA_ARCHIVE) ||
	     (copy->hc_hai.hai_action == HSMA_RESTORE)) &&
	    (copy->hc_errval == 0)) {
		struct inode *inode;
		__u64 data_version = 0;

		/* Get lsm for this fid */
		inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
		if (IS_ERR(inode)) {
			hpk.hpk_flags |= HP_FLAG_RETRY;
			/* hpk_errval must be >= 0 */
			hpk.hpk_errval = -PTR_ERR(inode);
			GOTO(progress, rc = PTR_ERR(inode));
		}

		rc = ll_data_version(inode, &data_version,
				     copy->hc_hai.hai_action == HSMA_ARCHIVE);
		iput(inode);
		if (rc) {
			CDEBUG(D_HSM, "Could not read file data version. "
				      "Request could not be confirmed.\n");
			if (hpk.hpk_errval == 0)
				hpk.hpk_errval = -rc;
			GOTO(progress, rc);
		}

		/* Store it the hsm_copy for later copytool use.
		 * Always modified even if no lsm. */
		hpk.hpk_data_version = data_version;

		/* File could have been stripped during archiving, so we need
		 * to check anyway. */
		if ((copy->hc_hai.hai_action == HSMA_ARCHIVE) &&
		    (copy->hc_data_version != data_version)) {
			CDEBUG(D_HSM, "File data version mismatched. "
			      "File content was changed during archiving. "
			       DFID", start:"LPX64" current:"LPX64"\n",
			       PFID(&copy->hc_hai.hai_fid),
			       copy->hc_data_version, data_version);
			/* File was changed, send error to cdt. Do not ask for
			 * retry because if a file is modified frequently,
			 * the cdt will loop on retried archive requests.
			 * The policy engine will ask for a new archive later
			 * when the file will not be modified for some tunable
			 * time */
			/* we do not notify caller */
			hpk.hpk_flags &= ~HP_FLAG_RETRY;
			/* hpk_errval must be >= 0 */
			hpk.hpk_errval = EBUSY;
		}

	}

progress:
	/* final progress report: copy complete (or failed above) */
	rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
			   &hpk, NULL);

	RETURN(rc);
}
1064
1065
1066static int copy_and_ioctl(int cmd, struct obd_export *exp, void *data, int len)
1067{
1068 void *ptr;
1069 int rc;
1070
1071 OBD_ALLOC(ptr, len);
1072 if (ptr == NULL)
1073 return -ENOMEM;
1074 if (copy_from_user(ptr, data, len)) {
1075 OBD_FREE(ptr, len);
1076 return -EFAULT;
1077 }
1078 rc = obd_iocontrol(cmd, exp, len, data, NULL);
1079 OBD_FREE(ptr, len);
1080 return rc;
1081}
1082
1083static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
1084{
1085 int cmd = qctl->qc_cmd;
1086 int type = qctl->qc_type;
1087 int id = qctl->qc_id;
1088 int valid = qctl->qc_valid;
1089 int rc = 0;
1090 ENTRY;
1091
1092 switch (cmd) {
1093 case LUSTRE_Q_INVALIDATE:
1094 case LUSTRE_Q_FINVALIDATE:
1095 case Q_QUOTAON:
1096 case Q_QUOTAOFF:
1097 case Q_SETQUOTA:
1098 case Q_SETINFO:
1099 if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
1100 sbi->ll_flags & LL_SBI_RMT_CLIENT)
1101 RETURN(-EPERM);
1102 break;
1103 case Q_GETQUOTA:
4b1a25f0
PT
1104 if (((type == USRQUOTA &&
1105 uid_eq(current_euid(), make_kuid(&init_user_ns, id))) ||
1106 (type == GRPQUOTA &&
1107 !in_egroup_p(make_kgid(&init_user_ns, id)))) &&
d7e09d03
PT
1108 (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
1109 sbi->ll_flags & LL_SBI_RMT_CLIENT))
1110 RETURN(-EPERM);
1111 break;
1112 case Q_GETINFO:
1113 break;
1114 default:
1115 CERROR("unsupported quotactl op: %#x\n", cmd);
1116 RETURN(-ENOTTY);
1117 }
1118
1119 if (valid != QC_GENERAL) {
1120 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
1121 RETURN(-EOPNOTSUPP);
1122
1123 if (cmd == Q_GETINFO)
1124 qctl->qc_cmd = Q_GETOINFO;
1125 else if (cmd == Q_GETQUOTA)
1126 qctl->qc_cmd = Q_GETOQUOTA;
1127 else
1128 RETURN(-EINVAL);
1129
1130 switch (valid) {
1131 case QC_MDTIDX:
1132 rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
1133 sizeof(*qctl), qctl, NULL);
1134 break;
1135 case QC_OSTIDX:
1136 rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_dt_exp,
1137 sizeof(*qctl), qctl, NULL);
1138 break;
1139 case QC_UUID:
1140 rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
1141 sizeof(*qctl), qctl, NULL);
1142 if (rc == -EAGAIN)
1143 rc = obd_iocontrol(OBD_IOC_QUOTACTL,
1144 sbi->ll_dt_exp,
1145 sizeof(*qctl), qctl, NULL);
1146 break;
1147 default:
1148 rc = -EINVAL;
1149 break;
1150 }
1151
1152 if (rc)
1153 RETURN(rc);
1154
1155 qctl->qc_cmd = cmd;
1156 } else {
1157 struct obd_quotactl *oqctl;
1158
1159 OBD_ALLOC_PTR(oqctl);
1160 if (oqctl == NULL)
1161 RETURN(-ENOMEM);
1162
1163 QCTL_COPY(oqctl, qctl);
1164 rc = obd_quotactl(sbi->ll_md_exp, oqctl);
1165 if (rc) {
1166 if (rc != -EALREADY && cmd == Q_QUOTAON) {
1167 oqctl->qc_cmd = Q_QUOTAOFF;
1168 obd_quotactl(sbi->ll_md_exp, oqctl);
1169 }
1170 OBD_FREE_PTR(oqctl);
1171 RETURN(rc);
1172 }
1173 /* If QIF_SPACE is not set, client should collect the
1174 * space usage from OSSs by itself */
1175 if (cmd == Q_GETQUOTA &&
1176 !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) &&
1177 !oqctl->qc_dqblk.dqb_curspace) {
1178 struct obd_quotactl *oqctl_tmp;
1179
1180 OBD_ALLOC_PTR(oqctl_tmp);
1181 if (oqctl_tmp == NULL)
1182 GOTO(out, rc = -ENOMEM);
1183
1184 oqctl_tmp->qc_cmd = Q_GETOQUOTA;
1185 oqctl_tmp->qc_id = oqctl->qc_id;
1186 oqctl_tmp->qc_type = oqctl->qc_type;
1187
1188 /* collect space usage from OSTs */
1189 oqctl_tmp->qc_dqblk.dqb_curspace = 0;
1190 rc = obd_quotactl(sbi->ll_dt_exp, oqctl_tmp);
1191 if (!rc || rc == -EREMOTEIO) {
1192 oqctl->qc_dqblk.dqb_curspace =
1193 oqctl_tmp->qc_dqblk.dqb_curspace;
1194 oqctl->qc_dqblk.dqb_valid |= QIF_SPACE;
1195 }
1196
1197 /* collect space & inode usage from MDTs */
1198 oqctl_tmp->qc_dqblk.dqb_curspace = 0;
1199 oqctl_tmp->qc_dqblk.dqb_curinodes = 0;
1200 rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp);
1201 if (!rc || rc == -EREMOTEIO) {
1202 oqctl->qc_dqblk.dqb_curspace +=
1203 oqctl_tmp->qc_dqblk.dqb_curspace;
1204 oqctl->qc_dqblk.dqb_curinodes =
1205 oqctl_tmp->qc_dqblk.dqb_curinodes;
1206 oqctl->qc_dqblk.dqb_valid |= QIF_INODES;
1207 } else {
1208 oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE;
1209 }
1210
1211 OBD_FREE_PTR(oqctl_tmp);
1212 }
1213out:
1214 QCTL_COPY(qctl, oqctl);
1215 OBD_FREE_PTR(oqctl);
1216 }
1217
1218 RETURN(rc);
1219}
1220
1221static char *
1222ll_getname(const char __user *filename)
1223{
1224 int ret = 0, len;
1225 char *tmp = __getname();
1226
1227 if (!tmp)
1228 return ERR_PTR(-ENOMEM);
1229
1230 len = strncpy_from_user(tmp, filename, PATH_MAX);
1231 if (len == 0)
1232 ret = -ENOENT;
1233 else if (len > PATH_MAX)
1234 ret = -ENAMETOOLONG;
1235
1236 if (ret) {
1237 __putname(tmp);
1238 tmp = ERR_PTR(ret);
1239 }
1240 return tmp;
1241}
1242
/* Release a name buffer obtained from ll_getname(). */
#define ll_putname(filename) __putname(filename)
1244
/**
 * Directory ioctl multiplexer for the Lustre client.
 *
 * Dispatches striping, quota, HSM, changelog and FID-related ioctls to
 * the MD (metadata) or DT (data) export as appropriate, copying the
 * argument structures between userspace and the kernel.  Exits are via
 * the Lustre RETURN()/GOTO() macros, which log before returning.
 *
 * \param file  open directory file
 * \param cmd   ioctl command number
 * \param arg   userspace pointer or scalar argument, depending on cmd
 *
 * \retval 0 or a command-specific positive value on success,
 *         negative errno on failure.
 */
static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obd_ioctl_data *data;
	int rc = 0;
	ENTRY;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
	       inode->i_ino, inode->i_generation, inode, cmd);

	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
	if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
		return -ENOTTY;

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
	switch(cmd) {
	case FSFILT_IOC_GETFLAGS:
	case FSFILT_IOC_SETFLAGS:
		RETURN(ll_iocontrol(inode, file, cmd, arg));
	case FSFILT_IOC_GETVERSION_OLD:
	case FSFILT_IOC_GETVERSION:
		RETURN(put_user(inode->i_generation, (int *)arg));
	/* We need to special case any other ioctls we want to handle,
	 * to send them to the MDS/OST as appropriate and to properly
	 * network encode the arg field.
	case FSFILT_IOC_SETVERSION_OLD:
	case FSFILT_IOC_SETVERSION:
	*/
	case LL_IOC_GET_MDTIDX: {
		/* return the index of the MDT backing this directory */
		int mdtidx;

		mdtidx = ll_get_mdt_idx(inode);
		if (mdtidx < 0)
			RETURN(mdtidx);

		if (put_user((int)mdtidx, (int*)arg))
			RETURN(-EFAULT);

		return 0;
	}
	case IOC_MDC_LOOKUP: {
		/* resolve a name in this directory via md_getattr_name() */
		struct ptlrpc_request *request = NULL;
		int namelen, len = 0;
		char *buf = NULL;
		char *filename;
		struct md_op_data *op_data;

		rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
		if (rc)
			RETURN(rc);
		data = (void *)buf;

		filename = data->ioc_inlbuf1;
		namelen = strlen(filename);

		if (namelen < 1) {
			CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
			GOTO(out_free, rc = -EINVAL);
		}

		op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, namelen,
					     0, LUSTRE_OPC_ANY, NULL);
		if (IS_ERR(op_data))
			GOTO(out_free, rc = PTR_ERR(op_data));

		op_data->op_valid = OBD_MD_FLID;
		rc = md_getattr_name(sbi->ll_md_exp, op_data, &request);
		ll_finish_md_op_data(op_data);
		if (rc < 0) {
			CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
			GOTO(out_free, rc);
		}
		ptlrpc_req_finished(request);
		EXIT;
out_free:
		obd_ioctl_freedata(buf, len);
		return rc;
	}
	case LL_IOC_LMV_SETSTRIPE: {
		/* set directory (LMV) striping via mdc_create */
		struct lmv_user_md *lum;
		char *buf = NULL;
		char *filename;
		int namelen = 0;
		int lumlen = 0;
		int len;
		int rc;

		rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
		if (rc)
			RETURN(rc);

		data = (void *)buf;
		if (data->ioc_inlbuf1 == NULL || data->ioc_inlbuf2 == NULL ||
		    data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0)
			GOTO(lmv_out_free, rc = -EINVAL);

		filename = data->ioc_inlbuf1;
		namelen = data->ioc_inllen1;

		if (namelen < 1) {
			CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
			GOTO(lmv_out_free, rc = -EINVAL);
		}
		lum = (struct lmv_user_md *)data->ioc_inlbuf2;
		lumlen = data->ioc_inllen2;

		if (lum->lum_magic != LMV_USER_MAGIC ||
		    lumlen != sizeof(*lum)) {
			CERROR("%s: wrong lum magic %x or size %d: rc = %d\n",
			       filename, lum->lum_magic, lumlen, -EFAULT);
			GOTO(lmv_out_free, rc = -EINVAL);
		}

		/**
		 * ll_dir_setdirstripe will be used to set dir stripe
		 *  mdc_create--->mdt_reint_create (with dirstripe)
		 */
		rc = ll_dir_setdirstripe(inode, lum, filename);
lmv_out_free:
		obd_ioctl_freedata(buf, len);
		RETURN(rc);

	}
	case LL_IOC_LOV_SETSTRIPE: {
		/* set file (LOV) striping; accepts v1 or v3 layout */
		struct lov_user_md_v3 lumv3;
		struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
		struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
		struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;

		int set_default = 0;

		LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
		LASSERT(sizeof(lumv3.lmm_objects[0]) ==
			sizeof(lumv3p->lmm_objects[0]));
		/* first try with v1 which is smaller than v3 */
		if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1)))
			RETURN(-EFAULT);

		if ((lumv1->lmm_magic == LOV_USER_MAGIC_V3) ) {
			if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3)))
				RETURN(-EFAULT);
		}

		/* striping set on the fs root becomes the default layout */
		if (inode->i_sb->s_root == file->f_dentry)
			set_default = 1;

		/* in v1 and v3 cases lumv1 points to data */
		rc = ll_dir_setstripe(inode, lumv1, set_default);

		RETURN(rc);
	}
	case LL_IOC_LMV_GETSTRIPE: {
		/* report single-stripe LMV layout for this directory */
		struct lmv_user_md *lump = (struct lmv_user_md *)arg;
		struct lmv_user_md lum;
		struct lmv_user_md *tmp;
		int lum_size;
		int rc = 0;
		int mdtindex;

		if (copy_from_user(&lum, lump, sizeof(struct lmv_user_md)))
			RETURN(-EFAULT);

		if (lum.lum_magic != LMV_MAGIC_V1)
			RETURN(-EINVAL);

		lum_size = lmv_user_md_size(1, LMV_MAGIC_V1);
		OBD_ALLOC(tmp, lum_size);
		if (tmp == NULL)
			GOTO(free_lmv, rc = -ENOMEM);

		memcpy(tmp, &lum, sizeof(lum));
		tmp->lum_type = LMV_STRIPE_TYPE;
		tmp->lum_stripe_count = 1;
		mdtindex = ll_get_mdt_idx(inode);
		if (mdtindex < 0)
			GOTO(free_lmv, rc = -ENOMEM);

		tmp->lum_stripe_offset = mdtindex;
		tmp->lum_objects[0].lum_mds = mdtindex;
		memcpy(&tmp->lum_objects[0].lum_fid, ll_inode2fid(inode),
		       sizeof(struct lu_fid));
		if (copy_to_user((void *)arg, tmp, lum_size))
			GOTO(free_lmv, rc = -EFAULT);
free_lmv:
		if (tmp)
			OBD_FREE(tmp, lum_size);
		RETURN(rc);
	}
	case LL_IOC_REMOVE_ENTRY: {
		char *filename = NULL;
		int namelen = 0;
		int rc;

		/* Here is a little hack to avoid sending REINT_RMENTRY to
		 * unsupported server, which might crash the server(LU-2730),
		 * Because both LVB_TYPE and REINT_RMENTRY will be supported
		 * on 2.4, we use OBD_CONNECT_LVB_TYPE to detect whether the
		 * server will support REINT_RMENTRY XXX*/
		if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_LVB_TYPE))
			return -ENOTSUPP;

		filename = ll_getname((const char *)arg);
		if (IS_ERR(filename))
			RETURN(PTR_ERR(filename));

		namelen = strlen(filename);
		if (namelen < 1)
			GOTO(out_rmdir, rc = -EINVAL);

		rc = ll_rmdir_entry(inode, filename, namelen);
out_rmdir:
		if (filename)
			ll_putname(filename);
		RETURN(rc);
	}
	case LL_IOC_LOV_SWAP_LAYOUTS:
		/* layout swap makes no sense on a directory */
		RETURN(-EPERM);
	case LL_IOC_OBD_STATFS:
		RETURN(ll_obd_statfs(inode, (void *)arg));
	case LL_IOC_LOV_GETSTRIPE:
	case LL_IOC_MDC_GETINFO:
	case IOC_MDC_GETFILEINFO:
	case IOC_MDC_GETFILESTRIPE: {
		/* fetch striping (and, for *GETINFO, stat data) either for
		 * a named child (filename variants) or for the dir itself */
		struct ptlrpc_request *request = NULL;
		struct lov_user_md *lump;
		struct lov_mds_md *lmm = NULL;
		struct mdt_body *body;
		char *filename = NULL;
		int lmmsize;

		if (cmd == IOC_MDC_GETFILEINFO ||
		    cmd == IOC_MDC_GETFILESTRIPE) {
			filename = ll_getname((const char *)arg);
			if (IS_ERR(filename))
				RETURN(PTR_ERR(filename));

			rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
						      &lmmsize, &request);
		} else {
			rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
		}

		if (request) {
			body = req_capsule_server_get(&request->rq_pill,
						      &RMF_MDT_BODY);
			LASSERT(body != NULL);
		} else {
			GOTO(out_req, rc);
		}

		if (rc < 0) {
			/* -ENODATA just means "no striping EA"; the INFO
			 * variants still return the stat data below */
			if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
					       cmd == LL_IOC_MDC_GETINFO))
				GOTO(skip_lmm, rc = 0);
			else
				GOTO(out_req, rc);
		}

		if (cmd == IOC_MDC_GETFILESTRIPE ||
		    cmd == LL_IOC_LOV_GETSTRIPE) {
			lump = (struct lov_user_md *)arg;
		} else {
			struct lov_user_mds_data *lmdp;
			lmdp = (struct lov_user_mds_data *)arg;
			lump = &lmdp->lmd_lmm;
		}
		/* if the user buffer is too small, copy the header only and
		 * report -EOVERFLOW so the caller can retry with more room */
		if (copy_to_user(lump, lmm, lmmsize)) {
			if (copy_to_user(lump, lmm, sizeof(*lump)))
				GOTO(out_req, rc = -EFAULT);
			rc = -EOVERFLOW;
		}
	skip_lmm:
		if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
			struct lov_user_mds_data *lmdp;
			lstat_t st = { 0 };

			st.st_dev     = inode->i_sb->s_dev;
			st.st_mode    = body->mode;
			st.st_nlink   = body->nlink;
			st.st_uid     = body->uid;
			st.st_gid     = body->gid;
			st.st_rdev    = body->rdev;
			st.st_size    = body->size;
			st.st_blksize = PAGE_CACHE_SIZE;
			st.st_blocks  = body->blocks;
			st.st_atime   = body->atime;
			st.st_mtime   = body->mtime;
			st.st_ctime   = body->ctime;
			st.st_ino     = inode->i_ino;

			lmdp = (struct lov_user_mds_data *)arg;
			if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st)))
				GOTO(out_req, rc = -EFAULT);
		}

		EXIT;
	out_req:
		ptlrpc_req_finished(request);
		if (filename)
			ll_putname(filename);
		return rc;
	}
	case IOC_LOV_GETINFO: {
		/* glimpse size/attrs of a file from a user-supplied layout */
		struct lov_user_mds_data *lumd;
		struct lov_stripe_md *lsm;
		struct lov_user_md *lum;
		struct lov_mds_md *lmm;
		int lmmsize;
		lstat_t st;

		lumd = (struct lov_user_mds_data *)arg;
		lum = &lumd->lmd_lmm;

		rc = ll_get_max_mdsize(sbi, &lmmsize);
		if (rc)
			RETURN(rc);

		/* NOTE(review): OBD_ALLOC_LARGE() result is not checked for
		 * NULL before the copy_from_user() below — an allocation
		 * failure here would oops; should GOTO out with -ENOMEM. */
		OBD_ALLOC_LARGE(lmm, lmmsize);
		if (copy_from_user(lmm, lum, lmmsize))
			GOTO(free_lmm, rc = -EFAULT);

		switch (lmm->lmm_magic) {
		case LOV_USER_MAGIC_V1:
			/* no-op if the host is already little-endian */
			if (LOV_USER_MAGIC_V1 == cpu_to_le32(LOV_USER_MAGIC_V1))
				break;
			/* swab objects first so that stripes num will be sane */
			lustre_swab_lov_user_md_objects(
				((struct lov_user_md_v1 *)lmm)->lmm_objects,
				((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
			lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
			break;
		case LOV_USER_MAGIC_V3:
			if (LOV_USER_MAGIC_V3 == cpu_to_le32(LOV_USER_MAGIC_V3))
				break;
			/* swab objects first so that stripes num will be sane */
			lustre_swab_lov_user_md_objects(
				((struct lov_user_md_v3 *)lmm)->lmm_objects,
				((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
			break;
		default:
			GOTO(free_lmm, rc = -EINVAL);
		}

		rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
		if (rc < 0)
			GOTO(free_lmm, rc = -ENOMEM);

		/* Perform glimpse_size operation. */
		memset(&st, 0, sizeof(st));

		rc = ll_glimpse_ioctl(sbi, lsm, &st);
		if (rc)
			GOTO(free_lsm, rc);

		if (copy_to_user(&lumd->lmd_st, &st, sizeof(st)))
			GOTO(free_lsm, rc = -EFAULT);

		EXIT;
	free_lsm:
		obd_free_memmd(sbi->ll_dt_exp, &lsm);
	free_lmm:
		OBD_FREE_LARGE(lmm, lmmsize);
		return rc;
	}
	case OBD_IOC_LLOG_CATINFO: {
		RETURN(-EOPNOTSUPP);
	}
	case OBD_IOC_QUOTACHECK: {
		/* trigger quotacheck on both MDT and OSTs; first error wins */
		struct obd_quotactl *oqctl;
		int error = 0;

		if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
		    sbi->ll_flags & LL_SBI_RMT_CLIENT)
			RETURN(-EPERM);

		OBD_ALLOC_PTR(oqctl);
		if (!oqctl)
			RETURN(-ENOMEM);
		oqctl->qc_type = arg;
		rc = obd_quotacheck(sbi->ll_md_exp, oqctl);
		if (rc < 0) {
			CDEBUG(D_INFO, "md_quotacheck failed: rc %d\n", rc);
			error = rc;
		}

		rc = obd_quotacheck(sbi->ll_dt_exp, oqctl);
		if (rc < 0)
			CDEBUG(D_INFO, "obd_quotacheck failed: rc %d\n", rc);

		OBD_FREE_PTR(oqctl);
		return error ?: rc;
	}
	case OBD_IOC_POLL_QUOTACHECK: {
		/* poll quotacheck completion on MDT, then OSTs */
		struct if_quotacheck *check;

		if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
		    sbi->ll_flags & LL_SBI_RMT_CLIENT)
			RETURN(-EPERM);

		OBD_ALLOC_PTR(check);
		if (!check)
			RETURN(-ENOMEM);

		rc = obd_iocontrol(cmd, sbi->ll_md_exp, 0, (void *)check,
				   NULL);
		if (rc) {
			CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
			if (copy_to_user((void *)arg, check,
					 sizeof(*check)))
				CDEBUG(D_QUOTA, "copy_to_user failed\n");
			GOTO(out_poll, rc);
		}

		rc = obd_iocontrol(cmd, sbi->ll_dt_exp, 0, (void *)check,
				   NULL);
		if (rc) {
			CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
			if (copy_to_user((void *)arg, check,
					 sizeof(*check)))
				CDEBUG(D_QUOTA, "copy_to_user failed\n");
			GOTO(out_poll, rc);
		}
	out_poll:
		OBD_FREE_PTR(check);
		RETURN(rc);
	}
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
	case LL_IOC_QUOTACTL_18: {
		/* copy the old 1.x quota struct for internal use, then copy
		 * back into old format struct. For 1.8 compatibility. */
		struct if_quotactl_18 *qctl_18;
		struct if_quotactl *qctl_20;

		OBD_ALLOC_PTR(qctl_18);
		if (!qctl_18)
			RETURN(-ENOMEM);

		OBD_ALLOC_PTR(qctl_20);
		if (!qctl_20)
			GOTO(out_quotactl_18, rc = -ENOMEM);

		if (copy_from_user(qctl_18, (void *)arg, sizeof(*qctl_18)))
			GOTO(out_quotactl_20, rc = -ENOMEM);

		QCTL_COPY(qctl_20, qctl_18);
		qctl_20->qc_idx = 0;

		/* XXX: dqb_valid was borrowed as a flag to mark that
		 * only mds quota is wanted */
		if (qctl_18->qc_cmd == Q_GETQUOTA &&
		    qctl_18->qc_dqblk.dqb_valid) {
			qctl_20->qc_valid = QC_MDTIDX;
			qctl_20->qc_dqblk.dqb_valid = 0;
		} else if (qctl_18->obd_uuid.uuid[0] != '\0') {
			qctl_20->qc_valid = QC_UUID;
			qctl_20->obd_uuid = qctl_18->obd_uuid;
		} else {
			qctl_20->qc_valid = QC_GENERAL;
		}

		rc = quotactl_ioctl(sbi, qctl_20);

		if (rc == 0) {
			QCTL_COPY(qctl_18, qctl_20);
			qctl_18->obd_uuid = qctl_20->obd_uuid;

			if (copy_to_user((void *)arg, qctl_18,
					 sizeof(*qctl_18)))
				rc = -EFAULT;
		}

	out_quotactl_20:
		OBD_FREE_PTR(qctl_20);
	out_quotactl_18:
		OBD_FREE_PTR(qctl_18);
		RETURN(rc);
	}
#else
#warning "remove old LL_IOC_QUOTACTL_18 compatibility code"
#endif /* LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0) */
	case LL_IOC_QUOTACTL: {
		struct if_quotactl *qctl;

		OBD_ALLOC_PTR(qctl);
		if (!qctl)
			RETURN(-ENOMEM);

		if (copy_from_user(qctl, (void *)arg, sizeof(*qctl)))
			GOTO(out_quotactl, rc = -EFAULT);

		rc = quotactl_ioctl(sbi, qctl);

		if (rc == 0 && copy_to_user((void *)arg,qctl,sizeof(*qctl)))
			rc = -EFAULT;

	out_quotactl:
		OBD_FREE_PTR(qctl);
		RETURN(rc);
	}
	case OBD_IOC_GETDTNAME:
	case OBD_IOC_GETMDNAME:
		RETURN(ll_get_obd_name(inode, cmd, arg));
	case LL_IOC_FLUSHCTX:
		RETURN(ll_flush_ctx(inode));
#ifdef CONFIG_FS_POSIX_ACL
	case LL_IOC_RMTACL: {
	    /* register the caller as a remote-ACL user on the fs root */
	    if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
		inode == inode->i_sb->s_root->d_inode) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(fd != NULL);
		rc = rct_add(&sbi->ll_rct, current_pid(), arg);
		if (!rc)
			fd->fd_flags |= LL_FILE_RMTACL;
		RETURN(rc);
	    } else
		RETURN(0);
	}
#endif
	case LL_IOC_GETOBDCOUNT: {
		int count, vallen;
		struct obd_export *exp;

		if (copy_from_user(&count, (int *)arg, sizeof(int)))
			RETURN(-EFAULT);

		/* get ost count when count is zero, get mdt count otherwise */
		exp = count ? sbi->ll_md_exp : sbi->ll_dt_exp;
		vallen = sizeof(count);
		rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT),
				  KEY_TGT_COUNT, &vallen, &count, NULL);
		if (rc) {
			CERROR("get target count failed: %d\n", rc);
			RETURN(rc);
		}

		if (copy_to_user((int *)arg, &count, sizeof(int)))
			RETURN(-EFAULT);

		RETURN(0);
	}
	case LL_IOC_PATH2FID:
		if (copy_to_user((void *)arg, ll_inode2fid(inode),
				 sizeof(struct lu_fid)))
			RETURN(-EFAULT);
		RETURN(0);
	case LL_IOC_GET_CONNECT_FLAGS: {
		RETURN(obd_iocontrol(cmd, sbi->ll_md_exp, 0, NULL, (void*)arg));
	}
	case OBD_IOC_CHANGELOG_SEND:
	case OBD_IOC_CHANGELOG_CLEAR:
		rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg,
				    sizeof(struct ioc_changelog));
		RETURN(rc);
	case OBD_IOC_FID2PATH:
		RETURN(ll_fid2path(inode, (void *)arg));
	case LL_IOC_HSM_REQUEST: {
		/* variable-size request: copy the fixed header to learn the
		 * total length, then copy the whole thing */
		struct hsm_user_request *hur;
		int totalsize;

		OBD_ALLOC_PTR(hur);
		if (hur == NULL)
			RETURN(-ENOMEM);

		/* We don't know the true size yet; copy the fixed-size part */
		if (copy_from_user(hur, (void *)arg, sizeof(*hur))) {
			OBD_FREE_PTR(hur);
			RETURN(-EFAULT);
		}

		/* Compute the whole struct size */
		totalsize = hur_len(hur);
		OBD_FREE_PTR(hur);
		/* NOTE(review): totalsize derives from user-supplied item
		 * counts — presumably hur_len() bounds it; verify no
		 * overflow/oversize allocation is possible here. */
		OBD_ALLOC_LARGE(hur, totalsize);
		if (hur == NULL)
			RETURN(-ENOMEM);

		/* Copy the whole struct */
		if (copy_from_user(hur, (void *)arg, totalsize)) {
			OBD_FREE_LARGE(hur, totalsize);
			RETURN(-EFAULT);
		}

		rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize,
				   hur, NULL);

		OBD_FREE_LARGE(hur, totalsize);

		RETURN(rc);
	}
	case LL_IOC_HSM_PROGRESS: {
		/* translate userspace hsm_progress into the kernel variant
		 * (adds hpk_data_version) before sending to the MDT */
		struct hsm_progress_kernel hpk;
		struct hsm_progress hp;

		if (copy_from_user(&hp, (void *)arg, sizeof(hp)))
			RETURN(-EFAULT);

		hpk.hpk_fid = hp.hp_fid;
		hpk.hpk_cookie = hp.hp_cookie;
		hpk.hpk_extent = hp.hp_extent;
		hpk.hpk_flags = hp.hp_flags;
		hpk.hpk_errval = hp.hp_errval;
		hpk.hpk_data_version = 0;

		/* File may not exist in Lustre; all progress
		 * reported to Lustre root */
		rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(hpk), &hpk,
				   NULL);
		RETURN(rc);
	}
	case LL_IOC_HSM_CT_START:
		rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg,
				    sizeof(struct lustre_kernelcomm));
		RETURN(rc);

	case LL_IOC_HSM_COPY_START: {
		struct hsm_copy *copy;
		int rc;

		OBD_ALLOC_PTR(copy);
		if (copy == NULL)
			RETURN(-ENOMEM);
		if (copy_from_user(copy, (char *)arg, sizeof(*copy))) {
			OBD_FREE_PTR(copy);
			RETURN(-EFAULT);
		}

		rc = ll_ioc_copy_start(inode->i_sb, copy);
		/* copy back even on error: hc_errval is updated in place */
		if (copy_to_user((char *)arg, copy, sizeof(*copy)))
			rc = -EFAULT;

		OBD_FREE_PTR(copy);
		RETURN(rc);
	}
	case LL_IOC_HSM_COPY_END: {
		struct hsm_copy *copy;
		int rc;

		OBD_ALLOC_PTR(copy);
		if (copy == NULL)
			RETURN(-ENOMEM);
		if (copy_from_user(copy, (char *)arg, sizeof(*copy))) {
			OBD_FREE_PTR(copy);
			RETURN(-EFAULT);
		}

		rc = ll_ioc_copy_end(inode->i_sb, copy);
		if (copy_to_user((char *)arg, copy, sizeof(*copy)))
			rc = -EFAULT;

		OBD_FREE_PTR(copy);
		RETURN(rc);
	}
	default:
		/* unrecognized commands fall through to the data export */
		RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL,
				     (void *)arg));
	}
}
1905
/**
 * llseek for Lustre directories.
 *
 * Directory positions are readdir hash cookies, not byte offsets.  The
 * visible f_pos range is capped at LL_DIR_END_OFF (or the 32-bit variant
 * for 32-bit-API clients), and the per-open lfd_pos keeps the full
 * 64-bit hash: for a 32-bit client on a 64-bit-hash server the 32-bit
 * offset is shifted up into the high word.  All updates are done under
 * i_mutex so f_pos and lfd_pos stay consistent.
 *
 * \retval the new offset on success, -EINVAL for out-of-range seeks.
 */
static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
{
	struct inode *inode = file->f_mapping->host;
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	int api32 = ll_need_32bit_api(sbi);
	loff_t ret = -EINVAL;
	ENTRY;

	mutex_lock(&inode->i_mutex);
	switch (origin) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_END:
			/* seeking past the end hash is invalid */
			if (offset > 0)
				GOTO(out, ret);
			if (api32)
				offset += LL_DIR_END_OFF_32BIT;
			else
				offset += LL_DIR_END_OFF;
			break;
		default:
			GOTO(out, ret);
	}

	if (offset >= 0 &&
	    ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
	     (!api32 && offset <= LL_DIR_END_OFF))) {
		if (offset != file->f_pos) {
			/* map the visible end-offset to the on-wire
			 * end-of-directory hash cookie */
			if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
			    (!api32 && offset == LL_DIR_END_OFF))
				fd->lfd_pos = MDS_DIR_END_OFF;
			else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
				fd->lfd_pos = offset << 32;
			else
				fd->lfd_pos = offset;
			file->f_pos = offset;
			file->f_version = 0;
		}
		ret = offset;
	}
	GOTO(out, ret);

out:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
1956
1957int ll_dir_open(struct inode *inode, struct file *file)
1958{
1959 ENTRY;
1960 RETURN(ll_file_open(inode, file));
1961}
1962
1963int ll_dir_release(struct inode *inode, struct file *file)
1964{
1965 ENTRY;
1966 RETURN(ll_file_release(inode, file));
1967}
1968
/*
 * File operations for Lustre directories: open/release are shared with
 * regular files, llseek is hash-cookie aware, iteration goes through
 * ll_readdir, and ioctls through the directory multiplexer above.
 */
struct file_operations ll_dir_operations = {
	.llseek   = ll_dir_seek,
	.open     = ll_dir_open,
	.release  = ll_dir_release,
	.read     = generic_read_dir,
	.iterate  = ll_readdir,
	.unlocked_ioctl   = ll_dir_ioctl,
	.fsync    = ll_fsync,
};
This page took 0.209116 seconds and 5 git commands to generate.