Btrfs: handle errors from btrfs_map_bio() everywhere
[deliverable/linux.git] / fs / btrfs / super.c
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19 #include <linux/blkdev.h>
20 #include <linux/module.h>
21 #include <linux/buffer_head.h>
22 #include <linux/fs.h>
23 #include <linux/pagemap.h>
24 #include <linux/highmem.h>
25 #include <linux/time.h>
26 #include <linux/init.h>
27 #include <linux/seq_file.h>
28 #include <linux/string.h>
29 #include <linux/backing-dev.h>
30 #include <linux/mount.h>
31 #include <linux/mpage.h>
32 #include <linux/swap.h>
33 #include <linux/writeback.h>
34 #include <linux/statfs.h>
35 #include <linux/compat.h>
36 #include <linux/parser.h>
37 #include <linux/ctype.h>
38 #include <linux/namei.h>
39 #include <linux/miscdevice.h>
40 #include <linux/magic.h>
41 #include <linux/slab.h>
42 #include <linux/cleancache.h>
43 #include <linux/ratelimit.h>
44 #include "compat.h"
45 #include "delayed-inode.h"
46 #include "ctree.h"
47 #include "disk-io.h"
48 #include "transaction.h"
49 #include "btrfs_inode.h"
50 #include "ioctl.h"
51 #include "print-tree.h"
52 #include "xattr.h"
53 #include "volumes.h"
54 #include "version.h"
55 #include "export.h"
56 #include "compression.h"
57 #include "rcu-string.h"
58
59 #define CREATE_TRACE_POINTS
60 #include <trace/events/btrfs.h>
61
62 static const struct super_operations btrfs_super_ops;
63 static struct file_system_type btrfs_fs_type;
64
65 static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
66 char nbuf[16])
67 {
68 char *errstr = NULL;
69
70 switch (errno) {
71 case -EIO:
72 errstr = "IO failure";
73 break;
74 case -ENOMEM:
75 errstr = "Out of memory";
76 break;
77 case -EROFS:
78 errstr = "Readonly filesystem";
79 break;
80 case -EEXIST:
81 errstr = "Object already exists";
82 break;
83 default:
84 if (nbuf) {
85 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
86 errstr = nbuf;
87 }
88 break;
89 }
90
91 return errstr;
92 }
93
94 static void __save_error_info(struct btrfs_fs_info *fs_info)
95 {
96 /*
97 * today we only save the error info into ram. Long term we'll
98 * also send it down to the disk
99 */
100 fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
101 }
102
/* Thin wrapper that records the error state via __save_error_info(). */
static void save_error_info(struct btrfs_fs_info *fs_info)
{
	__save_error_info(fs_info);
}
107
108 /* btrfs handle error by forcing the filesystem readonly */
109 static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
110 {
111 struct super_block *sb = fs_info->sb;
112
113 if (sb->s_flags & MS_RDONLY)
114 return;
115
116 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
117 sb->s_flags |= MS_RDONLY;
118 printk(KERN_INFO "btrfs is forced readonly\n");
119 /*
120 * Note that a running device replace operation is not
121 * canceled here although there is no way to update
122 * the progress. It would add the risk of a deadlock,
123 * therefore the canceling is ommited. The only penalty
124 * is that some I/O remains active until the procedure
125 * completes. The next time when the filesystem is
126 * mounted writeable again, the device replace
127 * operation continues.
128 */
129 // WARN_ON(1);
130 }
131 }
132
133 #ifdef CONFIG_PRINTK
134 /*
135 * __btrfs_std_error decodes expected errors from the caller and
136 * invokes the approciate error response.
137 */
138 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
139 unsigned int line, int errno, const char *fmt, ...)
140 {
141 struct super_block *sb = fs_info->sb;
142 char nbuf[16];
143 const char *errstr;
144 va_list args;
145 va_start(args, fmt);
146
147 /*
148 * Special case: if the error is EROFS, and we're already
149 * under MS_RDONLY, then it is safe here.
150 */
151 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
152 return;
153
154 errstr = btrfs_decode_error(fs_info, errno, nbuf);
155 if (fmt) {
156 struct va_format vaf = {
157 .fmt = fmt,
158 .va = &args,
159 };
160
161 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n",
162 sb->s_id, function, line, errstr, &vaf);
163 } else {
164 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
165 sb->s_id, function, line, errstr);
166 }
167
168 /* Don't go through full error handling during mount */
169 if (sb->s_flags & MS_BORN) {
170 save_error_info(fs_info);
171 btrfs_handle_error(fs_info);
172 }
173 va_end(args);
174 }
175
/*
 * Human readable names for the printk log levels, indexed by the numeric
 * level ('0' .. '7' minus '0').
 */
static const char * const logtypes[] = {
	[0] = "emergency",
	[1] = "alert",
	[2] = "critical",
	[3] = "error",
	[4] = "warning",
	[5] = "notice",
	[6] = "info",
	[7] = "debug",
};
186
187 void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
188 {
189 struct super_block *sb = fs_info->sb;
190 char lvl[4];
191 struct va_format vaf;
192 va_list args;
193 const char *type = logtypes[4];
194 int kern_level;
195
196 va_start(args, fmt);
197
198 kern_level = printk_get_level(fmt);
199 if (kern_level) {
200 size_t size = printk_skip_level(fmt) - fmt;
201 memcpy(lvl, fmt, size);
202 lvl[size] = '\0';
203 fmt += size;
204 type = logtypes[kern_level - '0'];
205 } else
206 *lvl = '\0';
207
208 vaf.fmt = fmt;
209 vaf.va = &args;
210
211 printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf);
212
213 va_end(args);
214 }
215
216 #else
217
218 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
219 unsigned int line, int errno, const char *fmt, ...)
220 {
221 struct super_block *sb = fs_info->sb;
222
223 /*
224 * Special case: if the error is EROFS, and we're already
225 * under MS_RDONLY, then it is safe here.
226 */
227 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
228 return;
229
230 /* Don't go through full error handling during mount */
231 if (sb->s_flags & MS_BORN) {
232 save_error_info(fs_info);
233 btrfs_handle_error(fs_info);
234 }
235 }
236 #endif
237
238 /*
239 * We only mark the transaction aborted and then set the file system read-only.
240 * This will prevent new transactions from starting or trying to join this
241 * one.
242 *
243 * This means that error recovery at the call site is limited to freeing
244 * any local memory allocations and passing the error code up without
245 * further cleanup. The transaction should complete as it normally would
246 * in the call path but will return -EIO.
247 *
248 * We'll complete the cleanup in btrfs_end_transaction and
249 * btrfs_commit_transaction.
250 */
251 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
252 struct btrfs_root *root, const char *function,
253 unsigned int line, int errno)
254 {
255 WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted\n");
256 trans->aborted = errno;
257 /* Nothing used. The other threads that have joined this
258 * transaction may be able to continue. */
259 if (!trans->blocks_used) {
260 char nbuf[16];
261 const char *errstr;
262
263 errstr = btrfs_decode_error(root->fs_info, errno, nbuf);
264 btrfs_printk(root->fs_info,
265 "%s:%d: Aborting unused transaction(%s).\n",
266 function, line, errstr);
267 return;
268 }
269 trans->transaction->aborted = errno;
270 __btrfs_std_error(root->fs_info, function, line, errno, NULL);
271 }
272 /*
273 * __btrfs_panic decodes unexpected, fatal errors from the caller,
274 * issues an alert, and either panics or BUGs, depending on mount options.
275 */
276 void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
277 unsigned int line, int errno, const char *fmt, ...)
278 {
279 char nbuf[16];
280 char *s_id = "<unknown>";
281 const char *errstr;
282 struct va_format vaf = { .fmt = fmt };
283 va_list args;
284
285 if (fs_info)
286 s_id = fs_info->sb->s_id;
287
288 va_start(args, fmt);
289 vaf.va = &args;
290
291 errstr = btrfs_decode_error(fs_info, errno, nbuf);
292 if (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)
293 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
294 s_id, function, line, &vaf, errstr);
295
296 printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
297 s_id, function, line, &vaf, errstr);
298 va_end(args);
299 /* Caller calls BUG() */
300 }
301
302 static void btrfs_put_super(struct super_block *sb)
303 {
304 (void)close_ctree(btrfs_sb(sb)->tree_root);
305 /* FIXME: need to fix VFS to return error? */
306 /* AV: return it _where_? ->put_super() can be triggered by any number
307 * of async events, up to and including delivery of SIGKILL to the
308 * last process that kept it busy. Or segfault in the aforementioned
309 * process... Whom would you report that to?
310 */
311 }
312
/*
 * Token ids for the mount options recognised by the parsers in this file.
 * Opt_err is the terminator / "no match" value of the tokens table.
 */
enum {
	Opt_degraded,
	Opt_subvol,
	Opt_subvolid,
	Opt_device,
	Opt_nodatasum,
	Opt_nodatacow,
	Opt_max_inline,
	Opt_alloc_start,
	Opt_nobarrier,
	Opt_ssd,
	Opt_nossd,
	Opt_ssd_spread,
	Opt_thread_pool,
	Opt_noacl,
	Opt_compress,
	Opt_compress_type,
	Opt_compress_force,
	Opt_compress_force_type,
	Opt_notreelog,
	Opt_ratio,
	Opt_flushoncommit,
	Opt_discard,
	Opt_space_cache,
	Opt_clear_cache,
	Opt_user_subvol_rm_allowed,
	Opt_enospc_debug,
	Opt_subvolrootid,
	Opt_defrag,
	Opt_inode_cache,
	Opt_no_space_cache,
	Opt_recovery,
	Opt_skip_balance,
	Opt_check_integrity,
	Opt_check_integrity_including_extent_data,
	Opt_check_integrity_print_mask,
	Opt_fatal_errors,
	Opt_err,
};
326
327 static match_table_t tokens = {
328 {Opt_degraded, "degraded"},
329 {Opt_subvol, "subvol=%s"},
330 {Opt_subvolid, "subvolid=%d"},
331 {Opt_device, "device=%s"},
332 {Opt_nodatasum, "nodatasum"},
333 {Opt_nodatacow, "nodatacow"},
334 {Opt_nobarrier, "nobarrier"},
335 {Opt_max_inline, "max_inline=%s"},
336 {Opt_alloc_start, "alloc_start=%s"},
337 {Opt_thread_pool, "thread_pool=%d"},
338 {Opt_compress, "compress"},
339 {Opt_compress_type, "compress=%s"},
340 {Opt_compress_force, "compress-force"},
341 {Opt_compress_force_type, "compress-force=%s"},
342 {Opt_ssd, "ssd"},
343 {Opt_ssd_spread, "ssd_spread"},
344 {Opt_nossd, "nossd"},
345 {Opt_noacl, "noacl"},
346 {Opt_notreelog, "notreelog"},
347 {Opt_flushoncommit, "flushoncommit"},
348 {Opt_ratio, "metadata_ratio=%d"},
349 {Opt_discard, "discard"},
350 {Opt_space_cache, "space_cache"},
351 {Opt_clear_cache, "clear_cache"},
352 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
353 {Opt_enospc_debug, "enospc_debug"},
354 {Opt_subvolrootid, "subvolrootid=%d"},
355 {Opt_defrag, "autodefrag"},
356 {Opt_inode_cache, "inode_cache"},
357 {Opt_no_space_cache, "nospace_cache"},
358 {Opt_recovery, "recovery"},
359 {Opt_skip_balance, "skip_balance"},
360 {Opt_check_integrity, "check_int"},
361 {Opt_check_integrity_including_extent_data, "check_int_data"},
362 {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
363 {Opt_fatal_errors, "fatal_errors=%s"},
364 {Opt_err, NULL},
365 };
366
367 /*
368 * Regular mount options parser. Everything that is needed only when
369 * reading in a new superblock is parsed here.
370 * XXX JDM: This needs to be cleaned up for remount.
371 */
372 int btrfs_parse_options(struct btrfs_root *root, char *options)
373 {
374 struct btrfs_fs_info *info = root->fs_info;
375 substring_t args[MAX_OPT_ARGS];
376 char *p, *num, *orig = NULL;
377 u64 cache_gen;
378 int intarg;
379 int ret = 0;
380 char *compress_type;
381 bool compress_force = false;
382
383 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
384 if (cache_gen)
385 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
386
387 if (!options)
388 goto out;
389
390 /*
391 * strsep changes the string, duplicate it because parse_options
392 * gets called twice
393 */
394 options = kstrdup(options, GFP_NOFS);
395 if (!options)
396 return -ENOMEM;
397
398 orig = options;
399
400 while ((p = strsep(&options, ",")) != NULL) {
401 int token;
402 if (!*p)
403 continue;
404
405 token = match_token(p, tokens, args);
406 switch (token) {
407 case Opt_degraded:
408 printk(KERN_INFO "btrfs: allowing degraded mounts\n");
409 btrfs_set_opt(info->mount_opt, DEGRADED);
410 break;
411 case Opt_subvol:
412 case Opt_subvolid:
413 case Opt_subvolrootid:
414 case Opt_device:
415 /*
416 * These are parsed by btrfs_parse_early_options
417 * and can be happily ignored here.
418 */
419 break;
420 case Opt_nodatasum:
421 printk(KERN_INFO "btrfs: setting nodatasum\n");
422 btrfs_set_opt(info->mount_opt, NODATASUM);
423 break;
424 case Opt_nodatacow:
425 if (!btrfs_test_opt(root, COMPRESS) ||
426 !btrfs_test_opt(root, FORCE_COMPRESS)) {
427 printk(KERN_INFO "btrfs: setting nodatacow, compression disabled\n");
428 } else {
429 printk(KERN_INFO "btrfs: setting nodatacow\n");
430 }
431 info->compress_type = BTRFS_COMPRESS_NONE;
432 btrfs_clear_opt(info->mount_opt, COMPRESS);
433 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
434 btrfs_set_opt(info->mount_opt, NODATACOW);
435 btrfs_set_opt(info->mount_opt, NODATASUM);
436 break;
437 case Opt_compress_force:
438 case Opt_compress_force_type:
439 compress_force = true;
440 case Opt_compress:
441 case Opt_compress_type:
442 if (token == Opt_compress ||
443 token == Opt_compress_force ||
444 strcmp(args[0].from, "zlib") == 0) {
445 compress_type = "zlib";
446 info->compress_type = BTRFS_COMPRESS_ZLIB;
447 btrfs_set_opt(info->mount_opt, COMPRESS);
448 btrfs_clear_opt(info->mount_opt, NODATACOW);
449 btrfs_clear_opt(info->mount_opt, NODATASUM);
450 } else if (strcmp(args[0].from, "lzo") == 0) {
451 compress_type = "lzo";
452 info->compress_type = BTRFS_COMPRESS_LZO;
453 btrfs_set_opt(info->mount_opt, COMPRESS);
454 btrfs_clear_opt(info->mount_opt, NODATACOW);
455 btrfs_clear_opt(info->mount_opt, NODATASUM);
456 btrfs_set_fs_incompat(info, COMPRESS_LZO);
457 } else if (strncmp(args[0].from, "no", 2) == 0) {
458 compress_type = "no";
459 info->compress_type = BTRFS_COMPRESS_NONE;
460 btrfs_clear_opt(info->mount_opt, COMPRESS);
461 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
462 compress_force = false;
463 } else {
464 ret = -EINVAL;
465 goto out;
466 }
467
468 if (compress_force) {
469 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
470 pr_info("btrfs: force %s compression\n",
471 compress_type);
472 } else
473 pr_info("btrfs: use %s compression\n",
474 compress_type);
475 break;
476 case Opt_ssd:
477 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
478 btrfs_set_opt(info->mount_opt, SSD);
479 break;
480 case Opt_ssd_spread:
481 printk(KERN_INFO "btrfs: use spread ssd "
482 "allocation scheme\n");
483 btrfs_set_opt(info->mount_opt, SSD);
484 btrfs_set_opt(info->mount_opt, SSD_SPREAD);
485 break;
486 case Opt_nossd:
487 printk(KERN_INFO "btrfs: not using ssd allocation "
488 "scheme\n");
489 btrfs_set_opt(info->mount_opt, NOSSD);
490 btrfs_clear_opt(info->mount_opt, SSD);
491 btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
492 break;
493 case Opt_nobarrier:
494 printk(KERN_INFO "btrfs: turning off barriers\n");
495 btrfs_set_opt(info->mount_opt, NOBARRIER);
496 break;
497 case Opt_thread_pool:
498 intarg = 0;
499 match_int(&args[0], &intarg);
500 if (intarg)
501 info->thread_pool_size = intarg;
502 break;
503 case Opt_max_inline:
504 num = match_strdup(&args[0]);
505 if (num) {
506 info->max_inline = memparse(num, NULL);
507 kfree(num);
508
509 if (info->max_inline) {
510 info->max_inline = max_t(u64,
511 info->max_inline,
512 root->sectorsize);
513 }
514 printk(KERN_INFO "btrfs: max_inline at %llu\n",
515 (unsigned long long)info->max_inline);
516 }
517 break;
518 case Opt_alloc_start:
519 num = match_strdup(&args[0]);
520 if (num) {
521 info->alloc_start = memparse(num, NULL);
522 kfree(num);
523 printk(KERN_INFO
524 "btrfs: allocations start at %llu\n",
525 (unsigned long long)info->alloc_start);
526 }
527 break;
528 case Opt_noacl:
529 root->fs_info->sb->s_flags &= ~MS_POSIXACL;
530 break;
531 case Opt_notreelog:
532 printk(KERN_INFO "btrfs: disabling tree log\n");
533 btrfs_set_opt(info->mount_opt, NOTREELOG);
534 break;
535 case Opt_flushoncommit:
536 printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
537 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
538 break;
539 case Opt_ratio:
540 intarg = 0;
541 match_int(&args[0], &intarg);
542 if (intarg) {
543 info->metadata_ratio = intarg;
544 printk(KERN_INFO "btrfs: metadata ratio %d\n",
545 info->metadata_ratio);
546 }
547 break;
548 case Opt_discard:
549 btrfs_set_opt(info->mount_opt, DISCARD);
550 break;
551 case Opt_space_cache:
552 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
553 break;
554 case Opt_no_space_cache:
555 printk(KERN_INFO "btrfs: disabling disk space caching\n");
556 btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
557 break;
558 case Opt_inode_cache:
559 printk(KERN_INFO "btrfs: enabling inode map caching\n");
560 btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
561 break;
562 case Opt_clear_cache:
563 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
564 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
565 break;
566 case Opt_user_subvol_rm_allowed:
567 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
568 break;
569 case Opt_enospc_debug:
570 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
571 break;
572 case Opt_defrag:
573 printk(KERN_INFO "btrfs: enabling auto defrag\n");
574 btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
575 break;
576 case Opt_recovery:
577 printk(KERN_INFO "btrfs: enabling auto recovery\n");
578 btrfs_set_opt(info->mount_opt, RECOVERY);
579 break;
580 case Opt_skip_balance:
581 btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
582 break;
583 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
584 case Opt_check_integrity_including_extent_data:
585 printk(KERN_INFO "btrfs: enabling check integrity"
586 " including extent data\n");
587 btrfs_set_opt(info->mount_opt,
588 CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
589 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
590 break;
591 case Opt_check_integrity:
592 printk(KERN_INFO "btrfs: enabling check integrity\n");
593 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
594 break;
595 case Opt_check_integrity_print_mask:
596 intarg = 0;
597 match_int(&args[0], &intarg);
598 if (intarg) {
599 info->check_integrity_print_mask = intarg;
600 printk(KERN_INFO "btrfs:"
601 " check_integrity_print_mask 0x%x\n",
602 info->check_integrity_print_mask);
603 }
604 break;
605 #else
606 case Opt_check_integrity_including_extent_data:
607 case Opt_check_integrity:
608 case Opt_check_integrity_print_mask:
609 printk(KERN_ERR "btrfs: support for check_integrity*"
610 " not compiled in!\n");
611 ret = -EINVAL;
612 goto out;
613 #endif
614 case Opt_fatal_errors:
615 if (strcmp(args[0].from, "panic") == 0)
616 btrfs_set_opt(info->mount_opt,
617 PANIC_ON_FATAL_ERROR);
618 else if (strcmp(args[0].from, "bug") == 0)
619 btrfs_clear_opt(info->mount_opt,
620 PANIC_ON_FATAL_ERROR);
621 else {
622 ret = -EINVAL;
623 goto out;
624 }
625 break;
626 case Opt_err:
627 printk(KERN_INFO "btrfs: unrecognized mount option "
628 "'%s'\n", p);
629 ret = -EINVAL;
630 goto out;
631 default:
632 break;
633 }
634 }
635 out:
636 if (!ret && btrfs_test_opt(root, SPACE_CACHE))
637 printk(KERN_INFO "btrfs: disk space caching is enabled\n");
638 kfree(orig);
639 return ret;
640 }
641
642 /*
643 * Parse mount options that are required early in the mount process.
644 *
645 * All other options will be parsed on much later in the mount process and
646 * only when we need to allocate a new super block.
647 */
648 static int btrfs_parse_early_options(const char *options, fmode_t flags,
649 void *holder, char **subvol_name, u64 *subvol_objectid,
650 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
651 {
652 substring_t args[MAX_OPT_ARGS];
653 char *device_name, *opts, *orig, *p;
654 int error = 0;
655 int intarg;
656
657 if (!options)
658 return 0;
659
660 /*
661 * strsep changes the string, duplicate it because parse_options
662 * gets called twice
663 */
664 opts = kstrdup(options, GFP_KERNEL);
665 if (!opts)
666 return -ENOMEM;
667 orig = opts;
668
669 while ((p = strsep(&opts, ",")) != NULL) {
670 int token;
671 if (!*p)
672 continue;
673
674 token = match_token(p, tokens, args);
675 switch (token) {
676 case Opt_subvol:
677 kfree(*subvol_name);
678 *subvol_name = match_strdup(&args[0]);
679 break;
680 case Opt_subvolid:
681 intarg = 0;
682 error = match_int(&args[0], &intarg);
683 if (!error) {
684 /* we want the original fs_tree */
685 if (!intarg)
686 *subvol_objectid =
687 BTRFS_FS_TREE_OBJECTID;
688 else
689 *subvol_objectid = intarg;
690 }
691 break;
692 case Opt_subvolrootid:
693 intarg = 0;
694 error = match_int(&args[0], &intarg);
695 if (!error) {
696 /* we want the original fs_tree */
697 if (!intarg)
698 *subvol_rootid =
699 BTRFS_FS_TREE_OBJECTID;
700 else
701 *subvol_rootid = intarg;
702 }
703 break;
704 case Opt_device:
705 device_name = match_strdup(&args[0]);
706 if (!device_name) {
707 error = -ENOMEM;
708 goto out;
709 }
710 error = btrfs_scan_one_device(device_name,
711 flags, holder, fs_devices);
712 kfree(device_name);
713 if (error)
714 goto out;
715 break;
716 default:
717 break;
718 }
719 }
720
721 out:
722 kfree(orig);
723 return error;
724 }
725
726 static struct dentry *get_default_root(struct super_block *sb,
727 u64 subvol_objectid)
728 {
729 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
730 struct btrfs_root *root = fs_info->tree_root;
731 struct btrfs_root *new_root;
732 struct btrfs_dir_item *di;
733 struct btrfs_path *path;
734 struct btrfs_key location;
735 struct inode *inode;
736 u64 dir_id;
737 int new = 0;
738
739 /*
740 * We have a specific subvol we want to mount, just setup location and
741 * go look up the root.
742 */
743 if (subvol_objectid) {
744 location.objectid = subvol_objectid;
745 location.type = BTRFS_ROOT_ITEM_KEY;
746 location.offset = (u64)-1;
747 goto find_root;
748 }
749
750 path = btrfs_alloc_path();
751 if (!path)
752 return ERR_PTR(-ENOMEM);
753 path->leave_spinning = 1;
754
755 /*
756 * Find the "default" dir item which points to the root item that we
757 * will mount by default if we haven't been given a specific subvolume
758 * to mount.
759 */
760 dir_id = btrfs_super_root_dir(fs_info->super_copy);
761 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
762 if (IS_ERR(di)) {
763 btrfs_free_path(path);
764 return ERR_CAST(di);
765 }
766 if (!di) {
767 /*
768 * Ok the default dir item isn't there. This is weird since
769 * it's always been there, but don't freak out, just try and
770 * mount to root most subvolume.
771 */
772 btrfs_free_path(path);
773 dir_id = BTRFS_FIRST_FREE_OBJECTID;
774 new_root = fs_info->fs_root;
775 goto setup_root;
776 }
777
778 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
779 btrfs_free_path(path);
780
781 find_root:
782 new_root = btrfs_read_fs_root_no_name(fs_info, &location);
783 if (IS_ERR(new_root))
784 return ERR_CAST(new_root);
785
786 if (btrfs_root_refs(&new_root->root_item) == 0)
787 return ERR_PTR(-ENOENT);
788
789 dir_id = btrfs_root_dirid(&new_root->root_item);
790 setup_root:
791 location.objectid = dir_id;
792 location.type = BTRFS_INODE_ITEM_KEY;
793 location.offset = 0;
794
795 inode = btrfs_iget(sb, &location, new_root, &new);
796 if (IS_ERR(inode))
797 return ERR_CAST(inode);
798
799 /*
800 * If we're just mounting the root most subvol put the inode and return
801 * a reference to the dentry. We will have already gotten a reference
802 * to the inode in btrfs_fill_super so we're good to go.
803 */
804 if (!new && sb->s_root->d_inode == inode) {
805 iput(inode);
806 return dget(sb->s_root);
807 }
808
809 return d_obtain_alias(inode);
810 }
811
812 static int btrfs_fill_super(struct super_block *sb,
813 struct btrfs_fs_devices *fs_devices,
814 void *data, int silent)
815 {
816 struct inode *inode;
817 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
818 struct btrfs_key key;
819 int err;
820
821 sb->s_maxbytes = MAX_LFS_FILESIZE;
822 sb->s_magic = BTRFS_SUPER_MAGIC;
823 sb->s_op = &btrfs_super_ops;
824 sb->s_d_op = &btrfs_dentry_operations;
825 sb->s_export_op = &btrfs_export_ops;
826 sb->s_xattr = btrfs_xattr_handlers;
827 sb->s_time_gran = 1;
828 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
829 sb->s_flags |= MS_POSIXACL;
830 #endif
831 sb->s_flags |= MS_I_VERSION;
832 err = open_ctree(sb, fs_devices, (char *)data);
833 if (err) {
834 printk("btrfs: open_ctree failed\n");
835 return err;
836 }
837
838 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
839 key.type = BTRFS_INODE_ITEM_KEY;
840 key.offset = 0;
841 inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
842 if (IS_ERR(inode)) {
843 err = PTR_ERR(inode);
844 goto fail_close;
845 }
846
847 sb->s_root = d_make_root(inode);
848 if (!sb->s_root) {
849 err = -ENOMEM;
850 goto fail_close;
851 }
852
853 save_mount_options(sb, data);
854 cleancache_init_fs(sb);
855 sb->s_flags |= MS_ACTIVE;
856 return 0;
857
858 fail_close:
859 close_ctree(fs_info->tree_root);
860 return err;
861 }
862
863 int btrfs_sync_fs(struct super_block *sb, int wait)
864 {
865 struct btrfs_trans_handle *trans;
866 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
867 struct btrfs_root *root = fs_info->tree_root;
868
869 trace_btrfs_sync_fs(wait);
870
871 if (!wait) {
872 filemap_flush(fs_info->btree_inode->i_mapping);
873 return 0;
874 }
875
876 btrfs_wait_ordered_extents(root, 0);
877
878 trans = btrfs_attach_transaction(root);
879 if (IS_ERR(trans)) {
880 /* no transaction, don't bother */
881 if (PTR_ERR(trans) == -ENOENT)
882 return 0;
883 return PTR_ERR(trans);
884 }
885 return btrfs_commit_transaction(trans, root);
886 }
887
888 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
889 {
890 struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
891 struct btrfs_root *root = info->tree_root;
892 char *compress_type;
893
894 if (btrfs_test_opt(root, DEGRADED))
895 seq_puts(seq, ",degraded");
896 if (btrfs_test_opt(root, NODATASUM))
897 seq_puts(seq, ",nodatasum");
898 if (btrfs_test_opt(root, NODATACOW))
899 seq_puts(seq, ",nodatacow");
900 if (btrfs_test_opt(root, NOBARRIER))
901 seq_puts(seq, ",nobarrier");
902 if (info->max_inline != 8192 * 1024)
903 seq_printf(seq, ",max_inline=%llu",
904 (unsigned long long)info->max_inline);
905 if (info->alloc_start != 0)
906 seq_printf(seq, ",alloc_start=%llu",
907 (unsigned long long)info->alloc_start);
908 if (info->thread_pool_size != min_t(unsigned long,
909 num_online_cpus() + 2, 8))
910 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
911 if (btrfs_test_opt(root, COMPRESS)) {
912 if (info->compress_type == BTRFS_COMPRESS_ZLIB)
913 compress_type = "zlib";
914 else
915 compress_type = "lzo";
916 if (btrfs_test_opt(root, FORCE_COMPRESS))
917 seq_printf(seq, ",compress-force=%s", compress_type);
918 else
919 seq_printf(seq, ",compress=%s", compress_type);
920 }
921 if (btrfs_test_opt(root, NOSSD))
922 seq_puts(seq, ",nossd");
923 if (btrfs_test_opt(root, SSD_SPREAD))
924 seq_puts(seq, ",ssd_spread");
925 else if (btrfs_test_opt(root, SSD))
926 seq_puts(seq, ",ssd");
927 if (btrfs_test_opt(root, NOTREELOG))
928 seq_puts(seq, ",notreelog");
929 if (btrfs_test_opt(root, FLUSHONCOMMIT))
930 seq_puts(seq, ",flushoncommit");
931 if (btrfs_test_opt(root, DISCARD))
932 seq_puts(seq, ",discard");
933 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
934 seq_puts(seq, ",noacl");
935 if (btrfs_test_opt(root, SPACE_CACHE))
936 seq_puts(seq, ",space_cache");
937 else
938 seq_puts(seq, ",nospace_cache");
939 if (btrfs_test_opt(root, CLEAR_CACHE))
940 seq_puts(seq, ",clear_cache");
941 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
942 seq_puts(seq, ",user_subvol_rm_allowed");
943 if (btrfs_test_opt(root, ENOSPC_DEBUG))
944 seq_puts(seq, ",enospc_debug");
945 if (btrfs_test_opt(root, AUTO_DEFRAG))
946 seq_puts(seq, ",autodefrag");
947 if (btrfs_test_opt(root, INODE_MAP_CACHE))
948 seq_puts(seq, ",inode_cache");
949 if (btrfs_test_opt(root, SKIP_BALANCE))
950 seq_puts(seq, ",skip_balance");
951 if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
952 seq_puts(seq, ",fatal_errors=panic");
953 return 0;
954 }
955
956 static int btrfs_test_super(struct super_block *s, void *data)
957 {
958 struct btrfs_fs_info *p = data;
959 struct btrfs_fs_info *fs_info = btrfs_sb(s);
960
961 return fs_info->fs_devices == p->fs_devices;
962 }
963
964 static int btrfs_set_super(struct super_block *s, void *data)
965 {
966 int err = set_anon_super(s, data);
967 if (!err)
968 s->s_fs_info = data;
969 return err;
970 }
971
972 /*
973 * subvolumes are identified by ino 256
974 */
975 static inline int is_subvolume_inode(struct inode *inode)
976 {
977 if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
978 return 1;
979 return 0;
980 }
981
982 /*
983 * This will strip out the subvol=%s argument for an argument string and add
984 * subvolid=0 to make sure we get the actual tree root for path walking to the
985 * subvol we want.
986 */
987 static char *setup_root_args(char *args)
988 {
989 unsigned len = strlen(args) + 2 + 1;
990 char *src, *dst, *buf;
991
992 /*
993 * We need the same args as before, but with this substitution:
994 * s!subvol=[^,]+!subvolid=0!
995 *
996 * Since the replacement string is up to 2 bytes longer than the
997 * original, allocate strlen(args) + 2 + 1 bytes.
998 */
999
1000 src = strstr(args, "subvol=");
1001 /* This shouldn't happen, but just in case.. */
1002 if (!src)
1003 return NULL;
1004
1005 buf = dst = kmalloc(len, GFP_NOFS);
1006 if (!buf)
1007 return NULL;
1008
1009 /*
1010 * If the subvol= arg is not at the start of the string,
1011 * copy whatever precedes it into buf.
1012 */
1013 if (src != args) {
1014 *src++ = '\0';
1015 strcpy(buf, args);
1016 dst += strlen(args);
1017 }
1018
1019 strcpy(dst, "subvolid=0");
1020 dst += strlen("subvolid=0");
1021
1022 /*
1023 * If there is a "," after the original subvol=... string,
1024 * copy that suffix into our buffer. Otherwise, we're done.
1025 */
1026 src = strchr(src, ',');
1027 if (src)
1028 strcpy(dst, src);
1029
1030 return buf;
1031 }
1032
1033 static struct dentry *mount_subvol(const char *subvol_name, int flags,
1034 const char *device_name, char *data)
1035 {
1036 struct dentry *root;
1037 struct vfsmount *mnt;
1038 char *newargs;
1039
1040 newargs = setup_root_args(data);
1041 if (!newargs)
1042 return ERR_PTR(-ENOMEM);
1043 mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
1044 newargs);
1045 kfree(newargs);
1046 if (IS_ERR(mnt))
1047 return ERR_CAST(mnt);
1048
1049 root = mount_subtree(mnt, subvol_name);
1050
1051 if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) {
1052 struct super_block *s = root->d_sb;
1053 dput(root);
1054 root = ERR_PTR(-EINVAL);
1055 deactivate_locked_super(s);
1056 printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n",
1057 subvol_name);
1058 }
1059
1060 return root;
1061 }
1062
1063 /*
1064 * Find a superblock for the given device / mount point.
1065 *
1066 * Note: This is based on get_sb_bdev from fs/super.c with a few additions
1067 * for multiple device setup. Make sure to keep it in sync.
1068 */
1069 static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1070 const char *device_name, void *data)
1071 {
1072 struct block_device *bdev = NULL;
1073 struct super_block *s;
1074 struct dentry *root;
1075 struct btrfs_fs_devices *fs_devices = NULL;
1076 struct btrfs_fs_info *fs_info = NULL;
1077 fmode_t mode = FMODE_READ;
1078 char *subvol_name = NULL;
1079 u64 subvol_objectid = 0;
1080 u64 subvol_rootid = 0;
1081 int error = 0;
1082
1083 if (!(flags & MS_RDONLY))
1084 mode |= FMODE_WRITE;
1085
1086 error = btrfs_parse_early_options(data, mode, fs_type,
1087 &subvol_name, &subvol_objectid,
1088 &subvol_rootid, &fs_devices);
1089 if (error) {
1090 kfree(subvol_name);
1091 return ERR_PTR(error);
1092 }
1093
1094 if (subvol_name) {
1095 root = mount_subvol(subvol_name, flags, device_name, data);
1096 kfree(subvol_name);
1097 return root;
1098 }
1099
1100 error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
1101 if (error)
1102 return ERR_PTR(error);
1103
1104 /*
1105 * Setup a dummy root and fs_info for test/set super. This is because
1106 * we don't actually fill this stuff out until open_ctree, but we need
1107 * it for searching for existing supers, so this lets us do that and
1108 * then open_ctree will properly initialize everything later.
1109 */
1110 fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
1111 if (!fs_info)
1112 return ERR_PTR(-ENOMEM);
1113
1114 fs_info->fs_devices = fs_devices;
1115
1116 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
1117 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
1118 if (!fs_info->super_copy || !fs_info->super_for_commit) {
1119 error = -ENOMEM;
1120 goto error_fs_info;
1121 }
1122
1123 error = btrfs_open_devices(fs_devices, mode, fs_type);
1124 if (error)
1125 goto error_fs_info;
1126
1127 if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
1128 error = -EACCES;
1129 goto error_close_devices;
1130 }
1131
1132 bdev = fs_devices->latest_bdev;
1133 s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
1134 fs_info);
1135 if (IS_ERR(s)) {
1136 error = PTR_ERR(s);
1137 goto error_close_devices;
1138 }
1139
1140 if (s->s_root) {
1141 btrfs_close_devices(fs_devices);
1142 free_fs_info(fs_info);
1143 if ((flags ^ s->s_flags) & MS_RDONLY)
1144 error = -EBUSY;
1145 } else {
1146 char b[BDEVNAME_SIZE];
1147
1148 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1149 btrfs_sb(s)->bdev_holder = fs_type;
1150 error = btrfs_fill_super(s, fs_devices, data,
1151 flags & MS_SILENT ? 1 : 0);
1152 }
1153
1154 root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error);
1155 if (IS_ERR(root))
1156 deactivate_locked_super(s);
1157
1158 return root;
1159
1160 error_close_devices:
1161 btrfs_close_devices(fs_devices);
1162 error_fs_info:
1163 free_fs_info(fs_info);
1164 return ERR_PTR(error);
1165 }
1166
1167 static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit)
1168 {
1169 spin_lock_irq(&workers->lock);
1170 workers->max_workers = new_limit;
1171 spin_unlock_irq(&workers->lock);
1172 }
1173
1174 static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1175 int new_pool_size, int old_pool_size)
1176 {
1177 if (new_pool_size == old_pool_size)
1178 return;
1179
1180 fs_info->thread_pool_size = new_pool_size;
1181
1182 printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n",
1183 old_pool_size, new_pool_size);
1184
1185 btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size);
1186 btrfs_set_max_workers(&fs_info->workers, new_pool_size);
1187 btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size);
1188 btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size);
1189 btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size);
1190 btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size);
1191 btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size);
1192 btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size);
1193 btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size);
1194 btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size);
1195 btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size);
1196 btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size);
1197 btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size);
1198 btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size);
1199 }
1200
1201 static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1202 {
1203 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1204 struct btrfs_root *root = fs_info->tree_root;
1205 unsigned old_flags = sb->s_flags;
1206 unsigned long old_opts = fs_info->mount_opt;
1207 unsigned long old_compress_type = fs_info->compress_type;
1208 u64 old_max_inline = fs_info->max_inline;
1209 u64 old_alloc_start = fs_info->alloc_start;
1210 int old_thread_pool_size = fs_info->thread_pool_size;
1211 unsigned int old_metadata_ratio = fs_info->metadata_ratio;
1212 int ret;
1213
1214 ret = btrfs_parse_options(root, data);
1215 if (ret) {
1216 ret = -EINVAL;
1217 goto restore;
1218 }
1219
1220 btrfs_resize_thread_pool(fs_info,
1221 fs_info->thread_pool_size, old_thread_pool_size);
1222
1223 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1224 return 0;
1225
1226 if (*flags & MS_RDONLY) {
1227 sb->s_flags |= MS_RDONLY;
1228
1229 ret = btrfs_commit_super(root);
1230 if (ret)
1231 goto restore;
1232 } else {
1233 if (fs_info->fs_devices->rw_devices == 0) {
1234 ret = -EACCES;
1235 goto restore;
1236 }
1237
1238 if (fs_info->fs_devices->missing_devices >
1239 fs_info->num_tolerated_disk_barrier_failures &&
1240 !(*flags & MS_RDONLY)) {
1241 printk(KERN_WARNING
1242 "Btrfs: too many missing devices, writeable remount is not allowed\n");
1243 ret = -EACCES;
1244 goto restore;
1245 }
1246
1247 if (btrfs_super_log_root(fs_info->super_copy) != 0) {
1248 ret = -EINVAL;
1249 goto restore;
1250 }
1251
1252 ret = btrfs_cleanup_fs_roots(fs_info);
1253 if (ret)
1254 goto restore;
1255
1256 /* recover relocation */
1257 ret = btrfs_recover_relocation(root);
1258 if (ret)
1259 goto restore;
1260
1261 ret = btrfs_resume_balance_async(fs_info);
1262 if (ret)
1263 goto restore;
1264
1265 sb->s_flags &= ~MS_RDONLY;
1266 }
1267
1268 return 0;
1269
1270 restore:
1271 /* We've hit an error - don't reset MS_RDONLY */
1272 if (sb->s_flags & MS_RDONLY)
1273 old_flags |= MS_RDONLY;
1274 sb->s_flags = old_flags;
1275 fs_info->mount_opt = old_opts;
1276 fs_info->compress_type = old_compress_type;
1277 fs_info->max_inline = old_max_inline;
1278 fs_info->alloc_start = old_alloc_start;
1279 btrfs_resize_thread_pool(fs_info,
1280 old_thread_pool_size, fs_info->thread_pool_size);
1281 fs_info->metadata_ratio = old_metadata_ratio;
1282 return ret;
1283 }
1284
1285 /* Used to sort the devices by max_avail(descending sort) */
1286 static int btrfs_cmp_device_free_bytes(const void *dev_info1,
1287 const void *dev_info2)
1288 {
1289 if (((struct btrfs_device_info *)dev_info1)->max_avail >
1290 ((struct btrfs_device_info *)dev_info2)->max_avail)
1291 return -1;
1292 else if (((struct btrfs_device_info *)dev_info1)->max_avail <
1293 ((struct btrfs_device_info *)dev_info2)->max_avail)
1294 return 1;
1295 else
1296 return 0;
1297 }
1298
1299 /*
1300 * sort the devices by max_avail, in which max free extent size of each device
1301 * is stored.(Descending Sort)
1302 */
1303 static inline void btrfs_descending_sort_devices(
1304 struct btrfs_device_info *devices,
1305 size_t nr_devices)
1306 {
1307 sort(devices, nr_devices, sizeof(struct btrfs_device_info),
1308 btrfs_cmp_device_free_bytes, NULL);
1309 }
1310
1311 /*
1312 * The helper to calc the free space on the devices that can be used to store
1313 * file data.
1314 */
1315 static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1316 {
1317 struct btrfs_fs_info *fs_info = root->fs_info;
1318 struct btrfs_device_info *devices_info;
1319 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
1320 struct btrfs_device *device;
1321 u64 skip_space;
1322 u64 type;
1323 u64 avail_space;
1324 u64 used_space;
1325 u64 min_stripe_size;
1326 int min_stripes = 1, num_stripes = 1;
1327 int i = 0, nr_devices;
1328 int ret;
1329
1330 nr_devices = fs_info->fs_devices->open_devices;
1331 BUG_ON(!nr_devices);
1332
1333 devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
1334 GFP_NOFS);
1335 if (!devices_info)
1336 return -ENOMEM;
1337
1338 /* calc min stripe number for data space alloction */
1339 type = btrfs_get_alloc_profile(root, 1);
1340 if (type & BTRFS_BLOCK_GROUP_RAID0) {
1341 min_stripes = 2;
1342 num_stripes = nr_devices;
1343 } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
1344 min_stripes = 2;
1345 num_stripes = 2;
1346 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
1347 min_stripes = 4;
1348 num_stripes = 4;
1349 }
1350
1351 if (type & BTRFS_BLOCK_GROUP_DUP)
1352 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
1353 else
1354 min_stripe_size = BTRFS_STRIPE_LEN;
1355
1356 list_for_each_entry(device, &fs_devices->devices, dev_list) {
1357 if (!device->in_fs_metadata || !device->bdev ||
1358 device->is_tgtdev_for_dev_replace)
1359 continue;
1360
1361 avail_space = device->total_bytes - device->bytes_used;
1362
1363 /* align with stripe_len */
1364 do_div(avail_space, BTRFS_STRIPE_LEN);
1365 avail_space *= BTRFS_STRIPE_LEN;
1366
1367 /*
1368 * In order to avoid overwritting the superblock on the drive,
1369 * btrfs starts at an offset of at least 1MB when doing chunk
1370 * allocation.
1371 */
1372 skip_space = 1024 * 1024;
1373
1374 /* user can set the offset in fs_info->alloc_start. */
1375 if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
1376 device->total_bytes)
1377 skip_space = max(fs_info->alloc_start, skip_space);
1378
1379 /*
1380 * btrfs can not use the free space in [0, skip_space - 1],
1381 * we must subtract it from the total. In order to implement
1382 * it, we account the used space in this range first.
1383 */
1384 ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
1385 &used_space);
1386 if (ret) {
1387 kfree(devices_info);
1388 return ret;
1389 }
1390
1391 /* calc the free space in [0, skip_space - 1] */
1392 skip_space -= used_space;
1393
1394 /*
1395 * we can use the free space in [0, skip_space - 1], subtract
1396 * it from the total.
1397 */
1398 if (avail_space && avail_space >= skip_space)
1399 avail_space -= skip_space;
1400 else
1401 avail_space = 0;
1402
1403 if (avail_space < min_stripe_size)
1404 continue;
1405
1406 devices_info[i].dev = device;
1407 devices_info[i].max_avail = avail_space;
1408
1409 i++;
1410 }
1411
1412 nr_devices = i;
1413
1414 btrfs_descending_sort_devices(devices_info, nr_devices);
1415
1416 i = nr_devices - 1;
1417 avail_space = 0;
1418 while (nr_devices >= min_stripes) {
1419 if (num_stripes > nr_devices)
1420 num_stripes = nr_devices;
1421
1422 if (devices_info[i].max_avail >= min_stripe_size) {
1423 int j;
1424 u64 alloc_size;
1425
1426 avail_space += devices_info[i].max_avail * num_stripes;
1427 alloc_size = devices_info[i].max_avail;
1428 for (j = i + 1 - num_stripes; j <= i; j++)
1429 devices_info[j].max_avail -= alloc_size;
1430 }
1431 i--;
1432 nr_devices--;
1433 }
1434
1435 kfree(devices_info);
1436 *free_bytes = avail_space;
1437 return 0;
1438 }
1439
1440 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1441 {
1442 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
1443 struct btrfs_super_block *disk_super = fs_info->super_copy;
1444 struct list_head *head = &fs_info->space_info;
1445 struct btrfs_space_info *found;
1446 u64 total_used = 0;
1447 u64 total_free_data = 0;
1448 int bits = dentry->d_sb->s_blocksize_bits;
1449 __be32 *fsid = (__be32 *)fs_info->fsid;
1450 int ret;
1451
1452 /* holding chunk_muext to avoid allocating new chunks */
1453 mutex_lock(&fs_info->chunk_mutex);
1454 rcu_read_lock();
1455 list_for_each_entry_rcu(found, head, list) {
1456 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
1457 total_free_data += found->disk_total - found->disk_used;
1458 total_free_data -=
1459 btrfs_account_ro_block_groups_free_space(found);
1460 }
1461
1462 total_used += found->disk_used;
1463 }
1464 rcu_read_unlock();
1465
1466 buf->f_namelen = BTRFS_NAME_LEN;
1467 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
1468 buf->f_bfree = buf->f_blocks - (total_used >> bits);
1469 buf->f_bsize = dentry->d_sb->s_blocksize;
1470 buf->f_type = BTRFS_SUPER_MAGIC;
1471 buf->f_bavail = total_free_data;
1472 ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
1473 if (ret) {
1474 mutex_unlock(&fs_info->chunk_mutex);
1475 return ret;
1476 }
1477 buf->f_bavail += total_free_data;
1478 buf->f_bavail = buf->f_bavail >> bits;
1479 mutex_unlock(&fs_info->chunk_mutex);
1480
1481 /* We treat it as constant endianness (it doesn't matter _which_)
1482 because we want the fsid to come out the same whether mounted
1483 on a big-endian or little-endian host */
1484 buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
1485 buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
1486 /* Mask in the root object ID too, to disambiguate subvols */
1487 buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
1488 buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
1489
1490 return 0;
1491 }
1492
/*
 * ->kill_sb handler.  The fs_info pointer is read from @sb before the
 * superblock is handed to kill_anon_super() and freed afterwards.
 */
static void btrfs_kill_super(struct super_block *sb)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	kill_anon_super(sb);

	free_fs_info(fs_info);
}
1499
1500 static struct file_system_type btrfs_fs_type = {
1501 .owner = THIS_MODULE,
1502 .name = "btrfs",
1503 .mount = btrfs_mount,
1504 .kill_sb = btrfs_kill_super,
1505 .fs_flags = FS_REQUIRES_DEV,
1506 };
1507
1508 /*
1509 * used by btrfsctl to scan devices when no FS is mounted
1510 */
1511 static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
1512 unsigned long arg)
1513 {
1514 struct btrfs_ioctl_vol_args *vol;
1515 struct btrfs_fs_devices *fs_devices;
1516 int ret = -ENOTTY;
1517
1518 if (!capable(CAP_SYS_ADMIN))
1519 return -EPERM;
1520
1521 vol = memdup_user((void __user *)arg, sizeof(*vol));
1522 if (IS_ERR(vol))
1523 return PTR_ERR(vol);
1524
1525 switch (cmd) {
1526 case BTRFS_IOC_SCAN_DEV:
1527 ret = btrfs_scan_one_device(vol->name, FMODE_READ,
1528 &btrfs_fs_type, &fs_devices);
1529 break;
1530 case BTRFS_IOC_DEVICES_READY:
1531 ret = btrfs_scan_one_device(vol->name, FMODE_READ,
1532 &btrfs_fs_type, &fs_devices);
1533 if (ret)
1534 break;
1535 ret = !(fs_devices->num_devices == fs_devices->total_devices);
1536 break;
1537 }
1538
1539 kfree(vol);
1540 return ret;
1541 }
1542
1543 static int btrfs_freeze(struct super_block *sb)
1544 {
1545 struct btrfs_trans_handle *trans;
1546 struct btrfs_root *root = btrfs_sb(sb)->tree_root;
1547
1548 trans = btrfs_attach_transaction(root);
1549 if (IS_ERR(trans)) {
1550 /* no transaction, don't bother */
1551 if (PTR_ERR(trans) == -ENOENT)
1552 return 0;
1553 return PTR_ERR(trans);
1554 }
1555 return btrfs_commit_transaction(trans, root);
1556 }
1557
/* ->unfreeze_fs handler: there is nothing to undo, always returns 0. */
static int btrfs_unfreeze(struct super_block *sb)
{
	return 0;
}
1562
1563 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1564 {
1565 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
1566 struct btrfs_fs_devices *cur_devices;
1567 struct btrfs_device *dev, *first_dev = NULL;
1568 struct list_head *head;
1569 struct rcu_string *name;
1570
1571 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1572 cur_devices = fs_info->fs_devices;
1573 while (cur_devices) {
1574 head = &cur_devices->devices;
1575 list_for_each_entry(dev, head, dev_list) {
1576 if (dev->missing)
1577 continue;
1578 if (!first_dev || dev->devid < first_dev->devid)
1579 first_dev = dev;
1580 }
1581 cur_devices = cur_devices->seed;
1582 }
1583
1584 if (first_dev) {
1585 rcu_read_lock();
1586 name = rcu_dereference(first_dev->name);
1587 seq_escape(m, name->str, " \t\n\\");
1588 rcu_read_unlock();
1589 } else {
1590 WARN_ON(1);
1591 }
1592 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1593 return 0;
1594 }
1595
1596 static const struct super_operations btrfs_super_ops = {
1597 .drop_inode = btrfs_drop_inode,
1598 .evict_inode = btrfs_evict_inode,
1599 .put_super = btrfs_put_super,
1600 .sync_fs = btrfs_sync_fs,
1601 .show_options = btrfs_show_options,
1602 .show_devname = btrfs_show_devname,
1603 .write_inode = btrfs_write_inode,
1604 .alloc_inode = btrfs_alloc_inode,
1605 .destroy_inode = btrfs_destroy_inode,
1606 .statfs = btrfs_statfs,
1607 .remount_fs = btrfs_remount,
1608 .freeze_fs = btrfs_freeze,
1609 .unfreeze_fs = btrfs_unfreeze,
1610 };
1611
1612 static const struct file_operations btrfs_ctl_fops = {
1613 .unlocked_ioctl = btrfs_control_ioctl,
1614 .compat_ioctl = btrfs_control_ioctl,
1615 .owner = THIS_MODULE,
1616 .llseek = noop_llseek,
1617 };
1618
1619 static struct miscdevice btrfs_misc = {
1620 .minor = BTRFS_MINOR,
1621 .name = "btrfs-control",
1622 .fops = &btrfs_ctl_fops
1623 };
1624
1625 MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
1626 MODULE_ALIAS("devname:btrfs-control");
1627
1628 static int btrfs_interface_init(void)
1629 {
1630 return misc_register(&btrfs_misc);
1631 }
1632
1633 static void btrfs_interface_exit(void)
1634 {
1635 if (misc_deregister(&btrfs_misc) < 0)
1636 printk(KERN_INFO "btrfs: misc_deregister failed for control device\n");
1637 }
1638
1639 static int __init init_btrfs_fs(void)
1640 {
1641 int err;
1642
1643 err = btrfs_init_sysfs();
1644 if (err)
1645 return err;
1646
1647 btrfs_init_compress();
1648
1649 err = btrfs_init_cachep();
1650 if (err)
1651 goto free_compress;
1652
1653 err = extent_io_init();
1654 if (err)
1655 goto free_cachep;
1656
1657 err = extent_map_init();
1658 if (err)
1659 goto free_extent_io;
1660
1661 err = ordered_data_init();
1662 if (err)
1663 goto free_extent_map;
1664
1665 err = btrfs_delayed_inode_init();
1666 if (err)
1667 goto free_ordered_data;
1668
1669 err = btrfs_interface_init();
1670 if (err)
1671 goto free_delayed_inode;
1672
1673 err = register_filesystem(&btrfs_fs_type);
1674 if (err)
1675 goto unregister_ioctl;
1676
1677 btrfs_init_lockdep();
1678
1679 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
1680 return 0;
1681
1682 unregister_ioctl:
1683 btrfs_interface_exit();
1684 free_delayed_inode:
1685 btrfs_delayed_inode_exit();
1686 free_ordered_data:
1687 ordered_data_exit();
1688 free_extent_map:
1689 extent_map_exit();
1690 free_extent_io:
1691 extent_io_exit();
1692 free_cachep:
1693 btrfs_destroy_cachep();
1694 free_compress:
1695 btrfs_exit_compress();
1696 btrfs_exit_sysfs();
1697 return err;
1698 }
1699
1700 static void __exit exit_btrfs_fs(void)
1701 {
1702 btrfs_destroy_cachep();
1703 btrfs_delayed_inode_exit();
1704 ordered_data_exit();
1705 extent_map_exit();
1706 extent_io_exit();
1707 btrfs_interface_exit();
1708 unregister_filesystem(&btrfs_fs_type);
1709 btrfs_exit_sysfs();
1710 btrfs_cleanup_fs_uuids();
1711 btrfs_exit_compress();
1712 }
1713
1714 module_init(init_btrfs_fs)
1715 module_exit(exit_btrfs_fs)
1716
1717 MODULE_LICENSE("GPL");
This page took 0.103239 seconds and 6 git commands to generate.