fs/btrfs/super.c

   1 /*
   2  * Copyright (C) 2007 Oracle.  All rights reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License v2 as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11  * General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public
  14  * License along with this program; if not, write to the
  15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16  * Boston, MA 02111-1307, USA.
  17  */
  18
  19 #include <linux/blkdev.h>
  20 #include <linux/module.h>
  21 #include <linux/buffer_head.h>
  22 #include <linux/fs.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/highmem.h>
  25 #include <linux/time.h>
  26 #include <linux/init.h>
  27 #include <linux/seq_file.h>
  28 #include <linux/string.h>
  29 #include <linux/backing-dev.h>
  30 #include <linux/mount.h>
  31 #include <linux/mpage.h>
  32 #include <linux/swap.h>
  33 #include <linux/writeback.h>
  34 #include <linux/statfs.h>
  35 #include <linux/compat.h>
  36 #include <linux/parser.h>
  37 #include <linux/ctype.h>
  38 #include <linux/namei.h>
  39 #include <linux/miscdevice.h>
  40 #include <linux/magic.h>
  41 #include <linux/slab.h>
  42 #include <linux/cleancache.h>
  43 #include <linux/ratelimit.h>
  44 #include <linux/btrfs.h>
  45 #include "delayed-inode.h"
  46 #include "ctree.h"
  47 #include "disk-io.h"
  48 #include "transaction.h"
  49 #include "btrfs_inode.h"
  50 #include "print-tree.h"
  51 #include "hash.h"
  52 #include "props.h"
  53 #include "xattr.h"
  54 #include "volumes.h"
  55 #include "export.h"
  56 #include "compression.h"
  57 #include "rcu-string.h"
  58 #include "dev-replace.h"
  59 #include "free-space-cache.h"
  60 #include "backref.h"
  61 #include "tests/btrfs-tests.h"
  62
  63 #include "qgroup.h"
  64 #define CREATE_TRACE_POINTS
  65 #include <trace/events/btrfs.h>
  66
  67 static const struct super_operations btrfs_super_ops;
  68 static struct file_system_type btrfs_fs_type;
  69
  70 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
  71
  72 static const char *btrfs_decode_error(int errno)
  73 {
  74         char *errstr = "unknown";
  75
  76         switch (errno) {
  77         case -EIO:
  78                 errstr = "IO failure";
  79                 break;
  80         case -ENOMEM:
  81                 errstr = "Out of memory";
  82                 break;
  83         case -EROFS:
  84                 errstr = "Readonly filesystem";
  85                 break;
  86         case -EEXIST:
  87                 errstr = "Object already exists";
  88                 break;
  89         case -ENOSPC:
  90                 errstr = "No space left";
  91                 break;
  92         case -ENOENT:
  93                 errstr = "No such entry";
  94                 break;
  95         }
  96
  97         return errstr;
  98 }
  99
 100 static void save_error_info(struct btrfs_fs_info *fs_info)
 101 {
 102         /*
 103          * today we only save the error info into ram.  Long term we'll
 104          * also send it down to the disk
 105          */
 106         set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 107 }
 108
 109 /* btrfs handle error by forcing the filesystem readonly */
 110 static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
 111 {
 112         struct super_block *sb = fs_info->sb;
 113
 114         if (sb->s_flags & MS_RDONLY)
 115                 return;
 116
 117         if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
 118                 sb->s_flags |= MS_RDONLY;
 119                 btrfs_info(fs_info, "forced readonly");
 120                 /*
 121                  * Note that a running device replace operation is not
 122                  * canceled here although there is no way to update
 123                  * the progress. It would add the risk of a deadlock,
 124                  * therefore the canceling is ommited. The only penalty
 125                  * is that some I/O remains active until the procedure
 126                  * completes. The next time when the filesystem is
 127                  * mounted writeable again, the device replace
 128                  * operation continues.
 129                  */
 130         }
 131 }
 132
 133 #ifdef CONFIG_PRINTK
 134 /*
 135  * __btrfs_std_error decodes expected errors from the caller and
 136  * invokes the approciate error response.
 137  */
 138 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
 139                        unsigned int line, int errno, const char *fmt, ...)
 140 {
 141         struct super_block *sb = fs_info->sb;
 142         const char *errstr;
 143
 144         /*
 145          * Special case: if the error is EROFS, and we're already
 146          * under MS_RDONLY, then it is safe here.
 147          */
 148         if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
 149                 return;
 150
 151         errstr = btrfs_decode_error(errno);
 152         if (fmt) {
 153                 struct va_format vaf;
 154                 va_list args;
 155
 156                 va_start(args, fmt);
 157                 vaf.fmt = fmt;
 158                 vaf.va = &args;
 159
 160                 printk(KERN_CRIT
 161                         "BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
 162                         sb->s_id, function, line, errno, errstr, &vaf);
 163                 va_end(args);
 164         } else {
 165                 printk(KERN_CRIT "BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
 166                         sb->s_id, function, line, errno, errstr);
 167         }
 168
 169         /* Don't go through full error handling during mount */
 170         save_error_info(fs_info);
 171         if (sb->s_flags & MS_BORN)
 172                 btrfs_handle_error(fs_info);
 173 }
 174
 175 static const char * const logtypes[] = {
 176         "emergency",
 177         "alert",
 178         "critical",
 179         "error",
 180         "warning",
 181         "notice",
 182         "info",
 183         "debug",
 184 };
 185
 186 void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 187 {
 188         struct super_block *sb = fs_info->sb;
 189         char lvl[4];
 190         struct va_format vaf;
 191         va_list args;
 192         const char *type = logtypes[4];
 193         int kern_level;
 194
 195         va_start(args, fmt);
 196
 197         kern_level = printk_get_level(fmt);
 198         if (kern_level) {
 199                 size_t size = printk_skip_level(fmt) - fmt;
 200                 memcpy(lvl, fmt,  size);
 201                 lvl[size] = '\0';
 202                 fmt += size;
 203                 type = logtypes[kern_level - '0'];
 204         } else
 205                 *lvl = '\0';
 206
 207         vaf.fmt = fmt;
 208         vaf.va = &args;
 209
 210         printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
 211
 212         va_end(args);
 213 }
 214
 215 #else
 216
 217 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
 218                        unsigned int line, int errno, const char *fmt, ...)
 219 {
 220         struct super_block *sb = fs_info->sb;
 221
 222         /*
 223          * Special case: if the error is EROFS, and we're already
 224          * under MS_RDONLY, then it is safe here.
 225          */
 226         if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
 227                 return;
 228
 229         /* Don't go through full error handling during mount */
 230         if (sb->s_flags & MS_BORN) {
 231                 save_error_info(fs_info);
 232                 btrfs_handle_error(fs_info);
 233         }
 234 }
 235 #endif
 236
 237 /*
 238  * We only mark the transaction aborted and then set the file system read-only.
 239  * This will prevent new transactions from starting or trying to join this
 240  * one.
 241  *
 242  * This means that error recovery at the call site is limited to freeing
 243  * any local memory allocations and passing the error code up without
 244  * further cleanup. The transaction should complete as it normally would
 245  * in the call path but will return -EIO.
 246  *
 247  * We'll complete the cleanup in btrfs_end_transaction and
 248  * btrfs_commit_transaction.
 249  */
 250 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
 251                                struct btrfs_root *root, const char *function,
 252                                unsigned int line, int errno)
 253 {
 254         trans->aborted = errno;
 255         /* Nothing used. The other threads that have joined this
 256          * transaction may be able to continue. */
 257         if (!trans->blocks_used && list_empty(&trans->new_bgs)) {
 258                 const char *errstr;
 259
 260                 errstr = btrfs_decode_error(errno);
 261                 btrfs_warn(root->fs_info,
 262                            "%s:%d: Aborting unused transaction(%s).",
 263                            function, line, errstr);
 264                 return;
 265         }
 266         ACCESS_ONCE(trans->transaction->aborted) = errno;
 267         /* Wake up anybody who may be waiting on this transaction */
 268         wake_up(&root->fs_info->transaction_wait);
 269         wake_up(&root->fs_info->transaction_blocked_wait);
 270         __btrfs_std_error(root->fs_info, function, line, errno, NULL);
 271 }
 272 /*
 273  * __btrfs_panic decodes unexpected, fatal errors from the caller,
 274  * issues an alert, and either panics or BUGs, depending on mount options.
 275  */
 276 void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
 277                    unsigned int line, int errno, const char *fmt, ...)
 278 {
 279         char *s_id = "<unknown>";
 280         const char *errstr;
 281         struct va_format vaf = { .fmt = fmt };
 282         va_list args;
 283
 284         if (fs_info)
 285                 s_id = fs_info->sb->s_id;
 286
 287         va_start(args, fmt);
 288         vaf.va = &args;
 289
 290         errstr = btrfs_decode_error(errno);
 291         if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
 292                 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
 293                         s_id, function, line, &vaf, errno, errstr);
 294
 295         btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
 296                    function, line, &vaf, errno, errstr);
 297         va_end(args);
 298         /* Caller calls BUG() */
 299 }
 300
 301 static void btrfs_put_super(struct super_block *sb)
 302 {
 303         close_ctree(btrfs_sb(sb)->tree_root);
 304 }
 305
 306 enum {
 307         Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
 308         Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
 309         Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
 310         Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
 311         Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
 312         Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
 313         Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
 314         Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
 315         Opt_check_integrity, Opt_check_integrity_including_extent_data,
 316         Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
 317         Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
 318         Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
 319         Opt_datasum, Opt_treelog, Opt_noinode_cache,
 320         Opt_err,
 321 };
 322
 323 static match_table_t tokens = {
 324         {Opt_degraded, "degraded"},
 325         {Opt_subvol, "subvol=%s"},
 326         {Opt_subvolid, "subvolid=%s"},
 327         {Opt_device, "device=%s"},
 328         {Opt_nodatasum, "nodatasum"},
 329         {Opt_datasum, "datasum"},
 330         {Opt_nodatacow, "nodatacow"},
 331         {Opt_datacow, "datacow"},
 332         {Opt_nobarrier, "nobarrier"},
 333         {Opt_barrier, "barrier"},
 334         {Opt_max_inline, "max_inline=%s"},
 335         {Opt_alloc_start, "alloc_start=%s"},
 336         {Opt_thread_pool, "thread_pool=%d"},
 337         {Opt_compress, "compress"},
 338         {Opt_compress_type, "compress=%s"},
 339         {Opt_compress_force, "compress-force"},
 340         {Opt_compress_force_type, "compress-force=%s"},
 341         {Opt_ssd, "ssd"},
 342         {Opt_ssd_spread, "ssd_spread"},
 343         {Opt_nossd, "nossd"},
 344         {Opt_acl, "acl"},
 345         {Opt_noacl, "noacl"},
 346         {Opt_notreelog, "notreelog"},
 347         {Opt_treelog, "treelog"},
 348         {Opt_flushoncommit, "flushoncommit"},
 349         {Opt_noflushoncommit, "noflushoncommit"},
 350         {Opt_ratio, "metadata_ratio=%d"},
 351         {Opt_discard, "discard"},
 352         {Opt_nodiscard, "nodiscard"},
 353         {Opt_space_cache, "space_cache"},
 354         {Opt_clear_cache, "clear_cache"},
 355         {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
 356         {Opt_enospc_debug, "enospc_debug"},
 357         {Opt_noenospc_debug, "noenospc_debug"},
 358         {Opt_subvolrootid, "subvolrootid=%d"},
 359         {Opt_defrag, "autodefrag"},
 360         {Opt_nodefrag, "noautodefrag"},
 361         {Opt_inode_cache, "inode_cache"},
 362         {Opt_noinode_cache, "noinode_cache"},
 363         {Opt_no_space_cache, "nospace_cache"},
 364         {Opt_recovery, "recovery"},
 365         {Opt_skip_balance, "skip_balance"},
 366         {Opt_check_integrity, "check_int"},
 367         {Opt_check_integrity_including_extent_data, "check_int_data"},
 368         {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
 369         {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
 370         {Opt_fatal_errors, "fatal_errors=%s"},
 371         {Opt_commit_interval, "commit=%d"},
 372         {Opt_err, NULL},
 373 };
 374
 375 /*
 376  * Regular mount options parser.  Everything that is needed only when
 377  * reading in a new superblock is parsed here.
 378  * XXX JDM: This needs to be cleaned up for remount.
 379  */
 380 int btrfs_parse_options(struct btrfs_root *root, char *options)
 381 {
 382         struct btrfs_fs_info *info = root->fs_info;
 383         substring_t args[MAX_OPT_ARGS];
 384         char *p, *num, *orig = NULL;
 385         u64 cache_gen;
 386         int intarg;
 387         int ret = 0;
 388         char *compress_type;
 389         bool compress_force = false;
 390
 391         cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
 392         if (cache_gen)
 393                 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
 394
 395         if (!options)
 396                 goto out;
 397
 398         /*
 399          * strsep changes the string, duplicate it because parse_options
 400          * gets called twice
 401          */
 402         options = kstrdup(options, GFP_NOFS);
 403         if (!options)
 404                 return -ENOMEM;
 405
 406         orig = options;
 407
 408         while ((p = strsep(&options, ",")) != NULL) {
 409                 int token;
 410                 if (!*p)
 411                         continue;
 412
 413                 token = match_token(p, tokens, args);
 414                 switch (token) {
 415                 case Opt_degraded:
 416                         btrfs_info(root->fs_info, "allowing degraded mounts");
 417                         btrfs_set_opt(info->mount_opt, DEGRADED);
 418                         break;
 419                 case Opt_subvol:
 420                 case Opt_subvolid:
 421                 case Opt_subvolrootid:
 422                 case Opt_device:
 423                         /*
 424                          * These are parsed by btrfs_parse_early_options
 425                          * and can be happily ignored here.
 426                          */
 427                         break;
 428                 case Opt_nodatasum:
 429                         btrfs_set_and_info(root, NODATASUM,
 430                                            "setting nodatasum");
 431                         break;
 432                 case Opt_datasum:
 433                         if (btrfs_test_opt(root, NODATASUM)) {
 434                                 if (btrfs_test_opt(root, NODATACOW))
 435                                         btrfs_info(root->fs_info, "setting datasum, datacow enabled");
 436                                 else
 437                                         btrfs_info(root->fs_info, "setting datasum");
 438                         }
 439                         btrfs_clear_opt(info->mount_opt, NODATACOW);
 440                         btrfs_clear_opt(info->mount_opt, NODATASUM);
 441                         break;
 442                 case Opt_nodatacow:
 443                         if (!btrfs_test_opt(root, NODATACOW)) {
 444                                 if (!btrfs_test_opt(root, COMPRESS) ||
 445                                     !btrfs_test_opt(root, FORCE_COMPRESS)) {
 446                                         btrfs_info(root->fs_info,
 447                                                    "setting nodatacow, compression disabled");
 448                                 } else {
 449                                         btrfs_info(root->fs_info, "setting nodatacow");
 450                                 }
 451                         }
 452                         btrfs_clear_opt(info->mount_opt, COMPRESS);
 453                         btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
 454                         btrfs_set_opt(info->mount_opt, NODATACOW);
 455                         btrfs_set_opt(info->mount_opt, NODATASUM);
 456                         break;
 457                 case Opt_datacow:
 458                         btrfs_clear_and_info(root, NODATACOW,
 459                                              "setting datacow");
 460                         break;
 461                 case Opt_compress_force:
 462                 case Opt_compress_force_type:
 463                         compress_force = true;
 464                         /* Fallthrough */
 465                 case Opt_compress:
 466                 case Opt_compress_type:
 467                         if (token == Opt_compress ||
 468                             token == Opt_compress_force ||
 469                             strcmp(args[0].from, "zlib") == 0) {
 470                                 compress_type = "zlib";
 471                                 info->compress_type = BTRFS_COMPRESS_ZLIB;
 472                                 btrfs_set_opt(info->mount_opt, COMPRESS);
 473                                 btrfs_clear_opt(info->mount_opt, NODATACOW);
 474                                 btrfs_clear_opt(info->mount_opt, NODATASUM);
 475                         } else if (strcmp(args[0].from, "lzo") == 0) {
 476                                 compress_type = "lzo";
 477                                 info->compress_type = BTRFS_COMPRESS_LZO;
 478                                 btrfs_set_opt(info->mount_opt, COMPRESS);
 479                                 btrfs_clear_opt(info->mount_opt, NODATACOW);
 480                                 btrfs_clear_opt(info->mount_opt, NODATASUM);
 481                                 btrfs_set_fs_incompat(info, COMPRESS_LZO);
 482                         } else if (strncmp(args[0].from, "no", 2) == 0) {
 483                                 compress_type = "no";
 484                                 btrfs_clear_opt(info->mount_opt, COMPRESS);
 485                                 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
 486                                 compress_force = false;
 487                         } else {
 488                                 ret = -EINVAL;
 489                                 goto out;
 490                         }
 491
 492                         if (compress_force) {
 493                                 btrfs_set_and_info(root, FORCE_COMPRESS,
 494                                                    "force %s compression",
 495                                                    compress_type);
 496                         } else {
 497                                 if (!btrfs_test_opt(root, COMPRESS))
 498                                         btrfs_info(root->fs_info,
 499                                                    "btrfs: use %s compression",
 500                                                    compress_type);
 501                                 /*
 502                                  * If we remount from compress-force=xxx to
 503                                  * compress=xxx, we need clear FORCE_COMPRESS
 504                                  * flag, otherwise, there is no way for users
 505                                  * to disable forcible compression separately.
 506                                  */
 507                                 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
 508                         }
 509                         break;
 510                 case Opt_ssd:
 511                         btrfs_set_and_info(root, SSD,
 512                                            "use ssd allocation scheme");
 513                         break;
 514                 case Opt_ssd_spread:
 515                         btrfs_set_and_info(root, SSD_SPREAD,
 516                                            "use spread ssd allocation scheme");
 517                         btrfs_set_opt(info->mount_opt, SSD);
 518                         break;
 519                 case Opt_nossd:
 520                         btrfs_set_and_info(root, NOSSD,
 521                                              "not using ssd allocation scheme");
 522                         btrfs_clear_opt(info->mount_opt, SSD);
 523                         break;
 524                 case Opt_barrier:
 525                         btrfs_clear_and_info(root, NOBARRIER,
 526                                              "turning on barriers");
 527                         break;
 528                 case Opt_nobarrier:
 529                         btrfs_set_and_info(root, NOBARRIER,
 530                                            "turning off barriers");
 531                         break;
 532                 case Opt_thread_pool:
 533                         ret = match_int(&args[0], &intarg);
 534                         if (ret) {
 535                                 goto out;
 536                         } else if (intarg > 0) {
 537                                 info->thread_pool_size = intarg;
 538                         } else {
 539                                 ret = -EINVAL;
 540                                 goto out;
 541                         }
 542                         break;
 543                 case Opt_max_inline:
 544                         num = match_strdup(&args[0]);
 545                         if (num) {
 546                                 info->max_inline = memparse(num, NULL);
 547                                 kfree(num);
 548
 549                                 if (info->max_inline) {
 550                                         info->max_inline = min_t(u64,
 551                                                 info->max_inline,
 552                                                 root->sectorsize);
 553                                 }
 554                                 btrfs_info(root->fs_info, "max_inline at %llu",
 555                                         info->max_inline);
 556                         } else {
 557                                 ret = -ENOMEM;
 558                                 goto out;
 559                         }
 560                         break;
 561                 case Opt_alloc_start:
 562                         num = match_strdup(&args[0]);
 563                         if (num) {
 564                                 mutex_lock(&info->chunk_mutex);
 565                                 info->alloc_start = memparse(num, NULL);
 566                                 mutex_unlock(&info->chunk_mutex);
 567                                 kfree(num);
 568                                 btrfs_info(root->fs_info, "allocations start at %llu",
 569                                         info->alloc_start);
 570                         } else {
 571                                 ret = -ENOMEM;
 572                                 goto out;
 573                         }
 574                         break;
 575                 case Opt_acl:
 576 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
 577                         root->fs_info->sb->s_flags |= MS_POSIXACL;
 578                         break;
 579 #else
 580                         btrfs_err(root->fs_info,
 581                                 "support for ACL not compiled in!");
 582                         ret = -EINVAL;
 583                         goto out;
 584 #endif
 585                 case Opt_noacl:
 586                         root->fs_info->sb->s_flags &= ~MS_POSIXACL;
 587                         break;
 588                 case Opt_notreelog:
 589                         btrfs_set_and_info(root, NOTREELOG,
 590                                            "disabling tree log");
 591                         break;
 592                 case Opt_treelog:
 593                         btrfs_clear_and_info(root, NOTREELOG,
 594                                              "enabling tree log");
 595                         break;
 596                 case Opt_flushoncommit:
 597                         btrfs_set_and_info(root, FLUSHONCOMMIT,
 598                                            "turning on flush-on-commit");
 599                         break;
 600                 case Opt_noflushoncommit:
 601                         btrfs_clear_and_info(root, FLUSHONCOMMIT,
 602                                              "turning off flush-on-commit");
 603                         break;
 604                 case Opt_ratio:
 605                         ret = match_int(&args[0], &intarg);
 606                         if (ret) {
 607                                 goto out;
 608                         } else if (intarg >= 0) {
 609                                 info->metadata_ratio = intarg;
 610                                 btrfs_info(root->fs_info, "metadata ratio %d",
 611                                        info->metadata_ratio);
 612                         } else {
 613                                 ret = -EINVAL;
 614                                 goto out;
 615                         }
 616                         break;
 617                 case Opt_discard:
 618                         btrfs_set_and_info(root, DISCARD,
 619                                            "turning on discard");
 620                         break;
 621                 case Opt_nodiscard:
 622                         btrfs_clear_and_info(root, DISCARD,
 623                                              "turning off discard");
 624                         break;
 625                 case Opt_space_cache:
 626                         btrfs_set_and_info(root, SPACE_CACHE,
 627                                            "enabling disk space caching");
 628                         break;
 629                 case Opt_rescan_uuid_tree:
 630                         btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
 631                         break;
 632                 case Opt_no_space_cache:
 633                         btrfs_clear_and_info(root, SPACE_CACHE,
 634                                              "disabling disk space caching");
 635                         break;
 636                 case Opt_inode_cache:
 637                         btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
 638                                            "enabling inode map caching");
 639                         break;
 640                 case Opt_noinode_cache:
 641                         btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
 642                                              "disabling inode map caching");
 643                         break;
 644                 case Opt_clear_cache:
 645                         btrfs_set_and_info(root, CLEAR_CACHE,
 646                                            "force clearing of disk cache");
 647                         break;
 648                 case Opt_user_subvol_rm_allowed:
 649                         btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
 650                         break;
 651                 case Opt_enospc_debug:
 652                         btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
 653                         break;
 654                 case Opt_noenospc_debug:
 655                         btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
 656                         break;
 657                 case Opt_defrag:
 658                         btrfs_set_and_info(root, AUTO_DEFRAG,
 659                                            "enabling auto defrag");
 660                         break;
 661                 case Opt_nodefrag:
 662                         btrfs_clear_and_info(root, AUTO_DEFRAG,
 663                                              "disabling auto defrag");
 664                         break;
 665                 case Opt_recovery:
 666                         btrfs_info(root->fs_info, "enabling auto recovery");
 667                         btrfs_set_opt(info->mount_opt, RECOVERY);
 668                         break;
 669                 case Opt_skip_balance:
 670                         btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
 671                         break;
 672 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 673                 case Opt_check_integrity_including_extent_data:
 674                         btrfs_info(root->fs_info,
 675                                    "enabling check integrity including extent data");
 676                         btrfs_set_opt(info->mount_opt,
 677                                       CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
 678                         btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
 679                         break;
 680                 case Opt_check_integrity:
 681                         btrfs_info(root->fs_info, "enabling check integrity");
 682                         btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
 683                         break;
 684                 case Opt_check_integrity_print_mask:
 685                         ret = match_int(&args[0], &intarg);
 686                         if (ret) {
 687                                 goto out;
 688                         } else if (intarg >= 0) {
 689                                 info->check_integrity_print_mask = intarg;
 690                                 btrfs_info(root->fs_info, "check_integrity_print_mask 0x%x",
 691                                        info->check_integrity_print_mask);
 692                         } else {
 693                                 ret = -EINVAL;
 694                                 goto out;
 695                         }
 696                         break;
 697 #else
 698                 case Opt_check_integrity_including_extent_data:
 699                 case Opt_check_integrity:
 700                 case Opt_check_integrity_print_mask:
 701                         btrfs_err(root->fs_info,
 702                                 "support for check_integrity* not compiled in!");
 703                         ret = -EINVAL;
 704                         goto out;
 705 #endif
 706                 case Opt_fatal_errors:
 707                         if (strcmp(args[0].from, "panic") == 0)
 708                                 btrfs_set_opt(info->mount_opt,
 709                                               PANIC_ON_FATAL_ERROR);
 710                         else if (strcmp(args[0].from, "bug") == 0)
 711                                 btrfs_clear_opt(info->mount_opt,
 712                                               PANIC_ON_FATAL_ERROR);
 713                         else {
 714                                 ret = -EINVAL;
 715                                 goto out;
 716                         }
 717                         break;
 718                 case Opt_commit_interval:
 719                         intarg = 0;
 720                         ret = match_int(&args[0], &intarg);
 721                         if (ret < 0) {
 722                                 btrfs_err(root->fs_info, "invalid commit interval");
 723                                 ret = -EINVAL;
 724                                 goto out;
 725                         }
 726                         if (intarg > 0) {
 727                                 if (intarg > 300) {
 728                                         btrfs_warn(root->fs_info, "excessive commit interval %d",
 729                                                         intarg);
 730                                 }
 731                                 info->commit_interval = intarg;
 732                         } else {
 733                                 btrfs_info(root->fs_info, "using default commit interval %ds",
 734                                     BTRFS_DEFAULT_COMMIT_INTERVAL);
 735                                 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 736                         }
 737                         break;
 738                 case Opt_err:
 739                         btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
 740                         ret = -EINVAL;
 741                         goto out;
 742                 default:
 743                         break;
 744                 }
 745         }
 746 out:
 747         if (!ret && btrfs_test_opt(root, SPACE_CACHE))
 748                 btrfs_info(root->fs_info, "disk space caching is enabled");
 749         kfree(orig);
 750         return ret;
 751 }
 752
 753 /*
 754  * Parse mount options that are required early in the mount process.
 755  *
 756  * All other options will be parsed on much later in the mount process and
 757  * only when we need to allocate a new super block.
 758  */
 759 static int btrfs_parse_early_options(const char *options, fmode_t flags,
 760                 void *holder, char **subvol_name, u64 *subvol_objectid,
 761                 struct btrfs_fs_devices **fs_devices)
 762 {
 763         substring_t args[MAX_OPT_ARGS];
 764         char *device_name, *opts, *orig, *p;
 765         char *num = NULL;
 766         int error = 0;
 767
 768         if (!options)
 769                 return 0;
 770
 771         /*
 772          * strsep changes the string, duplicate it because parse_options
 773          * gets called twice
 774          */
 775         opts = kstrdup(options, GFP_KERNEL);
 776         if (!opts)
 777                 return -ENOMEM;
 778         orig = opts;
 779
 780         while ((p = strsep(&opts, ",")) != NULL) {
 781                 int token;
 782                 if (!*p)
 783                         continue;
 784
 785                 token = match_token(p, tokens, args);
 786                 switch (token) {
 787                 case Opt_subvol:
 788                         kfree(*subvol_name);
 789                         *subvol_name = match_strdup(&args[0]);
 790                         if (!*subvol_name) {
 791                                 error = -ENOMEM;
 792                                 goto out;
 793                         }
 794                         break;
 795                 case Opt_subvolid:
 796                         num = match_strdup(&args[0]);
 797                         if (num) {
 798                                 *subvol_objectid = memparse(num, NULL);
 799                                 kfree(num);
 800                                 /* we want the original fs_tree */
 801                                 if (!*subvol_objectid)
 802                                         *subvol_objectid =
 803                                                 BTRFS_FS_TREE_OBJECTID;
 804                         } else {
 805                                 error = -EINVAL;
 806                                 goto out;
 807                         }
 808                         break;
 809                 case Opt_subvolrootid:
 810                         printk(KERN_WARNING
 811                                 "BTRFS: 'subvolrootid' mount option is deprecated and has "
 812                                 "no effect\n");
 813                         break;
 814                 case Opt_device:
 815                         device_name = match_strdup(&args[0]);
 816                         if (!device_name) {
 817                                 error = -ENOMEM;
 818                                 goto out;
 819                         }
 820                         error = btrfs_scan_one_device(device_name,
 821                                         flags, holder, fs_devices);
 822                         kfree(device_name);
 823                         if (error)
 824                                 goto out;
 825                         break;
 826                 default:
 827                         break;
 828                 }
 829         }
 830
 831 out:
 832         kfree(orig);
 833         return error;
 834 }
 835
 836 static struct dentry *get_default_root(struct super_block *sb,
 837                                        u64 subvol_objectid)
 838 {
 839         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
 840         struct btrfs_root *root = fs_info->tree_root;
 841         struct btrfs_root *new_root;
 842         struct btrfs_dir_item *di;
 843         struct btrfs_path *path;
 844         struct btrfs_key location;
 845         struct inode *inode;
 846         u64 dir_id;
 847         int new = 0;
 848
 849         /*
 850          * We have a specific subvol we want to mount, just setup location and
 851          * go look up the root.
 852          */
 853         if (subvol_objectid) {
 854                 location.objectid = subvol_objectid;
 855                 location.type = BTRFS_ROOT_ITEM_KEY;
 856                 location.offset = (u64)-1;
 857                 goto find_root;
 858         }
 859
 860         path = btrfs_alloc_path();
 861         if (!path)
 862                 return ERR_PTR(-ENOMEM);
 863         path->leave_spinning = 1;
 864
 865         /*
 866          * Find the "default" dir item which points to the root item that we
 867          * will mount by default if we haven't been given a specific subvolume
 868          * to mount.
 869          */
 870         dir_id = btrfs_super_root_dir(fs_info->super_copy);
 871         di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
 872         if (IS_ERR(di)) {
 873                 btrfs_free_path(path);
 874                 return ERR_CAST(di);
 875         }
 876         if (!di) {
 877                 /*
 878                  * Ok the default dir item isn't there.  This is weird since
 879                  * it's always been there, but don't freak out, just try and
 880                  * mount to root most subvolume.
 881                  */
 882                 btrfs_free_path(path);
 883                 dir_id = BTRFS_FIRST_FREE_OBJECTID;
 884                 new_root = fs_info->fs_root;
 885                 goto setup_root;
 886         }
 887
 888         btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
 889         btrfs_free_path(path);
 890
 891 find_root:
 892         new_root = btrfs_read_fs_root_no_name(fs_info, &location);
 893         if (IS_ERR(new_root))
 894                 return ERR_CAST(new_root);
 895
 896         if (!(sb->s_flags & MS_RDONLY)) {
 897                 int ret;
 898                 down_read(&fs_info->cleanup_work_sem);
 899                 ret = btrfs_orphan_cleanup(new_root);
 900                 up_read(&fs_info->cleanup_work_sem);
 901                 if (ret)
 902                         return ERR_PTR(ret);
 903         }
 904
 905         dir_id = btrfs_root_dirid(&new_root->root_item);
 906 setup_root:
 907         location.objectid = dir_id;
 908         location.type = BTRFS_INODE_ITEM_KEY;
 909         location.offset = 0;
 910
 911         inode = btrfs_iget(sb, &location, new_root, &new);
 912         if (IS_ERR(inode))
 913                 return ERR_CAST(inode);
 914
 915         /*
 916          * If we're just mounting the root most subvol put the inode and return
 917          * a reference to the dentry.  We will have already gotten a reference
 918          * to the inode in btrfs_fill_super so we're good to go.
 919          */
 920         if (!new && d_inode(sb->s_root) == inode) {
 921                 iput(inode);
 922                 return dget(sb->s_root);
 923         }
 924
 925         return d_obtain_root(inode);
 926 }
 927
 928 static int btrfs_fill_super(struct super_block *sb,
 929                             struct btrfs_fs_devices *fs_devices,
 930                             void *data, int silent)
 931 {
 932         struct inode *inode;
 933         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
 934         struct btrfs_key key;
 935         int err;
 936
 937         sb->s_maxbytes = MAX_LFS_FILESIZE;
 938         sb->s_magic = BTRFS_SUPER_MAGIC;
 939         sb->s_op = &btrfs_super_ops;
 940         sb->s_d_op = &btrfs_dentry_operations;
 941         sb->s_export_op = &btrfs_export_ops;
 942         sb->s_xattr = btrfs_xattr_handlers;
 943         sb->s_time_gran = 1;
 944 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
 945         sb->s_flags |= MS_POSIXACL;
 946 #endif
 947         sb->s_flags |= MS_I_VERSION;
 948         err = open_ctree(sb, fs_devices, (char *)data);
 949         if (err) {
 950                 printk(KERN_ERR "BTRFS: open_ctree failed\n");
 951                 return err;
 952         }
 953
 954         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
 955         key.type = BTRFS_INODE_ITEM_KEY;
 956         key.offset = 0;
 957         inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
 958         if (IS_ERR(inode)) {
 959                 err = PTR_ERR(inode);
 960                 goto fail_close;
 961         }
 962
 963         sb->s_root = d_make_root(inode);
 964         if (!sb->s_root) {
 965                 err = -ENOMEM;
 966                 goto fail_close;
 967         }
 968
 969         save_mount_options(sb, data);
 970         cleancache_init_fs(sb);
 971         sb->s_flags |= MS_ACTIVE;
 972         return 0;
 973
 974 fail_close:
 975         close_ctree(fs_info->tree_root);
 976         return err;
 977 }
 978
 979 int btrfs_sync_fs(struct super_block *sb, int wait)
 980 {
 981         struct btrfs_trans_handle *trans;
 982         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
 983         struct btrfs_root *root = fs_info->tree_root;
 984
 985         trace_btrfs_sync_fs(wait);
 986
 987         if (!wait) {
 988                 filemap_flush(fs_info->btree_inode->i_mapping);
 989                 return 0;
 990         }
 991
 992         btrfs_wait_ordered_roots(fs_info, -1);
 993
 994         trans = btrfs_attach_transaction_barrier(root);
 995         if (IS_ERR(trans)) {
 996                 /* no transaction, don't bother */
 997                 if (PTR_ERR(trans) == -ENOENT) {
 998                         /*
 999                          * Exit unless we have some pending changes
1000                          * that need to go through commit
1001                          */
1002                         if (fs_info->pending_changes == 0)
1003                                 return 0;
1004                         /*
1005                          * A non-blocking test if the fs is frozen. We must not
1006                          * start a new transaction here otherwise a deadlock
1007                          * happens. The pending operations are delayed to the
1008                          * next commit after thawing.
1009                          */
1010                         if (__sb_start_write(sb, SB_FREEZE_WRITE, false))
1011                                 __sb_end_write(sb, SB_FREEZE_WRITE);
1012                         else
1013                                 return 0;
1014                         trans = btrfs_start_transaction(root, 0);
1015                 }
1016                 if (IS_ERR(trans))
1017                         return PTR_ERR(trans);
1018         }
1019         return btrfs_commit_transaction(trans, root);
1020 }
1021
1022 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1023 {
1024         struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
1025         struct btrfs_root *root = info->tree_root;
1026         char *compress_type;
1027
1028         if (btrfs_test_opt(root, DEGRADED))
1029                 seq_puts(seq, ",degraded");
1030         if (btrfs_test_opt(root, NODATASUM))
1031                 seq_puts(seq, ",nodatasum");
1032         if (btrfs_test_opt(root, NODATACOW))
1033                 seq_puts(seq, ",nodatacow");
1034         if (btrfs_test_opt(root, NOBARRIER))
1035                 seq_puts(seq, ",nobarrier");
1036         if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1037                 seq_printf(seq, ",max_inline=%llu", info->max_inline);
1038         if (info->alloc_start != 0)
1039                 seq_printf(seq, ",alloc_start=%llu", info->alloc_start);
1040         if (info->thread_pool_size !=  min_t(unsigned long,
1041                                              num_online_cpus() + 2, 8))
1042                 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
1043         if (btrfs_test_opt(root, COMPRESS)) {
1044                 if (info->compress_type == BTRFS_COMPRESS_ZLIB)
1045                         compress_type = "zlib";
1046                 else
1047                         compress_type = "lzo";
1048                 if (btrfs_test_opt(root, FORCE_COMPRESS))
1049                         seq_printf(seq, ",compress-force=%s", compress_type);
1050                 else
1051                         seq_printf(seq, ",compress=%s", compress_type);
1052         }
1053         if (btrfs_test_opt(root, NOSSD))
1054                 seq_puts(seq, ",nossd");
1055         if (btrfs_test_opt(root, SSD_SPREAD))
1056                 seq_puts(seq, ",ssd_spread");
1057         else if (btrfs_test_opt(root, SSD))
1058                 seq_puts(seq, ",ssd");
1059         if (btrfs_test_opt(root, NOTREELOG))
1060                 seq_puts(seq, ",notreelog");
1061         if (btrfs_test_opt(root, FLUSHONCOMMIT))
1062                 seq_puts(seq, ",flushoncommit");
1063         if (btrfs_test_opt(root, DISCARD))
1064                 seq_puts(seq, ",discard");
1065         if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
1066                 seq_puts(seq, ",noacl");
1067         if (btrfs_test_opt(root, SPACE_CACHE))
1068                 seq_puts(seq, ",space_cache");
1069         else
1070                 seq_puts(seq, ",nospace_cache");
1071         if (btrfs_test_opt(root, RESCAN_UUID_TREE))
1072                 seq_puts(seq, ",rescan_uuid_tree");
1073         if (btrfs_test_opt(root, CLEAR_CACHE))
1074                 seq_puts(seq, ",clear_cache");
1075         if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
1076                 seq_puts(seq, ",user_subvol_rm_allowed");
1077         if (btrfs_test_opt(root, ENOSPC_DEBUG))
1078                 seq_puts(seq, ",enospc_debug");
1079         if (btrfs_test_opt(root, AUTO_DEFRAG))
1080                 seq_puts(seq, ",autodefrag");
1081         if (btrfs_test_opt(root, INODE_MAP_CACHE))
1082                 seq_puts(seq, ",inode_cache");
1083         if (btrfs_test_opt(root, SKIP_BALANCE))
1084                 seq_puts(seq, ",skip_balance");
1085         if (btrfs_test_opt(root, RECOVERY))
1086                 seq_puts(seq, ",recovery");
1087 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1088         if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1089                 seq_puts(seq, ",check_int_data");
1090         else if (btrfs_test_opt(root, CHECK_INTEGRITY))
1091                 seq_puts(seq, ",check_int");
1092         if (info->check_integrity_print_mask)
1093                 seq_printf(seq, ",check_int_print_mask=%d",
1094                                 info->check_integrity_print_mask);
1095 #endif
1096         if (info->metadata_ratio)
1097                 seq_printf(seq, ",metadata_ratio=%d",
1098                                 info->metadata_ratio);
1099         if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
1100                 seq_puts(seq, ",fatal_errors=panic");
1101         if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1102                 seq_printf(seq, ",commit=%d", info->commit_interval);
1103         return 0;
1104 }
1105
1106 static int btrfs_test_super(struct super_block *s, void *data)
1107 {
1108         struct btrfs_fs_info *p = data;
1109         struct btrfs_fs_info *fs_info = btrfs_sb(s);
1110
1111         return fs_info->fs_devices == p->fs_devices;
1112 }
1113
1114 static int btrfs_set_super(struct super_block *s, void *data)
1115 {
1116         int err = set_anon_super(s, data);
1117         if (!err)
1118                 s->s_fs_info = data;
1119         return err;
1120 }
1121
1122 /*
1123  * subvolumes are identified by ino 256
1124  */
1125 static inline int is_subvolume_inode(struct inode *inode)
1126 {
1127         if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
1128                 return 1;
1129         return 0;
1130 }
1131
1132 /*
1133  * This will strip out the subvol=%s argument for an argument string and add
1134  * subvolid=0 to make sure we get the actual tree root for path walking to the
1135  * subvol we want.
1136  */
1137 static char *setup_root_args(char *args)
1138 {
1139         unsigned len = strlen(args) + 2 + 1;
1140         char *src, *dst, *buf;
1141
1142         /*
1143          * We need the same args as before, but with this substitution:
1144          * s!subvol=[^,]+!subvolid=0!
1145          *
1146          * Since the replacement string is up to 2 bytes longer than the
1147          * original, allocate strlen(args) + 2 + 1 bytes.
1148          */
1149
1150         src = strstr(args, "subvol=");
1151         /* This shouldn't happen, but just in case.. */
1152         if (!src)
1153                 return NULL;
1154
1155         buf = dst = kmalloc(len, GFP_NOFS);
1156         if (!buf)
1157                 return NULL;
1158
1159         /*
1160          * If the subvol= arg is not at the start of the string,
1161          * copy whatever precedes it into buf.
1162          */
1163         if (src != args) {
1164                 *src++ = '\0';
1165                 strcpy(buf, args);
1166                 dst += strlen(args);
1167         }
1168
1169         strcpy(dst, "subvolid=0");
1170         dst += strlen("subvolid=0");
1171
1172         /*
1173          * If there is a "," after the original subvol=... string,
1174          * copy that suffix into our buffer.  Otherwise, we're done.
1175          */
1176         src = strchr(src, ',');
1177         if (src)
1178                 strcpy(dst, src);
1179
1180         return buf;
1181 }
1182
1183 static struct dentry *mount_subvol(const char *subvol_name, int flags,
1184                                    const char *device_name, char *data)
1185 {
1186         struct dentry *root;
1187         struct vfsmount *mnt;
1188         char *newargs;
1189
1190         newargs = setup_root_args(data);
1191         if (!newargs)
1192                 return ERR_PTR(-ENOMEM);
1193         mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
1194                              newargs);
1195
1196         if (PTR_RET(mnt) == -EBUSY) {
1197                 if (flags & MS_RDONLY) {
1198                         mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name,
1199                                              newargs);
1200                 } else {
1201                         int r;
1202                         mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
1203                                              newargs);
1204                         if (IS_ERR(mnt)) {
1205                                 kfree(newargs);
1206                                 return ERR_CAST(mnt);
1207                         }
1208
1209                         r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
1210                         if (r < 0) {
1211                                 /* FIXME: release vfsmount mnt ??*/
1212                                 kfree(newargs);
1213                                 return ERR_PTR(r);
1214                         }
1215                 }
1216         }
1217
1218         kfree(newargs);
1219
1220         if (IS_ERR(mnt))
1221                 return ERR_CAST(mnt);
1222
1223         root = mount_subtree(mnt, subvol_name);
1224
1225         if (!IS_ERR(root) && !is_subvolume_inode(d_inode(root))) {
1226                 struct super_block *s = root->d_sb;
1227                 dput(root);
1228                 root = ERR_PTR(-EINVAL);
1229                 deactivate_locked_super(s);
1230                 printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n",
1231                                 subvol_name);
1232         }
1233
1234         return root;
1235 }
1236
1237 static int parse_security_options(char *orig_opts,
1238                                   struct security_mnt_opts *sec_opts)
1239 {
1240         char *secdata = NULL;
1241         int ret = 0;
1242
1243         secdata = alloc_secdata();
1244         if (!secdata)
1245                 return -ENOMEM;
1246         ret = security_sb_copy_data(orig_opts, secdata);
1247         if (ret) {
1248                 free_secdata(secdata);
1249                 return ret;
1250         }
1251         ret = security_sb_parse_opts_str(secdata, sec_opts);
1252         free_secdata(secdata);
1253         return ret;
1254 }
1255
1256 static int setup_security_options(struct btrfs_fs_info *fs_info,
1257                                   struct super_block *sb,
1258                                   struct security_mnt_opts *sec_opts)
1259 {
1260         int ret = 0;
1261
1262         /*
1263          * Call security_sb_set_mnt_opts() to check whether new sec_opts
1264          * is valid.
1265          */
1266         ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
1267         if (ret)
1268                 return ret;
1269
1270 #ifdef CONFIG_SECURITY
1271         if (!fs_info->security_opts.num_mnt_opts) {
1272                 /* first time security setup, copy sec_opts to fs_info */
1273                 memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
1274         } else {
1275                 /*
1276                  * Since SELinux(the only one supports security_mnt_opts) does
1277                  * NOT support changing context during remount/mount same sb,
1278                  * This must be the same or part of the same security options,
1279                  * just free it.
1280                  */
1281                 security_free_mnt_opts(sec_opts);
1282         }
1283 #endif
1284         return ret;
1285 }
1286
1287 /*
1288  * Find a superblock for the given device / mount point.
1289  *
1290  * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
1291  *        for multiple device setup.  Make sure to keep it in sync.
1292  */
1293 static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1294                 const char *device_name, void *data)
1295 {
1296         struct block_device *bdev = NULL;
1297         struct super_block *s;
1298         struct dentry *root;
1299         struct btrfs_fs_devices *fs_devices = NULL;
1300         struct btrfs_fs_info *fs_info = NULL;
1301         struct security_mnt_opts new_sec_opts;
1302         fmode_t mode = FMODE_READ;
1303         char *subvol_name = NULL;
1304         u64 subvol_objectid = 0;
1305         int error = 0;
1306
1307         if (!(flags & MS_RDONLY))
1308                 mode |= FMODE_WRITE;
1309
1310         error = btrfs_parse_early_options(data, mode, fs_type,
1311                                           &subvol_name, &subvol_objectid,
1312                                           &fs_devices);
1313         if (error) {
1314                 kfree(subvol_name);
1315                 return ERR_PTR(error);
1316         }
1317
1318         if (subvol_name) {
1319                 root = mount_subvol(subvol_name, flags, device_name, data);
1320                 kfree(subvol_name);
1321                 return root;
1322         }
1323
1324         security_init_mnt_opts(&new_sec_opts);
1325         if (data) {
1326                 error = parse_security_options(data, &new_sec_opts);
1327                 if (error)
1328                         return ERR_PTR(error);
1329         }
1330
1331         error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
1332         if (error)
1333                 goto error_sec_opts;
1334
1335         /*
1336          * Setup a dummy root and fs_info for test/set super.  This is because
1337          * we don't actually fill this stuff out until open_ctree, but we need
1338          * it for searching for existing supers, so this lets us do that and
1339          * then open_ctree will properly initialize everything later.
1340          */
1341         fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
1342         if (!fs_info) {
1343                 error = -ENOMEM;
1344                 goto error_sec_opts;
1345         }
1346
1347         fs_info->fs_devices = fs_devices;
1348
1349         fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
1350         fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
1351         security_init_mnt_opts(&fs_info->security_opts);
1352         if (!fs_info->super_copy || !fs_info->super_for_commit) {
1353                 error = -ENOMEM;
1354                 goto error_fs_info;
1355         }
1356
1357         error = btrfs_open_devices(fs_devices, mode, fs_type);
1358         if (error)
1359                 goto error_fs_info;
1360
1361         if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
1362                 error = -EACCES;
1363                 goto error_close_devices;
1364         }
1365
1366         bdev = fs_devices->latest_bdev;
1367         s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
1368                  fs_info);
1369         if (IS_ERR(s)) {
1370                 error = PTR_ERR(s);
1371                 goto error_close_devices;
1372         }
1373
1374         if (s->s_root) {
1375                 btrfs_close_devices(fs_devices);
1376                 free_fs_info(fs_info);
1377                 if ((flags ^ s->s_flags) & MS_RDONLY)
1378                         error = -EBUSY;
1379         } else {
1380                 char b[BDEVNAME_SIZE];
1381
1382                 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1383                 btrfs_sb(s)->bdev_holder = fs_type;
1384                 error = btrfs_fill_super(s, fs_devices, data,
1385                                          flags & MS_SILENT ? 1 : 0);
1386         }
1387
1388         root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error);
1389         if (IS_ERR(root)) {
1390                 deactivate_locked_super(s);
1391                 error = PTR_ERR(root);
1392                 goto error_sec_opts;
1393         }
1394
1395         fs_info = btrfs_sb(s);
1396         error = setup_security_options(fs_info, s, &new_sec_opts);
1397         if (error) {
1398                 dput(root);
1399                 deactivate_locked_super(s);
1400                 goto error_sec_opts;
1401         }
1402
1403         return root;
1404
1405 error_close_devices:
1406         btrfs_close_devices(fs_devices);
1407 error_fs_info:
1408         free_fs_info(fs_info);
1409 error_sec_opts:
1410         security_free_mnt_opts(&new_sec_opts);
1411         return ERR_PTR(error);
1412 }
1413
1414 static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1415                                      int new_pool_size, int old_pool_size)
1416 {
1417         if (new_pool_size == old_pool_size)
1418                 return;
1419
1420         fs_info->thread_pool_size = new_pool_size;
1421
1422         btrfs_info(fs_info, "resize thread pool %d -> %d",
1423                old_pool_size, new_pool_size);
1424
1425         btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
1426         btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
1427         btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size);
1428         btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
1429         btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
1430         btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
1431         btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
1432                                 new_pool_size);
1433         btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
1434         btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
1435         btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
1436         btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
1437         btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
1438                                 new_pool_size);
1439 }
1440
1441 static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
1442 {
1443         set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1444 }
1445
1446 static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
1447                                        unsigned long old_opts, int flags)
1448 {
1449         if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1450             (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1451              (flags & MS_RDONLY))) {
1452                 /* wait for any defraggers to finish */
1453                 wait_event(fs_info->transaction_wait,
1454                            (atomic_read(&fs_info->defrag_running) == 0));
1455                 if (flags & MS_RDONLY)
1456                         sync_filesystem(fs_info->sb);
1457         }
1458 }
1459
1460 static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
1461                                          unsigned long old_opts)
1462 {
1463         /*
1464          * We need cleanup all defragable inodes if the autodefragment is
1465          * close or the fs is R/O.
1466          */
1467         if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1468             (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1469              (fs_info->sb->s_flags & MS_RDONLY))) {
1470                 btrfs_cleanup_defrag_inodes(fs_info);
1471         }
1472
1473         clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1474 }
1475
/*
 * Remount handler.
 *
 * Re-parses the mount options in @data, resizes the worker pools to the new
 * thread_pool_size and, when the read-only state requested in *@flags
 * differs from the current one, switches the filesystem between read-only
 * and read-write.
 *
 * Returns 0 on success.  On failure the saved mount options, compression
 * type, max_inline, alloc_start, thread pool size and metadata_ratio are
 * put back and the error code is returned.
 */
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
        struct btrfs_fs_info *fs_info = btrfs_sb(sb);
        struct btrfs_root *root = fs_info->tree_root;
        /* snapshot of the state that the restore path puts back on failure */
        unsigned old_flags = sb->s_flags;
        unsigned long old_opts = fs_info->mount_opt;
        unsigned long old_compress_type = fs_info->compress_type;
        u64 old_max_inline = fs_info->max_inline;
        u64 old_alloc_start = fs_info->alloc_start;
        int old_thread_pool_size = fs_info->thread_pool_size;
        unsigned int old_metadata_ratio = fs_info->metadata_ratio;
        int ret;

        sync_filesystem(sb);
        btrfs_remount_prepare(fs_info);

        /* security options are parsed and applied before the btrfs options */
        if (data) {
                struct security_mnt_opts new_sec_opts;

                security_init_mnt_opts(&new_sec_opts);
                ret = parse_security_options(data, &new_sec_opts);
                if (ret)
                        goto restore;
                ret = setup_security_options(fs_info, sb,
                                             &new_sec_opts);
                if (ret) {
                        security_free_mnt_opts(&new_sec_opts);
                        goto restore;
                }
        }

        /* any option parsing failure is reported to the caller as -EINVAL */
        ret = btrfs_parse_options(root, data);
        if (ret) {
                ret = -EINVAL;
                goto restore;
        }

        btrfs_remount_begin(fs_info, old_opts, *flags);
        btrfs_resize_thread_pool(fs_info,
                fs_info->thread_pool_size, old_thread_pool_size);

        /* read-only state is unchanged: only the options had to be applied */
        if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
                goto out;

        if (*flags & MS_RDONLY) {
                /*
                 * this also happens on 'umount -rf' or on shutdown, when
                 * the filesystem is busy.
                 */
                cancel_work_sync(&fs_info->async_reclaim_work);

                /* wait for the uuid_scan task to finish */
                down(&fs_info->uuid_tree_rescan_sem);
                /* avoid complains from lockdep et al. */
                up(&fs_info->uuid_tree_rescan_sem);

                sb->s_flags |= MS_RDONLY;

                /* stop background operations before the final commit */
                btrfs_dev_replace_suspend_for_unmount(fs_info);
                btrfs_scrub_cancel(fs_info);
                btrfs_pause_balance(fs_info);

                ret = btrfs_commit_super(root);
                if (ret)
                        goto restore;
        } else {
                /* read-only -> read-write: refuse if the fs is not fit for it */
                if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
                        btrfs_err(fs_info,
                                "Remounting read-write after error is not allowed");
                        ret = -EINVAL;
                        goto restore;
                }
                if (fs_info->fs_devices->rw_devices == 0) {
                        ret = -EACCES;
                        goto restore;
                }

                if (fs_info->fs_devices->missing_devices >
                     fs_info->num_tolerated_disk_barrier_failures &&
                    !(*flags & MS_RDONLY)) {
                        btrfs_warn(fs_info,
                                "too many missing devices, writeable remount is not allowed");
                        ret = -EACCES;
                        goto restore;
                }

                /* a non-zero log root in the super block copy is rejected */
                if (btrfs_super_log_root(fs_info->super_copy) != 0) {
                        ret = -EINVAL;
                        goto restore;
                }

                ret = btrfs_cleanup_fs_roots(fs_info);
                if (ret)
                        goto restore;

                /* recover relocation */
                mutex_lock(&fs_info->cleaner_mutex);
                ret = btrfs_recover_relocation(root);
                mutex_unlock(&fs_info->cleaner_mutex);
                if (ret)
                        goto restore;

                ret = btrfs_resume_balance_async(fs_info);
                if (ret)
                        goto restore;

                ret = btrfs_resume_dev_replace_async(fs_info);
                if (ret) {
                        btrfs_warn(fs_info, "failed to resume dev_replace");
                        goto restore;
                }

                if (!fs_info->uuid_root) {
                        btrfs_info(fs_info, "creating UUID tree");
                        ret = btrfs_create_uuid_tree(fs_info);
                        if (ret) {
                                btrfs_warn(fs_info, "failed to create the UUID tree %d", ret);
                                goto restore;
                        }
                }
                /* MS_RDONLY is dropped only after every step above succeeded */
                sb->s_flags &= ~MS_RDONLY;
        }
out:
        wake_up_process(fs_info->transaction_kthread);
        btrfs_remount_cleanup(fs_info, old_opts);
        return 0;

restore:
        /* We've hit an error - don't reset MS_RDONLY */
        if (sb->s_flags & MS_RDONLY)
                old_flags |= MS_RDONLY;
        sb->s_flags = old_flags;
        fs_info->mount_opt = old_opts;
        fs_info->compress_type = old_compress_type;
        fs_info->max_inline = old_max_inline;
        /* alloc_start is written back under chunk_mutex */
        mutex_lock(&fs_info->chunk_mutex);
        fs_info->alloc_start = old_alloc_start;
        mutex_unlock(&fs_info->chunk_mutex);
        btrfs_resize_thread_pool(fs_info,
                old_thread_pool_size, fs_info->thread_pool_size);
        fs_info->metadata_ratio = old_metadata_ratio;
        btrfs_remount_cleanup(fs_info, old_opts);
        return ret;
}
1620
1621 /* Used to sort the devices by max_avail(descending sort) */
1622 static int btrfs_cmp_device_free_bytes(const void *dev_info1,
1623                                        const void *dev_info2)
1624 {
1625         if (((struct btrfs_device_info *)dev_info1)->max_avail >
1626             ((struct btrfs_device_info *)dev_info2)->max_avail)
1627                 return -1;
1628         else if (((struct btrfs_device_info *)dev_info1)->max_avail <
1629                  ((struct btrfs_device_info *)dev_info2)->max_avail)
1630                 return 1;
1631         else
1632         return 0;
1633 }
1634
1635 /*
1636  * sort the devices by max_avail, in which max free extent size of each device
1637  * is stored.(Descending Sort)
1638  */
1639 static inline void btrfs_descending_sort_devices(
1640                                         struct btrfs_device_info *devices,
1641                                         size_t nr_devices)
1642 {
1643         sort(devices, nr_devices, sizeof(struct btrfs_device_info),
1644              btrfs_cmp_device_free_bytes, NULL);
1645 }
1646
1647 /*
1648  * The helper to calc the free space on the devices that can be used to store
1649  * file data.
1650  */
1651 static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1652 {
1653         struct btrfs_fs_info *fs_info = root->fs_info;
1654         struct btrfs_device_info *devices_info;
1655         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
1656         struct btrfs_device *device;
1657         u64 skip_space;
1658         u64 type;
1659         u64 avail_space;
1660         u64 used_space;
1661         u64 min_stripe_size;
1662         int min_stripes = 1, num_stripes = 1;
1663         int i = 0, nr_devices;
1664         int ret;
1665
1666         /*
1667          * We aren't under the device list lock, so this is racey-ish, but good
1668          * enough for our purposes.
1669          */
1670         nr_devices = fs_info->fs_devices->open_devices;
1671         if (!nr_devices) {
1672                 smp_mb();
1673                 nr_devices = fs_info->fs_devices->open_devices;
1674                 ASSERT(nr_devices);
1675                 if (!nr_devices) {
1676                         *free_bytes = 0;
1677                         return 0;
1678                 }
1679         }
1680
1681         devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
1682                                GFP_NOFS);
1683         if (!devices_info)
1684                 return -ENOMEM;
1685
1686         /* calc min stripe number for data space alloction */
1687         type = btrfs_get_alloc_profile(root, 1);
1688         if (type & BTRFS_BLOCK_GROUP_RAID0) {
1689                 min_stripes = 2;
1690                 num_stripes = nr_devices;
1691         } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
1692                 min_stripes = 2;
1693                 num_stripes = 2;
1694         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
1695                 min_stripes = 4;
1696                 num_stripes = 4;
1697         }
1698
1699         if (type & BTRFS_BLOCK_GROUP_DUP)
1700                 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
1701         else
1702                 min_stripe_size = BTRFS_STRIPE_LEN;
1703
1704         if (fs_info->alloc_start)
1705                 mutex_lock(&fs_devices->device_list_mutex);
1706         rcu_read_lock();
1707         list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
1708                 if (!device->in_fs_metadata || !device->bdev ||
1709                     device->is_tgtdev_for_dev_replace)
1710                         continue;
1711
1712                 if (i >= nr_devices)
1713                         break;
1714
1715                 avail_space = device->total_bytes - device->bytes_used;
1716
1717                 /* align with stripe_len */
1718                 avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
1719                 avail_space *= BTRFS_STRIPE_LEN;
1720
1721                 /*
1722                  * In order to avoid overwritting the superblock on the drive,
1723                  * btrfs starts at an offset of at least 1MB when doing chunk
1724                  * allocation.
1725                  */
1726                 skip_space = 1024 * 1024;
1727
1728                 /* user can set the offset in fs_info->alloc_start. */
1729                 if (fs_info->alloc_start &&
1730                     fs_info->alloc_start + BTRFS_STRIPE_LEN <=
1731                     device->total_bytes) {
1732                         rcu_read_unlock();
1733                         skip_space = max(fs_info->alloc_start, skip_space);
1734
1735                         /*
1736                          * btrfs can not use the free space in
1737                          * [0, skip_space - 1], we must subtract it from the
1738                          * total. In order to implement it, we account the used
1739                          * space in this range first.
1740                          */
1741                         ret = btrfs_account_dev_extents_size(device, 0,
1742                                                              skip_space - 1,
1743                                                              &used_space);
1744                         if (ret) {
1745                                 kfree(devices_info);
1746                                 mutex_unlock(&fs_devices->device_list_mutex);
1747                                 return ret;
1748                         }
1749
1750                         rcu_read_lock();
1751
1752                         /* calc the free space in [0, skip_space - 1] */
1753                         skip_space -= used_space;
1754                 }
1755
1756                 /*
1757                  * we can use the free space in [0, skip_space - 1], subtract
1758                  * it from the total.
1759                  */
1760                 if (avail_space && avail_space >= skip_space)
1761                         avail_space -= skip_space;
1762                 else
1763                         avail_space = 0;
1764
1765                 if (avail_space < min_stripe_size)
1766                         continue;
1767
1768                 devices_info[i].dev = device;
1769                 devices_info[i].max_avail = avail_space;
1770
1771                 i++;
1772         }
1773         rcu_read_unlock();
1774         if (fs_info->alloc_start)
1775                 mutex_unlock(&fs_devices->device_list_mutex);
1776
1777         nr_devices = i;
1778
1779         btrfs_descending_sort_devices(devices_info, nr_devices);
1780
1781         i = nr_devices - 1;
1782         avail_space = 0;
1783         while (nr_devices >= min_stripes) {
1784                 if (num_stripes > nr_devices)
1785                         num_stripes = nr_devices;
1786
1787                 if (devices_info[i].max_avail >= min_stripe_size) {
1788                         int j;
1789                         u64 alloc_size;
1790
1791                         avail_space += devices_info[i].max_avail * num_stripes;
1792                         alloc_size = devices_info[i].max_avail;
1793                         for (j = i + 1 - num_stripes; j <= i; j++)
1794                                 devices_info[j].max_avail -= alloc_size;
1795                 }
1796                 i--;
1797                 nr_devices--;
1798         }
1799
1800         kfree(devices_info);
1801         *free_bytes = avail_space;
1802         return 0;
1803 }
1804
1805 /*
1806  * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
1807  *
1808  * If there's a redundant raid level at DATA block groups, use the respective
1809  * multiplier to scale the sizes.
1810  *
1811  * Unused device space usage is based on simulating the chunk allocator
1812  * algorithm that respects the device sizes, order of allocations and the
1813  * 'alloc_start' value, this is a close approximation of the actual use but
1814  * there are other factors that may change the result (like a new metadata
1815  * chunk).
1816  *
1817  * FIXME: not accurate for mixed block groups, total and free/used are ok,
1818  * available appears slightly larger.
1819  */
1820 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1821 {
1822         struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
1823         struct btrfs_super_block *disk_super = fs_info->super_copy;
1824         struct list_head *head = &fs_info->space_info;
1825         struct btrfs_space_info *found;
1826         u64 total_used = 0;
1827         u64 total_free_data = 0;
1828         int bits = dentry->d_sb->s_blocksize_bits;
1829         __be32 *fsid = (__be32 *)fs_info->fsid;
1830         unsigned factor = 1;
1831         struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
1832         int ret;
1833
1834         /*
1835          * holding chunk_muext to avoid allocating new chunks, holding
1836          * device_list_mutex to avoid the device being removed
1837          */
1838         rcu_read_lock();
1839         list_for_each_entry_rcu(found, head, list) {
1840                 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
1841                         int i;
1842
1843                         total_free_data += found->disk_total - found->disk_used;
1844                         total_free_data -=
1845                                 btrfs_account_ro_block_groups_free_space(found);
1846
1847                         for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
1848                                 if (!list_empty(&found->block_groups[i])) {
1849                                         switch (i) {
1850                                         case BTRFS_RAID_DUP:
1851                                         case BTRFS_RAID_RAID1:
1852                                         case BTRFS_RAID_RAID10:
1853                                                 factor = 2;
1854                                         }
1855                                 }
1856                         }
1857                 }
1858
1859                 total_used += found->disk_used;
1860         }
1861
1862         rcu_read_unlock();
1863
1864         buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
1865         buf->f_blocks >>= bits;
1866         buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
1867
1868         /* Account global block reserve as used, it's in logical size already */
1869         spin_lock(&block_rsv->lock);
1870         buf->f_bfree -= block_rsv->size >> bits;
1871         spin_unlock(&block_rsv->lock);
1872
1873         buf->f_bavail = div_u64(total_free_data, factor);
1874         ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
1875         if (ret)
1876                 return ret;
1877         buf->f_bavail += div_u64(total_free_data, factor);
1878         buf->f_bavail = buf->f_bavail >> bits;
1879
1880         buf->f_type = BTRFS_SUPER_MAGIC;
1881         buf->f_bsize = dentry->d_sb->s_blocksize;
1882         buf->f_namelen = BTRFS_NAME_LEN;
1883
1884         /* We treat it as constant endianness (it doesn't matter _which_)
1885            because we want the fsid to come out the same whether mounted
1886            on a big-endian or little-endian host */
1887         buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
1888         buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
1889         /* Mask in the root object ID too, to disambiguate subvols */
1890         buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
1891         buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
1892
1893         return 0;
1894 }
1895
/*
 * ->kill_sb handler: tear down the anonymous superblock, then free the
 * btrfs_fs_info that was attached to it.
 */
static void btrfs_kill_super(struct super_block *sb)
{
        struct btrfs_fs_info *info;

        /* look the fs_info up before the superblock is killed */
        info = btrfs_sb(sb);

        kill_anon_super(sb);
        free_fs_info(info);
}
1902
1903 static struct file_system_type btrfs_fs_type = {
1904         .owner          = THIS_MODULE,
1905         .name           = "btrfs",
1906         .mount          = btrfs_mount,
1907         .kill_sb        = btrfs_kill_super,
1908         .fs_flags       = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
1909 };
1910 MODULE_ALIAS_FS("btrfs");
1911
1912 static int btrfs_control_open(struct inode *inode, struct file *file)
1913 {
1914         /*
1915          * The control file's private_data is used to hold the
1916          * transaction when it is started and is used to keep
1917          * track of whether a transaction is already in progress.
1918          */
1919         file->private_data = NULL;
1920         return 0;
1921 }
1922
1923 /*
1924  * used by btrfsctl to scan devices when no FS is mounted
1925  */
1926 static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
1927                                 unsigned long arg)
1928 {
1929         struct btrfs_ioctl_vol_args *vol;
1930         struct btrfs_fs_devices *fs_devices;
1931         int ret = -ENOTTY;
1932
1933         if (!capable(CAP_SYS_ADMIN))
1934                 return -EPERM;
1935
1936         vol = memdup_user((void __user *)arg, sizeof(*vol));
1937         if (IS_ERR(vol))
1938                 return PTR_ERR(vol);
1939
1940         switch (cmd) {
1941         case BTRFS_IOC_SCAN_DEV:
1942                 ret = btrfs_scan_one_device(vol->name, FMODE_READ,
1943                                             &btrfs_fs_type, &fs_devices);
1944                 break;
1945         case BTRFS_IOC_DEVICES_READY:
1946                 ret = btrfs_scan_one_device(vol->name, FMODE_READ,
1947                                             &btrfs_fs_type, &fs_devices);
1948                 if (ret)
1949                         break;
1950                 ret = !(fs_devices->num_devices == fs_devices->total_devices);
1951                 break;
1952         }
1953
1954         kfree(vol);
1955         return ret;
1956 }
1957
1958 static int btrfs_freeze(struct super_block *sb)
1959 {
1960         struct btrfs_trans_handle *trans;
1961         struct btrfs_root *root = btrfs_sb(sb)->tree_root;
1962
1963         trans = btrfs_attach_transaction_barrier(root);
1964         if (IS_ERR(trans)) {
1965                 /* no transaction, don't bother */
1966                 if (PTR_ERR(trans) == -ENOENT)
1967                         return 0;
1968                 return PTR_ERR(trans);
1969         }
1970         return btrfs_commit_transaction(trans, root);
1971 }
1972
1973 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1974 {
1975         struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
1976         struct btrfs_fs_devices *cur_devices;
1977         struct btrfs_device *dev, *first_dev = NULL;
1978         struct list_head *head;
1979         struct rcu_string *name;
1980
1981         mutex_lock(&fs_info->fs_devices->device_list_mutex);
1982         cur_devices = fs_info->fs_devices;
1983         while (cur_devices) {
1984                 head = &cur_devices->devices;
1985                 list_for_each_entry(dev, head, dev_list) {
1986                         if (dev->missing)
1987                                 continue;
1988                         if (!dev->name)
1989                                 continue;
1990                         if (!first_dev || dev->devid < first_dev->devid)
1991                                 first_dev = dev;
1992                 }
1993                 cur_devices = cur_devices->seed;
1994         }
1995
1996         if (first_dev) {
1997                 rcu_read_lock();
1998                 name = rcu_dereference(first_dev->name);
1999                 seq_escape(m, name->str, " \t\n\\");
2000                 rcu_read_unlock();
2001         } else {
2002                 WARN_ON(1);
2003         }
2004         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2005         return 0;
2006 }
2007
2008 static const struct super_operations btrfs_super_ops = {
2009         .drop_inode     = btrfs_drop_inode,
2010         .evict_inode    = btrfs_evict_inode,
2011         .put_super      = btrfs_put_super,
2012         .sync_fs        = btrfs_sync_fs,
2013         .show_options   = btrfs_show_options,
2014         .show_devname   = btrfs_show_devname,
2015         .write_inode    = btrfs_write_inode,
2016         .alloc_inode    = btrfs_alloc_inode,
2017         .destroy_inode  = btrfs_destroy_inode,
2018         .statfs         = btrfs_statfs,
2019         .remount_fs     = btrfs_remount,
2020         .freeze_fs      = btrfs_freeze,
2021 };
2022
2023 static const struct file_operations btrfs_ctl_fops = {
2024         .open = btrfs_control_open,
2025         .unlocked_ioctl  = btrfs_control_ioctl,
2026         .compat_ioctl = btrfs_control_ioctl,
2027         .owner   = THIS_MODULE,
2028         .llseek = noop_llseek,
2029 };
2030
2031 static struct miscdevice btrfs_misc = {
2032         .minor          = BTRFS_MINOR,
2033         .name           = "btrfs-control",
2034         .fops           = &btrfs_ctl_fops
2035 };
2036
2037 MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
2038 MODULE_ALIAS("devname:btrfs-control");
2039
2040 static int btrfs_interface_init(void)
2041 {
2042         return misc_register(&btrfs_misc);
2043 }
2044
2045 static void btrfs_interface_exit(void)
2046 {
2047         if (misc_deregister(&btrfs_misc) < 0)
2048                 printk(KERN_INFO "BTRFS: misc_deregister failed for control device\n");
2049 }
2050
/*
 * Print the module load banner.
 *
 * The message is a single string literal assembled at compile time: the
 * ", debug=on", ", assert=on" and ", integrity-checker=on" fragments are
 * appended only when the matching CONFIG_BTRFS_* option is defined.
 */
static void btrfs_print_info(void)
{
        printk(KERN_INFO "Btrfs loaded"
#ifdef CONFIG_BTRFS_DEBUG
                        ", debug=on"
#endif
#ifdef CONFIG_BTRFS_ASSERT
                        ", assert=on"
#endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
                        ", integrity-checker=on"
#endif
                        "\n");
}
2065
/*
 * Run the built-in self tests one after another, stopping at the first
 * failure.
 *
 * Returns 0 if every test passed, otherwise the first error.  The test
 * filesystem set up by btrfs_init_test_fs() is destroyed again in both
 * cases; if that setup itself fails its error is returned right away.
 */
static int btrfs_run_sanity_tests(void)
{
        int err = btrfs_init_test_fs();

        if (err)
                return err;

        err = btrfs_test_free_space_cache();
        if (!err)
                err = btrfs_test_extent_buffer_operations();
        if (!err)
                err = btrfs_test_extent_io();
        if (!err)
                err = btrfs_test_inodes();
        if (!err)
                err = btrfs_test_qgroups();

        btrfs_destroy_test_fs();
        return err;
}
2091
/*
 * Module initialisation.
 *
 * Brings up the btrfs subsystems one by one, registers the control device,
 * runs the sanity tests and finally registers the filesystem type.
 *
 * Returns 0 on success.  If a step fails, the steps that already succeeded
 * are undone through the chain of labels at the end of the function, in
 * reverse order of their setup, and the error of the failing step is
 * returned.
 */
static int __init init_btrfs_fs(void)
{
        int err;

        err = btrfs_hash_init();
        if (err)
                return err;

        btrfs_props_init();

        err = btrfs_init_sysfs();
        if (err)
                goto free_hash;

        btrfs_init_compress();

        err = btrfs_init_cachep();
        if (err)
                goto free_compress;

        err = extent_io_init();
        if (err)
                goto free_cachep;

        err = extent_map_init();
        if (err)
                goto free_extent_io;

        err = ordered_data_init();
        if (err)
                goto free_extent_map;

        err = btrfs_delayed_inode_init();
        if (err)
                goto free_ordered_data;

        err = btrfs_auto_defrag_init();
        if (err)
                goto free_delayed_inode;

        err = btrfs_delayed_ref_init();
        if (err)
                goto free_auto_defrag;

        err = btrfs_prelim_ref_init();
        if (err)
                goto free_delayed_ref;

        err = btrfs_end_io_wq_init();
        if (err)
                goto free_prelim_ref;

        err = btrfs_interface_init();
        if (err)
                goto free_end_io_wq;

        btrfs_init_lockdep();

        btrfs_print_info();

        /* the filesystem type is registered only after the self tests pass */
        err = btrfs_run_sanity_tests();
        if (err)
                goto unregister_ioctl;

        err = register_filesystem(&btrfs_fs_type);
        if (err)
                goto unregister_ioctl;

        return 0;

        /* error unwinding: each label falls through to the ones below it */
unregister_ioctl:
        btrfs_interface_exit();
free_end_io_wq:
        btrfs_end_io_wq_exit();
free_prelim_ref:
        btrfs_prelim_ref_exit();
free_delayed_ref:
        btrfs_delayed_ref_exit();
free_auto_defrag:
        btrfs_auto_defrag_exit();
free_delayed_inode:
        btrfs_delayed_inode_exit();
free_ordered_data:
        ordered_data_exit();
free_extent_map:
        extent_map_exit();
free_extent_io:
        extent_io_exit();
free_cachep:
        btrfs_destroy_cachep();
free_compress:
        /* sysfs has no label of its own; it is undone together with compress */
        btrfs_exit_compress();
        btrfs_exit_sysfs();
free_hash:
        btrfs_hash_exit();
        return err;
}
2189
/*
 * Module exit: tear down the btrfs subsystems, deregister the control
 * device and the filesystem type, and release the remaining global state.
 */
static void __exit exit_btrfs_fs(void)
{
        btrfs_destroy_cachep();
        btrfs_delayed_ref_exit();
        btrfs_auto_defrag_exit();
        btrfs_delayed_inode_exit();
        btrfs_prelim_ref_exit();
        ordered_data_exit();
        extent_map_exit();
        extent_io_exit();
        btrfs_interface_exit();
        btrfs_end_io_wq_exit();
        unregister_filesystem(&btrfs_fs_type);
        btrfs_exit_sysfs();
        btrfs_cleanup_fs_uuids();
        btrfs_exit_compress();
        btrfs_hash_exit();
}
2208
/*
 * init_btrfs_fs() is hooked up as a late initcall, exit_btrfs_fs() as the
 * module exit handler.
 */
late_initcall(init_btrfs_fs);
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");