Merge tag 'xfs-for-linus-4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)
diff --git a/fs/namei.c b/fs/namei.c

index 5375571cf6e19587c911c497d4be5089a2862727..15b124c18ed8235e9aec4ad8bd0edd36b969759c 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4542,7 +4542,6 @@ int readlink_copy(char __user *buffer, int buflen, const char *link)
  out:
         return len;
  }
-EXPORT_SYMBOL(readlink_copy);
  
  /*
   * A helper for ->readlink().  This should be used *ONLY* for symlinks that
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c

index 686ba6fb20ddc942638f6e698b98bf4c1bb8785f..339c696bbc0186b9a0064ec0146d5e143bbe2569 100644 (file)
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -93,19 +93,23 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
  }
  
  void *
-kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
-            xfs_km_flags_t flags)
+kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags)
  {
-       void    *new;
+       int     retries = 0;
+       gfp_t   lflags = kmem_flags_convert(flags);
+       void    *ptr;
  
-       new = kmem_alloc(newsize, flags);
-       if (ptr) {
-               if (new)
-                       memcpy(new, ptr,
-                               ((oldsize < newsize) ? oldsize : newsize));
-               kmem_free(ptr);
-       }
-       return new;
+       do {
+               ptr = krealloc(old, newsize, lflags);
+               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+                       return ptr;
+               if (!(++retries % 100))
+                       xfs_err(NULL,
+       "%s(%u) possible memory allocation deadlock size %zu in %s (mode:0x%x)",
+                               current->comm, current->pid,
+                               newsize, __func__, lflags);
+               congestion_wait(BLK_RW_ASYNC, HZ/50);
+       } while (1);
  }
  
  void *
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h

index d1c66e465ca5629fe3e330b9f6b87b07a5f9a1fa..689f746224e7ab8a0fbf3d2f9acb4f1dd68a9a16 100644 (file)
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -62,7 +62,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
  
  extern void *kmem_alloc(size_t, xfs_km_flags_t);
  extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
-extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t);
+extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);
  static inline void  kmem_free(const void *ptr)
  {
         kvfree(ptr);
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c

index fa3b948ef9c2561e8b34aae282ca8c0f02a22abc..4e126f41a0aa97d1f73773ea8efb89ef7a52746c 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -242,37 +242,21 @@ xfs_attr_set(
                         return error;
         }
  
-       /*
-        * Start our first transaction of the day.
-        *
-        * All future transactions during this code must be "chained" off
-        * this one via the trans_dup() call.  All transactions will contain
-        * the inode, and the inode will always be marked with trans_ihold().
-        * Since the inode will be locked in all transactions, we must log
-        * the inode in every transaction to let it float upward through
-        * the log.
-        */
-       args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
+       tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
+                        M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
+       tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
+       tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
  
         /*
          * Root fork attributes can use reserved data blocks for this
          * operation if necessary
          */
-
-       if (rsvd)
-               args.trans->t_flags |= XFS_TRANS_RESERVE;
-
-       tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
-                        M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
-       tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
-       tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
-       error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
-       if (error) {
-               xfs_trans_cancel(args.trans);
+       error = xfs_trans_alloc(mp, &tres, args.total, 0,
+                       rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
+       if (error)
                 return error;
-       }
-       xfs_ilock(dp, XFS_ILOCK_EXCL);
  
+       xfs_ilock(dp, XFS_ILOCK_EXCL);
         error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
                                 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
                                        XFS_QMOPT_RES_REGBLKS);
@@ -428,32 +412,16 @@ xfs_attr_remove(
         if (error)
                 return error;
  
-       /*
-        * Start our first transaction of the day.
-        *
-        * All future transactions during this code must be "chained" off
-        * this one via the trans_dup() call.  All transactions will contain
-        * the inode, and the inode will always be marked with trans_ihold().
-        * Since the inode will be locked in all transactions, we must log
-        * the inode in every transaction to let it float upward through
-        * the log.
-        */
-       args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
-
         /*
          * Root fork attributes can use reserved data blocks for this
          * operation if necessary
          */
-
-       if (flags & ATTR_ROOT)
-               args.trans->t_flags |= XFS_TRANS_RESERVE;
-
-       error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
-                                 XFS_ATTRRM_SPACE_RES(mp), 0);
-       if (error) {
-               xfs_trans_cancel(args.trans);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
+                       XFS_ATTRRM_SPACE_RES(mp), 0,
+                       (flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
+                       &args.trans);
+       if (error)
                 return error;
-       }
  
         xfs_ilock(dp, XFS_ILOCK_EXCL);
         /*
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index ce41d7fe753c5dcbb7edfe85771e1a121c4e580b..932381caef1bc421cb9a41e79191845d6fc60346 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1121,15 +1121,14 @@ xfs_bmap_add_attrfork(
  
         mp = ip->i_mount;
         ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
-       tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
+
         blks = XFS_ADDAFORK_SPACE_RES(mp);
-       if (rsvd)
-               tp->t_flags |= XFS_TRANS_RESERVE;
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
+                       rsvd ? XFS_TRANS_RESERVE : 0, &tp);
+       if (error)
                 return error;
-       }
+
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
                         XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
@@ -6026,13 +6025,10 @@ xfs_bmap_split_extent(
         xfs_fsblock_t           firstfsb;
         int                     error;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
-                       XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
+                       XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+       if (error)
                 return error;
-       }
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c

index 974d62e677f45ada4ef7ca4e5ac916c9dbe6ffbe..e5bb9cc3b243b9fd451db6dcc429edb6a24644bf 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -257,15 +257,12 @@ xfs_dir2_block_to_sf(
          *
          * Convert the inode to local format and copy the data in.
          */
-       dp->i_df.if_flags &= ~XFS_IFEXTENTS;
-       dp->i_df.if_flags |= XFS_IFINLINE;
-       dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
         ASSERT(dp->i_df.if_bytes == 0);
-       xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+       xfs_init_local_fork(dp, XFS_DATA_FORK, dst, size);
+       dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+       dp->i_d.di_size = size;
  
         logflags |= XFS_ILOG_DDATA;
-       memcpy(dp->i_df.if_u1.if_data, dst, size);
-       dp->i_d.di_size = size;
         xfs_dir2_sf_check(args);
  out:
         xfs_trans_log_inode(args->trans, dp, logflags);
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c

index 11faf7df14c8099e49759f51f0315dd5caec6632..bbcc8c7a44b3ffde66bf152583e87ebbd9763da7 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -231,6 +231,48 @@ xfs_iformat_fork(
         return error;
  }
  
+void
+xfs_init_local_fork(
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       const void              *data,
+       int                     size)
+{
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       int                     mem_size = size, real_size = 0;
+       bool                    zero_terminate;
+
+       /*
+        * If we are using the local fork to store a symlink body we need to
+        * zero-terminate it so that we can pass it back to the VFS directly.
+        * Overallocate the in-memory fork by one for that and add a zero
+        * to terminate it below.
+        */
+       zero_terminate = S_ISLNK(VFS_I(ip)->i_mode);
+       if (zero_terminate)
+               mem_size++;
+
+       if (size == 0)
+               ifp->if_u1.if_data = NULL;
+       else if (mem_size <= sizeof(ifp->if_u2.if_inline_data))
+               ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+       else {
+               real_size = roundup(mem_size, 4);
+               ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
+       }
+
+       if (size) {
+               memcpy(ifp->if_u1.if_data, data, size);
+               if (zero_terminate)
+                       ifp->if_u1.if_data[size] = '\0';
+       }
+
+       ifp->if_bytes = size;
+       ifp->if_real_bytes = real_size;
+       ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
+       ifp->if_flags |= XFS_IFINLINE;
+}
+
  /*
   * The file is in-lined in the on-disk inode.
   * If it fits into if_inline_data, then copy
@@ -248,8 +290,6 @@ xfs_iformat_local(
         int             whichfork,
         int             size)
  {
-       xfs_ifork_t     *ifp;
-       int             real_size;
  
         /*
          * If the size is unreasonable, then something
@@ -265,22 +305,8 @@ xfs_iformat_local(
                                      ip->i_mount, dip);
                 return -EFSCORRUPTED;
         }
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       real_size = 0;
-       if (size == 0)
-               ifp->if_u1.if_data = NULL;
-       else if (size <= sizeof(ifp->if_u2.if_inline_data))
-               ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-       else {
-               real_size = roundup(size, 4);
-               ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
-       }
-       ifp->if_bytes = size;
-       ifp->if_real_bytes = real_size;
-       if (size)
-               memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
-       ifp->if_flags &= ~XFS_IFEXTENTS;
-       ifp->if_flags |= XFS_IFINLINE;
+
+       xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
         return 0;
  }
  
@@ -516,7 +542,6 @@ xfs_iroot_realloc(
                 new_max = cur_max + rec_diff;
                 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
                 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
-                               XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
                                 KM_SLEEP | KM_NOFS);
                 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
                                                      ifp->if_broot_bytes);
@@ -660,7 +685,6 @@ xfs_idata_realloc(
                                 ifp->if_u1.if_data =
                                         kmem_realloc(ifp->if_u1.if_data,
                                                         real_size,
-                                                       ifp->if_real_bytes,
                                                         KM_SLEEP | KM_NOFS);
                         }
                 } else {
@@ -1376,8 +1400,7 @@ xfs_iext_realloc_direct(
                 if (rnew_size != ifp->if_real_bytes) {
                         ifp->if_u1.if_extents =
                                 kmem_realloc(ifp->if_u1.if_extents,
-                                               rnew_size,
-                                               ifp->if_real_bytes, KM_NOFS);
+                                               rnew_size, KM_NOFS);
                 }
                 if (rnew_size > ifp->if_real_bytes) {
                         memset(&ifp->if_u1.if_extents[ifp->if_bytes /
@@ -1461,9 +1484,8 @@ xfs_iext_realloc_indirect(
         if (new_size == 0) {
                 xfs_iext_destroy(ifp);
         } else {
-               ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
-                       kmem_realloc(ifp->if_u1.if_ext_irec,
-                               new_size, size, KM_NOFS);
+               ifp->if_u1.if_ext_irec =
+                       kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS);
         }
  }
  
@@ -1496,6 +1518,24 @@ xfs_iext_indirect_to_direct(
         }
  }
  
+/*
+ * Remove all records from the indirection array.
+ */
+STATIC void
+xfs_iext_irec_remove_all(
+       struct xfs_ifork *ifp)
+{
+       int             nlists;
+       int             i;
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+       for (i = 0; i < nlists; i++)
+               kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf);
+       kmem_free(ifp->if_u1.if_ext_irec);
+       ifp->if_flags &= ~XFS_IFEXTIREC;
+}
+
  /*
   * Free incore file extents.
   */
@@ -1504,14 +1544,7 @@ xfs_iext_destroy(
         xfs_ifork_t     *ifp)           /* inode fork pointer */
  {
         if (ifp->if_flags & XFS_IFEXTIREC) {
-               int     erp_idx;
-               int     nlists;
-
-               nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-               for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
-                       xfs_iext_irec_remove(ifp, erp_idx);
-               }
-               ifp->if_flags &= ~XFS_IFEXTIREC;
+               xfs_iext_irec_remove_all(ifp);
         } else if (ifp->if_real_bytes) {
                 kmem_free(ifp->if_u1.if_extents);
         } else if (ifp->if_bytes) {
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h

index 7d3b1ed6dcbe934dcea8d7b92ccb3342f784bff6..f95e072ae6468240a6ae0cb8d1dc094eafbf59fc 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -134,6 +134,7 @@ void                xfs_iroot_realloc(struct xfs_inode *, int, int);
  int            xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
  int            xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
                                   int);
+void           xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
  
  struct xfs_bmbt_rec_host *
                 xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h

index d54a8018b079dd3f0c078e5fdf56cf48a151a545..e8f49c029ff05098ddc91eeeffe5ba7102eff77f 100644 (file)
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -211,6 +211,11 @@ typedef struct xfs_trans_header {
  
  #define        XFS_TRANS_HEADER_MAGIC  0x5452414e      /* TRAN */
  
+/*
+ * The only type valid for th_type in CIL-enabled file system logs:
+ */
+#define XFS_TRANS_CHECKPOINT   40
+
  /*
   * Log item types.
   */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c

index 8a53eaa349f44884354139fcde0a9c17b35e741a..12ca86778e023e4261998660f39b482927a16c02 100644 (file)
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -838,12 +838,10 @@ xfs_sync_sb(
         struct xfs_trans        *tp;
         int                     error;
  
-       tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0,
+                       XFS_TRANS_NO_WRITECOUNT, &tp);
+       if (error)
                 return error;
-       }
  
         xfs_log_sb(tp);
         if (wait)
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h

index 81ac870834da9e63515553e3fa291318acd1e73a..16002b5ec4eb82c2988fc6f559f0e3ed995ec1e9 100644 (file)
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -55,103 +55,6 @@ extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
  extern const struct xfs_buf_ops xfs_symlink_buf_ops;
  extern const struct xfs_buf_ops xfs_rtbuf_ops;
  
-/*
- * Transaction types.  Used to distinguish types of buffers. These never reach
- * the log.
- */
-#define XFS_TRANS_SETATTR_NOT_SIZE     1
-#define XFS_TRANS_SETATTR_SIZE         2
-#define XFS_TRANS_INACTIVE             3
-#define XFS_TRANS_CREATE               4
-#define XFS_TRANS_CREATE_TRUNC         5
-#define XFS_TRANS_TRUNCATE_FILE                6
-#define XFS_TRANS_REMOVE               7
-#define XFS_TRANS_LINK                 8
-#define XFS_TRANS_RENAME               9
-#define XFS_TRANS_MKDIR                        10
-#define XFS_TRANS_RMDIR                        11
-#define XFS_TRANS_SYMLINK              12
-#define XFS_TRANS_SET_DMATTRS          13
-#define XFS_TRANS_GROWFS               14
-#define XFS_TRANS_STRAT_WRITE          15
-#define XFS_TRANS_DIOSTRAT             16
-/* 17 was XFS_TRANS_WRITE_SYNC */
-#define        XFS_TRANS_WRITEID               18
-#define        XFS_TRANS_ADDAFORK              19
-#define        XFS_TRANS_ATTRINVAL             20
-#define        XFS_TRANS_ATRUNCATE             21
-#define        XFS_TRANS_ATTR_SET              22
-#define        XFS_TRANS_ATTR_RM               23
-#define        XFS_TRANS_ATTR_FLAG             24
-#define        XFS_TRANS_CLEAR_AGI_BUCKET      25
-#define XFS_TRANS_SB_CHANGE            26
-/*
- * Dummy entries since we use the transaction type to index into the
- * trans_type[] in xlog_recover_print_trans_head()
- */
-#define XFS_TRANS_DUMMY1               27
-#define XFS_TRANS_DUMMY2               28
-#define XFS_TRANS_QM_QUOTAOFF          29
-#define XFS_TRANS_QM_DQALLOC           30
-#define XFS_TRANS_QM_SETQLIM           31
-#define XFS_TRANS_QM_DQCLUSTER         32
-#define XFS_TRANS_QM_QINOCREATE                33
-#define XFS_TRANS_QM_QUOTAOFF_END      34
-#define XFS_TRANS_FSYNC_TS             35
-#define        XFS_TRANS_GROWFSRT_ALLOC        36
-#define        XFS_TRANS_GROWFSRT_ZERO         37
-#define        XFS_TRANS_GROWFSRT_FREE         38
-#define        XFS_TRANS_SWAPEXT               39
-#define        XFS_TRANS_CHECKPOINT            40
-#define        XFS_TRANS_ICREATE               41
-#define        XFS_TRANS_CREATE_TMPFILE        42
-#define        XFS_TRANS_TYPE_MAX              43
-/* new transaction types need to be reflected in xfs_logprint(8) */
-
-#define XFS_TRANS_TYPES \
-       { XFS_TRANS_SETATTR_NOT_SIZE,   "SETATTR_NOT_SIZE" }, \
-       { XFS_TRANS_SETATTR_SIZE,       "SETATTR_SIZE" }, \
-       { XFS_TRANS_INACTIVE,           "INACTIVE" }, \
-       { XFS_TRANS_CREATE,             "CREATE" }, \
-       { XFS_TRANS_CREATE_TRUNC,       "CREATE_TRUNC" }, \
-       { XFS_TRANS_TRUNCATE_FILE,      "TRUNCATE_FILE" }, \
-       { XFS_TRANS_REMOVE,             "REMOVE" }, \
-       { XFS_TRANS_LINK,               "LINK" }, \
-       { XFS_TRANS_RENAME,             "RENAME" }, \
-       { XFS_TRANS_MKDIR,              "MKDIR" }, \
-       { XFS_TRANS_RMDIR,              "RMDIR" }, \
-       { XFS_TRANS_SYMLINK,            "SYMLINK" }, \
-       { XFS_TRANS_SET_DMATTRS,        "SET_DMATTRS" }, \
-       { XFS_TRANS_GROWFS,             "GROWFS" }, \
-       { XFS_TRANS_STRAT_WRITE,        "STRAT_WRITE" }, \
-       { XFS_TRANS_DIOSTRAT,           "DIOSTRAT" }, \
-       { XFS_TRANS_WRITEID,            "WRITEID" }, \
-       { XFS_TRANS_ADDAFORK,           "ADDAFORK" }, \
-       { XFS_TRANS_ATTRINVAL,          "ATTRINVAL" }, \
-       { XFS_TRANS_ATRUNCATE,          "ATRUNCATE" }, \
-       { XFS_TRANS_ATTR_SET,           "ATTR_SET" }, \
-       { XFS_TRANS_ATTR_RM,            "ATTR_RM" }, \
-       { XFS_TRANS_ATTR_FLAG,          "ATTR_FLAG" }, \
-       { XFS_TRANS_CLEAR_AGI_BUCKET,   "CLEAR_AGI_BUCKET" }, \
-       { XFS_TRANS_SB_CHANGE,          "SBCHANGE" }, \
-       { XFS_TRANS_DUMMY1,             "DUMMY1" }, \
-       { XFS_TRANS_DUMMY2,             "DUMMY2" }, \
-       { XFS_TRANS_QM_QUOTAOFF,        "QM_QUOTAOFF" }, \
-       { XFS_TRANS_QM_DQALLOC,         "QM_DQALLOC" }, \
-       { XFS_TRANS_QM_SETQLIM,         "QM_SETQLIM" }, \
-       { XFS_TRANS_QM_DQCLUSTER,       "QM_DQCLUSTER" }, \
-       { XFS_TRANS_QM_QINOCREATE,      "QM_QINOCREATE" }, \
-       { XFS_TRANS_QM_QUOTAOFF_END,    "QM_QOFF_END" }, \
-       { XFS_TRANS_FSYNC_TS,           "FSYNC_TS" }, \
-       { XFS_TRANS_GROWFSRT_ALLOC,     "GROWFSRT_ALLOC" }, \
-       { XFS_TRANS_GROWFSRT_ZERO,      "GROWFSRT_ZERO" }, \
-       { XFS_TRANS_GROWFSRT_FREE,      "GROWFSRT_FREE" }, \
-       { XFS_TRANS_SWAPEXT,            "SWAPEXT" }, \
-       { XFS_TRANS_CHECKPOINT,         "CHECKPOINT" }, \
-       { XFS_TRANS_ICREATE,            "ICREATE" }, \
-       { XFS_TRANS_CREATE_TMPFILE,     "CREATE_TMPFILE" }, \
-       { XLOG_UNMOUNT_REC_TYPE,        "UNMOUNT" }
-
  /*
   * This structure is used to track log items associated with
   * a transaction.  It points to the log item and keeps some
@@ -181,8 +84,9 @@ int  xfs_log_calc_minimum_size(struct xfs_mount *);
  #define        XFS_TRANS_SYNC          0x08    /* make commit synchronous */
  #define XFS_TRANS_DQ_DIRTY     0x10    /* at least one dquot in trx dirty */
  #define XFS_TRANS_RESERVE      0x20    /* OK to use reserved data blocks */
-#define XFS_TRANS_FREEZE_PROT  0x40    /* Transaction has elevated writer
-                                          count in superblock */
+#define XFS_TRANS_NO_WRITECOUNT 0x40   /* do not elevate SB writecount */
+#define XFS_TRANS_NOFS         0x80    /* pass KM_NOFS to kmem_alloc */
+
  /*
   * Field values for xfs_trans_mod_sb.
   */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index c535887c60a8e613d8bdc69725ecd5d891115fca..4c463b99fe574341043cb1f6ab612a59df881d31 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -84,23 +84,71 @@ xfs_find_bdev_for_inode(
  }
  
  /*
- * We're now finished for good with this ioend structure.
- * Update the page state via the associated buffer_heads,
- * release holds on the inode and bio, and finally free
- * up memory.  Do not use the ioend after this.
+ * We're now finished for good with this page.  Update the page state via the
+ * associated buffer_heads, paying attention to the start and end offsets that
+ * we need to process on the page.
+ */
+static void
+xfs_finish_page_writeback(
+       struct inode            *inode,
+       struct bio_vec          *bvec,
+       int                     error)
+{
+       unsigned int            end = bvec->bv_offset + bvec->bv_len - 1;
+       struct buffer_head      *head, *bh;
+       unsigned int            off = 0;
+
+       ASSERT(bvec->bv_offset < PAGE_SIZE);
+       ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
+       ASSERT(end < PAGE_SIZE);
+       ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0);
+
+       bh = head = page_buffers(bvec->bv_page);
+
+       do {
+               if (off < bvec->bv_offset)
+                       goto next_bh;
+               if (off > end)
+                       break;
+               bh->b_end_io(bh, !error);
+next_bh:
+               off += bh->b_size;
+       } while ((bh = bh->b_this_page) != head);
+}
+
+/*
+ * We're now finished for good with this ioend structure.  Update the page
+ * state, release holds on bios, and finally free up memory.  Do not use the
+ * ioend after this.
   */
  STATIC void
  xfs_destroy_ioend(
-       xfs_ioend_t             *ioend)
+       struct xfs_ioend        *ioend,
+       int                     error)
  {
-       struct buffer_head      *bh, *next;
+       struct inode            *inode = ioend->io_inode;
+       struct bio              *last = ioend->io_bio;
+       struct bio              *bio, *next;
  
-       for (bh = ioend->io_buffer_head; bh; bh = next) {
-               next = bh->b_private;
-               bh->b_end_io(bh, !ioend->io_error);
-       }
+       for (bio = &ioend->io_inline_bio; bio; bio = next) {
+               struct bio_vec  *bvec;
+               int             i;
+
+               /*
+                * For the last bio, bi_private points to the ioend, so we
+                * need to explicitly end the iteration here.
+                */
+               if (bio == last)
+                       next = NULL;
+               else
+                       next = bio->bi_private;
  
-       mempool_free(ioend, xfs_ioend_pool);
+               /* walk each page on bio, ending page IO on them */
+               bio_for_each_segment_all(bvec, bio, i)
+                       xfs_finish_page_writeback(inode, bvec, error);
+
+               bio_put(bio);
+       }
  }
  
  /*
@@ -120,13 +168,9 @@ xfs_setfilesize_trans_alloc(
         struct xfs_trans        *tp;
         int                     error;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+       if (error)
                 return error;
-       }
  
         ioend->io_append_trans = tp;
  
@@ -174,7 +218,8 @@ xfs_setfilesize(
  
  STATIC int
  xfs_setfilesize_ioend(
-       struct xfs_ioend        *ioend)
+       struct xfs_ioend        *ioend,
+       int                     error)
  {
         struct xfs_inode        *ip = XFS_I(ioend->io_inode);
         struct xfs_trans        *tp = ioend->io_append_trans;
@@ -188,36 +233,14 @@ xfs_setfilesize_ioend(
         __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
  
         /* we abort the update if there was an IO error */
-       if (ioend->io_error) {
+       if (error) {
                 xfs_trans_cancel(tp);
-               return ioend->io_error;
+               return error;
         }
  
         return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
  }
  
-/*
- * Schedule IO completion handling on the final put of an ioend.
- *
- * If there is no work to do we might as well call it a day and free the
- * ioend right now.
- */
-STATIC void
-xfs_finish_ioend(
-       struct xfs_ioend        *ioend)
-{
-       if (atomic_dec_and_test(&ioend->io_remaining)) {
-               struct xfs_mount        *mp = XFS_I(ioend->io_inode)->i_mount;
-
-               if (ioend->io_type == XFS_IO_UNWRITTEN)
-                       queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
-               else if (ioend->io_append_trans)
-                       queue_work(mp->m_data_workqueue, &ioend->io_work);
-               else
-                       xfs_destroy_ioend(ioend);
-       }
-}
-
  /*
   * IO write completion.
   */
@@ -225,16 +248,17 @@ STATIC void
  xfs_end_io(
         struct work_struct *work)
  {
-       xfs_ioend_t     *ioend = container_of(work, xfs_ioend_t, io_work);
-       struct xfs_inode *ip = XFS_I(ioend->io_inode);
-       int             error = 0;
+       struct xfs_ioend        *ioend =
+               container_of(work, struct xfs_ioend, io_work);
+       struct xfs_inode        *ip = XFS_I(ioend->io_inode);
+       int                     error = ioend->io_bio->bi_error;
  
         /*
          * Set an error if the mount has shut down and proceed with end I/O
          * processing so it can perform whatever cleanups are necessary.
          */
         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               ioend->io_error = -EIO;
+               error = -EIO;
  
         /*
          * For unwritten extents we need to issue transactions to convert a
@@ -244,55 +268,33 @@ xfs_end_io(
          * on error.
          */
         if (ioend->io_type == XFS_IO_UNWRITTEN) {
-               if (ioend->io_error)
+               if (error)
                         goto done;
                 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
                                                   ioend->io_size);
         } else if (ioend->io_append_trans) {
-               error = xfs_setfilesize_ioend(ioend);
+               error = xfs_setfilesize_ioend(ioend, error);
         } else {
                 ASSERT(!xfs_ioend_is_append(ioend));
         }
  
  done:
-       if (error)
-               ioend->io_error = error;
-       xfs_destroy_ioend(ioend);
+       xfs_destroy_ioend(ioend, error);
  }
  
-/*
- * Allocate and initialise an IO completion structure.
- * We need to track unwritten extent write completion here initially.
- * We'll need to extend this for updating the ondisk inode size later
- * (vs. incore size).
- */
-STATIC xfs_ioend_t *
-xfs_alloc_ioend(
-       struct inode            *inode,
-       unsigned int            type)
+STATIC void
+xfs_end_bio(
+       struct bio              *bio)
  {
-       xfs_ioend_t             *ioend;
-
-       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
-
-       /*
-        * Set the count to 1 initially, which will prevent an I/O
-        * completion callback from happening before we have started
-        * all the I/O from calling the completion routine too early.
-        */
-       atomic_set(&ioend->io_remaining, 1);
-       ioend->io_error = 0;
-       INIT_LIST_HEAD(&ioend->io_list);
-       ioend->io_type = type;
-       ioend->io_inode = inode;
-       ioend->io_buffer_head = NULL;
-       ioend->io_buffer_tail = NULL;
-       ioend->io_offset = 0;
-       ioend->io_size = 0;
-       ioend->io_append_trans = NULL;
+       struct xfs_ioend        *ioend = bio->bi_private;
+       struct xfs_mount        *mp = XFS_I(ioend->io_inode)->i_mount;
  
-       INIT_WORK(&ioend->io_work, xfs_end_io);
-       return ioend;
+       if (ioend->io_type == XFS_IO_UNWRITTEN)
+               queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
+       else if (ioend->io_append_trans)
+               queue_work(mp->m_data_workqueue, &ioend->io_work);
+       else
+               xfs_destroy_ioend(ioend, bio->bi_error);
  }
  
  STATIC int
@@ -364,50 +366,6 @@ xfs_imap_valid(
                 offset < imap->br_startoff + imap->br_blockcount;
  }
  
-/*
- * BIO completion handler for buffered IO.
- */
-STATIC void
-xfs_end_bio(
-       struct bio              *bio)
-{
-       xfs_ioend_t             *ioend = bio->bi_private;
-
-       if (!ioend->io_error)
-               ioend->io_error = bio->bi_error;
-
-       /* Toss bio and pass work off to an xfsdatad thread */
-       bio->bi_private = NULL;
-       bio->bi_end_io = NULL;
-       bio_put(bio);
-
-       xfs_finish_ioend(ioend);
-}
-
-STATIC void
-xfs_submit_ioend_bio(
-       struct writeback_control *wbc,
-       xfs_ioend_t             *ioend,
-       struct bio              *bio)
-{
-       atomic_inc(&ioend->io_remaining);
-       bio->bi_private = ioend;
-       bio->bi_end_io = xfs_end_bio;
-       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
-}
-
-STATIC struct bio *
-xfs_alloc_ioend_bio(
-       struct buffer_head      *bh)
-{
-       struct bio              *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
-
-       ASSERT(bio->bi_private == NULL);
-       bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-       bio->bi_bdev = bh->b_bdev;
-       return bio;
-}
-
  STATIC void
  xfs_start_buffer_writeback(
         struct buffer_head      *bh)
@@ -452,28 +410,35 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
  }
  
  /*
- * Submit all of the bios for an ioend. We are only passed a single ioend at a
- * time; the caller is responsible for chaining prior to submission.
+ * Submit the bio for an ioend. We are passed an ioend with a bio attached to
+ * it, and we submit that bio. The ioend may be used for multiple bio
+ * submissions, so we only want to allocate an append transaction for the ioend
+ * once. In the case of multiple bio submission, each bio will take an IO
+ * reference to the ioend to ensure that the ioend completion is only done once
+ * all bios have been submitted and the ioend is really done.
   *
   * If @fail is non-zero, it means that we have a situation where some part of
   * the submission process has failed after we have marked paged for writeback
- * and unlocked them. In this situation, we need to fail the ioend chain rather
- * than submit it to IO. This typically only happens on a filesystem shutdown.
+ * and unlocked them. In this situation, we need to fail the bio and ioend
+ * rather than submit it to IO. This typically only happens on a filesystem
+ * shutdown.
   */
  STATIC int
  xfs_submit_ioend(
         struct writeback_control *wbc,
-       xfs_ioend_t             *ioend,
+       struct xfs_ioend        *ioend,
         int                     status)
  {
-       struct buffer_head      *bh;
-       struct bio              *bio;
-       sector_t                lastblock = 0;
-
         /* Reserve log space if we might write beyond the on-disk inode size. */
         if (!status &&
-            ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+           ioend->io_type != XFS_IO_UNWRITTEN &&
+           xfs_ioend_is_append(ioend) &&
+           !ioend->io_append_trans)
                 status = xfs_setfilesize_trans_alloc(ioend);
+
+       ioend->io_bio->bi_private = ioend;
+       ioend->io_bio->bi_end_io = xfs_end_bio;
+
         /*
          * If we are failing the IO now, just mark the ioend with an
          * error and finish it. This will run IO completion immediately
@@ -481,33 +446,73 @@ xfs_submit_ioend(
          * time.
          */
         if (status) {
-               ioend->io_error = status;
-               xfs_finish_ioend(ioend);
+               ioend->io_bio->bi_error = status;
+               bio_endio(ioend->io_bio);
                 return status;
         }
  
-       bio = NULL;
-       for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
+                  ioend->io_bio);
+       return 0;
+}
  
-               if (!bio) {
-retry:
-                       bio = xfs_alloc_ioend_bio(bh);
-               } else if (bh->b_blocknr != lastblock + 1) {
-                       xfs_submit_ioend_bio(wbc, ioend, bio);
-                       goto retry;
-               }
+static void
+xfs_init_bio_from_bh(
+       struct bio              *bio,
+       struct buffer_head      *bh)
+{
+       bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+       bio->bi_bdev = bh->b_bdev;
+}
  
-               if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
-                       xfs_submit_ioend_bio(wbc, ioend, bio);
-                       goto retry;
-               }
+static struct xfs_ioend *
+xfs_alloc_ioend(
+       struct inode            *inode,
+       unsigned int            type,
+       xfs_off_t               offset,
+       struct buffer_head      *bh)
+{
+       struct xfs_ioend        *ioend;
+       struct bio              *bio;
  
-               lastblock = bh->b_blocknr;
-       }
-       if (bio)
-               xfs_submit_ioend_bio(wbc, ioend, bio);
-       xfs_finish_ioend(ioend);
-       return 0;
+       bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
+       xfs_init_bio_from_bh(bio, bh);
+
+       ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
+       INIT_LIST_HEAD(&ioend->io_list);
+       ioend->io_type = type;
+       ioend->io_inode = inode;
+       ioend->io_size = 0;
+       ioend->io_offset = offset;
+       INIT_WORK(&ioend->io_work, xfs_end_io);
+       ioend->io_append_trans = NULL;
+       ioend->io_bio = bio;
+       return ioend;
+}
+
+/*
+ * Allocate a new bio, and chain the old bio to the new one.
+ *
+ * Note that we have to perform the chaining in this unintuitive order
+ * so that the bi_private linkage is set up in the right direction for the
+ * traversal in xfs_destroy_ioend().
+ */
+static void
+xfs_chain_bio(
+       struct xfs_ioend        *ioend,
+       struct writeback_control *wbc,
+       struct buffer_head      *bh)
+{
+       struct bio *new;
+
+       new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
+       xfs_init_bio_from_bh(new, bh);
+
+       bio_chain(ioend->io_bio, new);
+       bio_get(ioend->io_bio);         /* for xfs_destroy_ioend */
+       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
+                  ioend->io_bio);
+       ioend->io_bio = new;
  }
  
  /*
@@ -523,27 +528,24 @@ xfs_add_to_ioend(
         struct buffer_head      *bh,
         xfs_off_t               offset,
         struct xfs_writepage_ctx *wpc,
+       struct writeback_control *wbc,
         struct list_head        *iolist)
  {
         if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
             bh->b_blocknr != wpc->last_block + 1 ||
             offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
-               struct xfs_ioend        *new;
-
                 if (wpc->ioend)
                         list_add(&wpc->ioend->io_list, iolist);
-
-               new = xfs_alloc_ioend(inode, wpc->io_type);
-               new->io_offset = offset;
-               new->io_buffer_head = bh;
-               new->io_buffer_tail = bh;
-               wpc->ioend = new;
-       } else {
-               wpc->ioend->io_buffer_tail->b_private = bh;
-               wpc->ioend->io_buffer_tail = bh;
+               wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
         }
  
-       bh->b_private = NULL;
+       /*
+        * If the buffer doesn't fit into the bio we need to allocate a new
+        * one.  This shouldn't happen more than once for a given buffer.
+        */
+       while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
+               xfs_chain_bio(wpc->ioend, wbc, bh);
+
         wpc->ioend->io_size += bh->b_size;
         wpc->last_block = bh->b_blocknr;
         xfs_start_buffer_writeback(bh);
@@ -803,7 +805,7 @@ xfs_writepage_map(
                         lock_buffer(bh);
                         if (wpc->io_type != XFS_IO_OVERWRITE)
                                 xfs_map_at_offset(inode, bh, &wpc->imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list);
+                       xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
                         count++;
                 }
  
@@ -1391,13 +1393,10 @@ xfs_end_io_direct_write(
  
                 trace_xfs_end_io_direct_write_append(ip, offset, size);
  
-               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-               if (error) {
-                       xfs_trans_cancel(tp);
-                       return error;
-               }
-               error = xfs_setfilesize(ip, tp, offset, size);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0,
+                               &tp);
+               if (!error)
+                       error = xfs_setfilesize(ip, tp, offset, size);
         }
  
         return error;
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h

index b4421177b68dc1ba619625876c05ee8d22caf570..814aab7907134e14cbc3ccb7f72d14001a7834d1 100644 (file)
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,7 +18,7 @@
  #ifndef __XFS_AOPS_H__
  #define __XFS_AOPS_H__
  
-extern mempool_t *xfs_ioend_pool;
+extern struct bio_set *xfs_ioend_bioset;
  
  /*
   * Types of I/O for bmap clustering and I/O completion tracking.
@@ -37,22 +37,19 @@ enum {
         { XFS_IO_OVERWRITE,             "overwrite" }
  
  /*
- * xfs_ioend struct manages large extent writes for XFS.
- * It can manage several multi-page bio's at once.
+ * Structure for buffered I/O completions.
   */
-typedef struct xfs_ioend {
+struct xfs_ioend {
         struct list_head        io_list;        /* next ioend in chain */
         unsigned int            io_type;        /* delalloc / unwritten */
-       int                     io_error;       /* I/O error code */
-       atomic_t                io_remaining;   /* hold count */
         struct inode            *io_inode;      /* file being written to */
-       struct buffer_head      *io_buffer_head;/* buffer linked list head */
-       struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
         size_t                  io_size;        /* size of the extent */
         xfs_off_t               io_offset;      /* offset in the file */
         struct work_struct      io_work;        /* xfsdatad work queue */
         struct xfs_trans        *io_append_trans;/* xact. for size update */
-} xfs_ioend_t;
+       struct bio              *io_bio;        /* bio being built */
+       struct bio              io_inline_bio;  /* MUST BE LAST! */
+};
  
  extern const struct address_space_operations xfs_address_space_operations;
  
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h

index dd4824589470eb106a2b5a764da6039d56121726..e3da5d448bcff5189aa15b2e04b95ac4cd01d9f1 100644 (file)
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -112,8 +112,9 @@ typedef struct attrlist_cursor_kern {
   *========================================================================*/
  
  
+/* Return 0 on success, or -errno; other state communicated via *context */
  typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
-                             unsigned char *, int, int, unsigned char *);
+                             unsigned char *, int, int);
  
  typedef struct xfs_attr_list_context {
         struct xfs_inode                *dp;            /* inode */
@@ -126,7 +127,6 @@ typedef struct xfs_attr_list_context {
         int                             firstu;         /* first used byte in buffer */
         int                             flags;          /* from VOP call */
         int                             resynch;        /* T/F: resynch with cursor */
-       int                             put_value;      /* T/F: need value for listent */
         put_listent_func_t              put_listent;    /* list output fmt function */
         int                             index;          /* index into output buffer */
  } xfs_attr_list_context_t;
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c

index 2bb959ada45bb5444830373a102fc4d7aced7273..55d214981ed27e6bb85490767939d25058815201 100644 (file)
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -405,21 +405,11 @@ xfs_attr_inactive(
                 goto out_destroy_fork;
         xfs_iunlock(dp, lock_mode);
  
-       /*
-        * Start our first transaction of the day.
-        *
-        * All future transactions during this code must be "chained" off
-        * this one via the trans_dup() call.  All transactions will contain
-        * the inode, and the inode will always be marked with trans_ihold().
-        * Since the inode will be locked in all transactions, we must log
-        * the inode in every transaction to let it float upward through
-        * the log.
-        */
         lock_mode = 0;
-       trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
-       error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrinval, 0, 0, 0, &trans);
         if (error)
-               goto out_cancel;
+               goto out_destroy_fork;
  
         lock_mode = XFS_ILOCK_EXCL;
         xfs_ilock(dp, lock_mode);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c

index 4fa14820e2e22b687ef852b81e1d6b9f9028caf3..d25f26b22ac92821cb2dbc95973e0c1804cebe10 100644 (file)
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -106,18 +106,15 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
                                            sfe->flags,
                                            sfe->nameval,
                                            (int)sfe->namelen,
-                                          (int)sfe->valuelen,
-                                          &sfe->nameval[sfe->namelen]);
-
+                                          (int)sfe->valuelen);
+                       if (error)
+                               return error;
                         /*
                          * Either search callback finished early or
                          * didn't fit it all in the buffer after all.
                          */
                         if (context->seen_enough)
                                 break;
-
-                       if (error)
-                               return error;
                         sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
                 }
                 trace_xfs_attr_list_sf_all(context);
@@ -200,8 +197,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
                                         sbp->flags,
                                         sbp->name,
                                         sbp->namelen,
-                                       sbp->valuelen,
-                                       &sbp->name[sbp->namelen]);
+                                       sbp->valuelen);
                 if (error) {
                         kmem_free(sbuf);
                         return error;
@@ -416,6 +412,9 @@ xfs_attr3_leaf_list_int(
          */
         retval = 0;
         for (; i < ichdr.count; entry++, i++) {
+               char *name;
+               int namelen, valuelen;
+
                 if (be32_to_cpu(entry->hashval) != cursor->hashval) {
                         cursor->hashval = be32_to_cpu(entry->hashval);
                         cursor->offset = 0;
@@ -425,56 +424,25 @@ xfs_attr3_leaf_list_int(
                         continue;               /* skip incomplete entries */
  
                 if (entry->flags & XFS_ATTR_LOCAL) {
-                       xfs_attr_leaf_name_local_t *name_loc =
-                               xfs_attr3_leaf_name_local(leaf, i);
-
-                       retval = context->put_listent(context,
-                                               entry->flags,
-                                               name_loc->nameval,
-                                               (int)name_loc->namelen,
-                                               be16_to_cpu(name_loc->valuelen),
-                                               &name_loc->nameval[name_loc->namelen]);
-                       if (retval)
-                               return retval;
+                       xfs_attr_leaf_name_local_t *name_loc;
+
+                       name_loc = xfs_attr3_leaf_name_local(leaf, i);
+                       name = name_loc->nameval;
+                       namelen = name_loc->namelen;
+                       valuelen = be16_to_cpu(name_loc->valuelen);
                 } else {
-                       xfs_attr_leaf_name_remote_t *name_rmt =
-                               xfs_attr3_leaf_name_remote(leaf, i);
-
-                       int valuelen = be32_to_cpu(name_rmt->valuelen);
-
-                       if (context->put_value) {
-                               xfs_da_args_t args;
-
-                               memset((char *)&args, 0, sizeof(args));
-                               args.geo = context->dp->i_mount->m_attr_geo;
-                               args.dp = context->dp;
-                               args.whichfork = XFS_ATTR_FORK;
-                               args.valuelen = valuelen;
-                               args.rmtvaluelen = valuelen;
-                               args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
-                               args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
-                               args.rmtblkcnt = xfs_attr3_rmt_blocks(
-                                                       args.dp->i_mount, valuelen);
-                               retval = xfs_attr_rmtval_get(&args);
-                               if (!retval)
-                                       retval = context->put_listent(context,
-                                                       entry->flags,
-                                                       name_rmt->name,
-                                                       (int)name_rmt->namelen,
-                                                       valuelen,
-                                                       args.value);
-                               kmem_free(args.value);
-                       } else {
-                               retval = context->put_listent(context,
-                                               entry->flags,
-                                               name_rmt->name,
-                                               (int)name_rmt->namelen,
-                                               valuelen,
-                                               NULL);
-                       }
-                       if (retval)
-                               return retval;
+                       xfs_attr_leaf_name_remote_t *name_rmt;
+
+                       name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
+                       name = name_rmt->name;
+                       namelen = name_rmt->namelen;
+                       valuelen = be32_to_cpu(name_rmt->valuelen);
                 }
+
+               retval = context->put_listent(context, entry->flags,
+                                             name, namelen, valuelen);
+               if (retval)
+                       break;
                 if (context->seen_enough)
                         break;
                 cursor->offset++;
@@ -551,8 +519,7 @@ xfs_attr_put_listent(
         int             flags,
         unsigned char   *name,
         int             namelen,
-       int             valuelen,
-       unsigned char   *value)
+       int             valuelen)
  {
         struct attrlist *alist = (struct attrlist *)context->alist;
         attrlist_ent_t *aep;
@@ -581,7 +548,7 @@ xfs_attr_put_listent(
                 trace_xfs_attr_list_full(context);
                 alist->al_more = 1;
                 context->seen_enough = 1;
-               return 1;
+               return 0;
         }
  
         aep = (attrlist_ent_t *)&context->alist[context->firstu];
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index 3b6309865c65336793a7ac84009188b44b394a32..613ea2d7ac19026f9668b9a6424e05d32315c5df 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -900,19 +900,15 @@ xfs_free_eofblocks(
                  * Free them up now by truncating the file to
                  * its current size.
                  */
-               tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-
                 if (need_iolock) {
-                       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
-                               xfs_trans_cancel(tp);
+                       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
                                 return -EAGAIN;
-                       }
                 }
  
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
+                               &tp);
                 if (error) {
                         ASSERT(XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp);
                         if (need_iolock)
                                 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                         return error;
@@ -1037,9 +1033,9 @@ xfs_alloc_file_space(
                 /*
                  * Allocate and setup the transaction.
                  */
-               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
-                                         resblks, resrtextents);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
+                               resrtextents, 0, &tp);
+
                 /*
                  * Check for running out of space
                  */
@@ -1048,7 +1044,6 @@ xfs_alloc_file_space(
                          * Free the transaction structure.
                          */
                         ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp);
                         break;
                 }
                 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1311,18 +1306,10 @@ xfs_free_file_space(
                  * transaction to dip into the reserve blocks to ensure
                  * the freeing of the space succeeds at ENOSPC.
                  */
-               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
-
-               /*
-                * check for running out of space
-                */
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
+                               &tp);
                 if (error) {
-                       /*
-                        * Free the transaction structure.
-                        */
                         ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp);
                         break;
                 }
                 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1482,19 +1469,16 @@ xfs_shift_file_space(
         }
  
         while (!error && !done) {
-               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
                 /*
                  * We would need to reserve permanent block for transaction.
                  * This will come into picture when after shifting extent into
                  * hole we found that adjacent extents can be merged which
                  * may lead to freeing of a block during record update.
                  */
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
-                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
-               if (error) {
-                       xfs_trans_cancel(tp);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
+                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+               if (error)
                         break;
-               }
  
                 xfs_ilock(ip, XFS_ILOCK_EXCL);
                 error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
@@ -1747,12 +1731,9 @@ xfs_swap_extents(
         if (error)
                 goto out_unlock;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       if (error)
                 goto out_unlock;
-       }
  
         /*
          * Lock and join the inodes to the tansaction so that transaction commit
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c

index 9a2191b911377f94e38d81d57d5d037a7e19ae8b..e71cfbd5acb3c74df23be024c91420a07291a493 100644 (file)
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1100,22 +1100,18 @@ xfs_bwrite(
         return error;
  }
  
-STATIC void
+static void
  xfs_buf_bio_end_io(
         struct bio              *bio)
  {
-       xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
+       struct xfs_buf          *bp = (struct xfs_buf *)bio->bi_private;
  
         /*
          * don't overwrite existing errors - otherwise we can lose errors on
          * buffers that require multiple bios to complete.
          */
-       if (bio->bi_error) {
-               spin_lock(&bp->b_lock);
-               if (!bp->b_io_error)
-                       bp->b_io_error = bio->bi_error;
-               spin_unlock(&bp->b_lock);
-       }
+       if (bio->bi_error)
+               cmpxchg(&bp->b_io_error, 0, bio->bi_error);
  
         if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
                 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h

index 4eb89bd4ee73b6f4265eb63b8238e9571150bf26..8bfb974f0772844af09b11373fe11337d50685d7 100644 (file)
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -183,6 +183,26 @@ typedef struct xfs_buf {
         unsigned int            b_page_count;   /* size of page array */
         unsigned int            b_offset;       /* page offset in first page */
         int                     b_error;        /* error code on I/O */
+
+       /*
+        * async write failure retry count. Initialised to zero on the first
+        * failure, then when it exceeds the maximum configured without a
+        * success the write is considered to be failed permanently and the
+        * iodone handler will take appropriate action.
+        *
+        * For retry timeouts, we record the jiffies of the first failure. This
+        * means that we can change the retry timeout for buffers already under
+        * I/O and thus avoid getting stuck in a retry loop with a long timeout.
+        *
+        * last_error is used to ensure that we are getting repeated errors, not
+        * different errors. e.g. a block device might change ENOSPC to EIO when
+        * a failure timeout occurs, so we want to re-initialise the error
+        * retry behaviour appropriately when that happens.
+        */
+       int                     b_retries;
+       unsigned long           b_first_retry_time; /* in jiffies */
+       int                     b_last_error;
+
         const struct xfs_buf_ops        *b_ops;
  
  #ifdef XFS_BUF_LOCK_TRACKING
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index 99e91a0e554ea6512ce5eb43cb8a338804f550ae..34257992934c4184feba8505c8342e2e0a1e9edc 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1042,35 +1042,22 @@ xfs_buf_do_callbacks(
         }
  }
  
-/*
- * This is the iodone() function for buffers which have had callbacks
- * attached to them by xfs_buf_attach_iodone().  It should remove each
- * log item from the buffer's list and call the callback of each in turn.
- * When done, the buffer's fsprivate field is set to NULL and the buffer
- * is unlocked with a call to iodone().
- */
-void
-xfs_buf_iodone_callbacks(
+static bool
+xfs_buf_iodone_callback_error(
         struct xfs_buf          *bp)
  {
         struct xfs_log_item     *lip = bp->b_fspriv;
         struct xfs_mount        *mp = lip->li_mountp;
         static ulong            lasttime;
         static xfs_buftarg_t    *lasttarg;
-
-       if (likely(!bp->b_error))
-               goto do_callbacks;
+       struct xfs_error_cfg    *cfg;
  
         /*
          * If we've already decided to shutdown the filesystem because of
          * I/O errors, there's no point in giving this a retry.
          */
-       if (XFS_FORCED_SHUTDOWN(mp)) {
-               xfs_buf_stale(bp);
-               bp->b_flags |= XBF_DONE;
-               trace_xfs_buf_item_iodone(bp, _RET_IP_);
-               goto do_callbacks;
-       }
+       if (XFS_FORCED_SHUTDOWN(mp))
+               goto out_stale;
  
         if (bp->b_target != lasttarg ||
             time_after(jiffies, (lasttime + 5*HZ))) {
@@ -1079,45 +1066,93 @@ xfs_buf_iodone_callbacks(
         }
         lasttarg = bp->b_target;
  
+       /* synchronous writes will have callers process the error */
+       if (!(bp->b_flags & XBF_ASYNC))
+               goto out_stale;
+
+       trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+       ASSERT(bp->b_iodone != NULL);
+
         /*
          * If the write was asynchronous then no one will be looking for the
-        * error.  Clear the error state and write the buffer out again.
-        *
-        * XXX: This helps against transient write errors, but we need to find
-        * a way to shut the filesystem down if the writes keep failing.
-        *
-        * In practice we'll shut the filesystem down soon as non-transient
-        * errors tend to affect the whole device and a failing log write
-        * will make us give up.  But we really ought to do better here.
+        * error.  If this is the first failure of this type, clear the error
+        * state and write the buffer out again. This means we always retry an
+        * async write failure at least once, but we also need to set the buffer
+        * up to behave correctly now for repeated failures.
          */
-       if (bp->b_flags & XBF_ASYNC) {
-               ASSERT(bp->b_iodone != NULL);
+       if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL)) ||
+            bp->b_last_error != bp->b_error) {
+               bp->b_flags |= (XBF_WRITE | XBF_ASYNC |
+                               XBF_DONE | XBF_WRITE_FAIL);
+               bp->b_last_error = bp->b_error;
+               bp->b_retries = 0;
+               bp->b_first_retry_time = jiffies;
+
+               xfs_buf_ioerror(bp, 0);
+               xfs_buf_submit(bp);
+               return true;
+       }
  
-               trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+       /*
+        * Repeated failure on an async write. Take action according to the
+        * error configuration we have been set up to use.
+        */
+       cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
  
-               xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
+       if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
+           ++bp->b_retries > cfg->max_retries)
+                       goto permanent_error;
+       if (cfg->retry_timeout &&
+           time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
+                       goto permanent_error;
  
-               if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
-                       bp->b_flags |= XBF_WRITE | XBF_ASYNC |
-                                      XBF_DONE | XBF_WRITE_FAIL;
-                       xfs_buf_submit(bp);
-               } else {
-                       xfs_buf_relse(bp);
-               }
+       /* At unmount we may treat errors differently */
+       if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
+               goto permanent_error;
  
-               return;
-       }
+       /* still a transient error, higher layers will retry */
+       xfs_buf_ioerror(bp, 0);
+       xfs_buf_relse(bp);
+       return true;
  
         /*
-        * If the write of the buffer was synchronous, we want to make
-        * sure to return the error to the caller of xfs_bwrite().
+        * Permanent error - we need to trigger a shutdown if we haven't already
+        * to indicate that inconsistency will result from this action.
          */
+permanent_error:
+       xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+out_stale:
         xfs_buf_stale(bp);
         bp->b_flags |= XBF_DONE;
-
         trace_xfs_buf_error_relse(bp, _RET_IP_);
+       return false;
+}
+
+/*
+ * This is the iodone() function for buffers which have had callbacks attached
+ * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
+ * callback list, mark the buffer as having no more callbacks and then push the
+ * buffer through IO completion processing.
+ */
+void
+xfs_buf_iodone_callbacks(
+       struct xfs_buf          *bp)
+{
+       /*
+        * If there is an error, process it. Some errors require us
+        * to run callbacks after failure processing is done so we
+        * detect that and take appropriate action.
+        */
+       if (bp->b_error && xfs_buf_iodone_callback_error(bp))
+               return;
+
+       /*
+        * Successful IO or permanent error. Either way, we can clear the
+        * retry state here in preparation for the next error that may occur.
+        */
+       bp->b_last_error = 0;
+       bp->b_retries = 0;
  
-do_callbacks:
         xfs_buf_do_callbacks(bp);
         bp->b_fspriv = NULL;
         bp->b_iodone = NULL;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c

index 316b2a1bdba5f6da82f1bad0dcbc0708151a59d2..e0646659ce16eaafa7b35833fac127fd13b80582 100644 (file)
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -614,11 +614,10 @@ xfs_qm_dqread(
         trace_xfs_dqread(dqp);
  
         if (flags & XFS_QMOPT_DQALLOC) {
-               tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
-                                         XFS_QM_DQALLOC_SPACE_RES(mp), 0);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
+                               XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
                 if (error)
-                       goto error1;
+                       goto error0;
         }
  
         /*
@@ -692,7 +691,7 @@ error0:
   * end of the chunk, skip ahead to first id in next allocated chunk
   * using the SEEK_DATA interface.
   */
-int
+static int
  xfs_dq_get_next_id(
         xfs_mount_t             *mp,
         uint                    type,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index 85ce3032f815ebc0ef1e360eeda9ce67eef2f9a1..44af22897c8bfec066d0ac80107a6ce18b9edb9c 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -145,12 +145,10 @@ xfs_update_prealloc_flags(
         struct xfs_trans        *tp;
         int                     error;
  
-       tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
-       error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
+                       0, 0, 0, &tp);
+       if (error)
                 return error;
-       }
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c

index ee3aaa0a53179f761ffffe6257a58b637a6b7dc8..b4d75825ae3732d98ee19c8760719d92bd83a9cf 100644 (file)
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -198,14 +198,10 @@ xfs_growfs_data_private(
                         return error;
         }
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
-       tp->t_flags |= XFS_TRANS_RESERVE;
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
-                                 XFS_GROWFS_SPACE_RES(mp), 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
+                       XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
+       if (error)
                 return error;
-       }
  
         /*
          * Write new AG headers to disk. Non-transactional, but written
@@ -243,8 +239,8 @@ xfs_growfs_data_private(
                 agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
                 agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
                 agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
-               agf->agf_flfirst = 0;
-               agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
+               agf->agf_flfirst = cpu_to_be32(1);
+               agf->agf_fllast = 0;
                 agf->agf_flcount = 0;
                 tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
                 agf->agf_freeblks = cpu_to_be32(tmpsize);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index bf2d60749278602b5b4afcda09ede7d3dd89fd1e..99ee6eee5e0b0d5af6e2e5124c4878a1fe2c4cb4 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -37,9 +37,6 @@
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  
-STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
-                               struct xfs_perag *pag, struct xfs_inode *ip);
-
  /*
   * Allocate and initialise an xfs_inode.
   */
@@ -94,13 +91,6 @@ xfs_inode_free_callback(
         struct inode            *inode = container_of(head, struct inode, i_rcu);
         struct xfs_inode        *ip = XFS_I(inode);
  
-       kmem_zone_free(xfs_inode_zone, ip);
-}
-
-void
-xfs_inode_free(
-       struct xfs_inode        *ip)
-{
         switch (VFS_I(ip)->i_mode & S_IFMT) {
         case S_IFREG:
         case S_IFDIR:
@@ -118,6 +108,25 @@ xfs_inode_free(
                 ip->i_itemp = NULL;
         }
  
+       kmem_zone_free(xfs_inode_zone, ip);
+}
+
+static void
+__xfs_inode_free(
+       struct xfs_inode        *ip)
+{
+       /* sanity checks: pin count must be zero and xfs_isiflocked() false */
+       ASSERT(atomic_read(&ip->i_pincount) == 0);
+       ASSERT(!xfs_isiflocked(ip));
+       XFS_STATS_DEC(ip->i_mount, vn_active);
+       /* queue xfs_inode_free_callback() on this inode via call_rcu() */
+       call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+}
+
+void
+xfs_inode_free(
+       struct xfs_inode        *ip)
+{
         /*
          * Because we use RCU freeing we need to ensure the inode always
          * appears to be reclaimed with an invalid inode number when in the
@@ -129,12 +138,123 @@ xfs_inode_free(
         ip->i_ino = 0;
         spin_unlock(&ip->i_flags_lock);
  
-       /* asserts to verify all state is correct here */
-       ASSERT(atomic_read(&ip->i_pincount) == 0);
-       ASSERT(!xfs_isiflocked(ip));
-       XFS_STATS_DEC(ip->i_mount, vn_active);
+       __xfs_inode_free(ip);
+}
  
-       call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs periodic sync default of 30s. Perhaps this should have its own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_reclaim_work_queue(
+       struct xfs_mount        *mp)
+{
+       /* only queue the work when the perag tree carries the reclaim tag */
+       rcu_read_lock();
+       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+               queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
+                       msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+void
+xfs_reclaim_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                       struct xfs_mount, m_reclaim_work);
+
+       xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+       xfs_reclaim_work_queue(mp);     /* requeue if the tag is still set */
+}
+
+static void
+xfs_perag_set_reclaim_tag(
+       struct xfs_perag        *pag)
+{
+       struct xfs_mount        *mp = pag->pag_mount;
+
+       ASSERT(spin_is_locked(&pag->pag_ici_lock));
+       if (pag->pag_ici_reclaimable++)
+               return;         /* count was already non-zero */
+
+       /* propagate the reclaim tag up into the perag radix tree */
+       spin_lock(&mp->m_perag_lock);
+       radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno,
+                          XFS_ICI_RECLAIM_TAG);
+       spin_unlock(&mp->m_perag_lock);
+
+       /* schedule periodic background inode reclaim */
+       xfs_reclaim_work_queue(mp);
+
+       trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+static void
+xfs_perag_clear_reclaim_tag(
+       struct xfs_perag        *pag)
+{
+       struct xfs_mount        *mp = pag->pag_mount;
+
+       ASSERT(spin_is_locked(&pag->pag_ici_lock));
+       if (--pag->pag_ici_reclaimable)
+               return;         /* count is still non-zero */
+
+       /* clear the reclaim tag from the perag radix tree */
+       spin_lock(&mp->m_perag_lock);
+       radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno,
+                            XFS_ICI_RECLAIM_TAG);
+       spin_unlock(&mp->m_perag_lock);
+       trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+
+/*
+ * Set the radix tree reclaim tag and XFS_IRECLAIMABLE together, under both
+ * pag_ici_lock and i_flags_lock. Once we get tag lookups on the radix tree,
+ * this inode flag can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_perag        *pag;
+
+       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+       spin_lock(&pag->pag_ici_lock);
+       spin_lock(&ip->i_flags_lock);
+
+       radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino),
+                          XFS_ICI_RECLAIM_TAG);
+       xfs_perag_set_reclaim_tag(pag);
+       __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+
+       spin_unlock(&ip->i_flags_lock);
+       spin_unlock(&pag->pag_ici_lock);
+       xfs_perag_put(pag);
+}
+
+STATIC void
+xfs_inode_clear_reclaim_tag(
+       struct xfs_perag        *pag,
+       xfs_ino_t               ino)
+{
+       radix_tree_tag_clear(&pag->pag_ici_root,
+                            XFS_INO_TO_AGINO(pag->pag_mount, ino),
+                            XFS_ICI_RECLAIM_TAG);
+       xfs_perag_clear_reclaim_tag(pag);       /* then the per-AG accounting */
  }
  
  /*
@@ -264,7 +384,7 @@ xfs_iget_cache_hit(
                  */
                 ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
                 ip->i_flags |= XFS_INEW;
-               __xfs_inode_clear_reclaim_tag(mp, pag, ip);
+               xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
                 inode->i_state = I_NEW;
  
                 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
@@ -722,121 +842,6 @@ xfs_inode_ag_iterator_tag(
         return last_error;
  }
  
-/*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs periodic sync default of 30s. Perhaps this should have it's own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_reclaim_work_queue(
-       struct xfs_mount        *mp)
-{
-
-       rcu_read_lock();
-       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-               queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
-                       msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-       }
-       rcu_read_unlock();
-}
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-void
-xfs_reclaim_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(to_delayed_work(work),
-                                       struct xfs_mount, m_reclaim_work);
-
-       xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
-       xfs_reclaim_work_queue(mp);
-}
-
-static void
-__xfs_inode_set_reclaim_tag(
-       struct xfs_perag        *pag,
-       struct xfs_inode        *ip)
-{
-       radix_tree_tag_set(&pag->pag_ici_root,
-                          XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-                          XFS_ICI_RECLAIM_TAG);
-
-       if (!pag->pag_ici_reclaimable) {
-               /* propagate the reclaim tag up into the perag radix tree */
-               spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                               XFS_ICI_RECLAIM_TAG);
-               spin_unlock(&ip->i_mount->m_perag_lock);
-
-               /* schedule periodic background inode reclaim */
-               xfs_reclaim_work_queue(ip->i_mount);
-
-               trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-                                                       -1, _RET_IP_);
-       }
-       pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
-       xfs_inode_t     *ip)
-{
-       struct xfs_mount *mp = ip->i_mount;
-       struct xfs_perag *pag;
-
-       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-       spin_lock(&pag->pag_ici_lock);
-       spin_lock(&ip->i_flags_lock);
-       __xfs_inode_set_reclaim_tag(pag, ip);
-       __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-       spin_unlock(&ip->i_flags_lock);
-       spin_unlock(&pag->pag_ici_lock);
-       xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
-       xfs_perag_t     *pag,
-       xfs_inode_t     *ip)
-{
-       pag->pag_ici_reclaimable--;
-       if (!pag->pag_ici_reclaimable) {
-               /* clear the reclaim tag from the perag radix tree */
-               spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                               XFS_ICI_RECLAIM_TAG);
-               spin_unlock(&ip->i_mount->m_perag_lock);
-               trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
-                                                       -1, _RET_IP_);
-       }
-}
-
-STATIC void
-__xfs_inode_clear_reclaim_tag(
-       xfs_mount_t     *mp,
-       xfs_perag_t     *pag,
-       xfs_inode_t     *ip)
-{
-       radix_tree_tag_clear(&pag->pag_ici_root,
-                       XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
-       __xfs_inode_clear_reclaim(pag, ip);
-}
-
  /*
   * Grab the inode for reclaim exclusively.
   * Return 0 if we grabbed it, non-zero otherwise.
@@ -929,6 +934,7 @@ xfs_reclaim_inode(
         int                     sync_mode)
  {
         struct xfs_buf          *bp = NULL;
+       xfs_ino_t               ino = ip->i_ino; /* for radix_tree_delete */
         int                     error;
  
  restart:
@@ -993,6 +999,22 @@ restart:
  
         xfs_iflock(ip);
  reclaim:
+       /*
+        * Because we use RCU freeing we need to ensure the inode always appears
+        * to be reclaimed with an invalid inode number when in the free state.
+        * We do this as early as possible under the ILOCK and flush lock so
+        * that xfs_iflush_cluster() can be guaranteed to detect races with us
+        * here. By doing this, we guarantee that once xfs_iflush_cluster has
+        * locked both the XFS_ILOCK and the flush lock that it will see either
+        * a valid, flushable inode that will serialise correctly against the
+        * locks below, or it will see a clean (and invalid) inode that it can
+        * skip.
+        */
+       spin_lock(&ip->i_flags_lock);
+       ip->i_flags = XFS_IRECLAIM;
+       ip->i_ino = 0;
+       spin_unlock(&ip->i_flags_lock);
+
         xfs_ifunlock(ip);
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
  
@@ -1006,9 +1028,9 @@ reclaim:
          */
         spin_lock(&pag->pag_ici_lock);
         if (!radix_tree_delete(&pag->pag_ici_root,
-                               XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+                               XFS_INO_TO_AGINO(ip->i_mount, ino)))
                 ASSERT(0);
-       __xfs_inode_clear_reclaim(pag, ip);
+       xfs_perag_clear_reclaim_tag(pag);
         spin_unlock(&pag->pag_ici_lock);
  
         /*
@@ -1023,7 +1045,7 @@ reclaim:
         xfs_qm_dqdetach(ip);
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
  
-       xfs_inode_free(ip);
+       __xfs_inode_free(ip);
         return error;
  
  out_ifunlock:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 96f606deee313aed506b7e7ee229fc801ba5de80..ee6799e0476f397b0aba305fee9f7cddef188b8e 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1030,7 +1030,7 @@ xfs_dir_ialloc(
                         tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
                 }
  
-               code = xfs_trans_roll(&tp, 0);
+               code = xfs_trans_roll(&tp, NULL);
                 if (committed != NULL)
                         *committed = 1;
  
@@ -1161,11 +1161,9 @@ xfs_create(
                 rdev = 0;
                 resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
                 tres = &M_RES(mp)->tr_mkdir;
-               tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
         } else {
                 resblks = XFS_CREATE_SPACE_RES(mp, name->len);
                 tres = &M_RES(mp)->tr_create;
-               tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
         }
  
         /*
@@ -1174,20 +1172,19 @@ xfs_create(
          * the case we'll drop the one we have and get a more
          * appropriate transaction later.
          */
-       error = xfs_trans_reserve(tp, tres, resblks, 0);
+       error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
         if (error == -ENOSPC) {
                 /* flush outstanding delalloc blocks and retry */
                 xfs_flush_inodes(mp);
-               error = xfs_trans_reserve(tp, tres, resblks, 0);
+               error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
         }
         if (error == -ENOSPC) {
                 /* No space at all so try a "no-allocation" reservation */
                 resblks = 0;
-               error = xfs_trans_reserve(tp, tres, 0, 0);
+               error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
         }
         if (error)
-               goto out_trans_cancel;
-
+               goto out_release_inode;
  
         xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
                       XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
@@ -1337,17 +1334,16 @@ xfs_create_tmpfile(
                 return error;
  
         resblks = XFS_IALLOC_SPACE_RES(mp);
-       tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
-
         tres = &M_RES(mp)->tr_create_tmpfile;
-       error = xfs_trans_reserve(tp, tres, resblks, 0);
+
+       error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
         if (error == -ENOSPC) {
                 /* No space at all so try a "no-allocation" reservation */
                 resblks = 0;
-               error = xfs_trans_reserve(tp, tres, 0, 0);
+               error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
         }
         if (error)
-               goto out_trans_cancel;
+               goto out_release_inode;
  
         error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
                                                 pdqp, resblks, 1, 0);
@@ -1432,15 +1428,14 @@ xfs_link(
         if (error)
                 goto std_return;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
         resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
         if (error == -ENOSPC) {
                 resblks = 0;
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
         }
         if (error)
-               goto error_return;
+               goto std_return;
  
         xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
         xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
@@ -1710,11 +1705,9 @@ xfs_inactive_truncate(
         struct xfs_trans        *tp;
         int                     error;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
         if (error) {
                 ASSERT(XFS_FORCED_SHUTDOWN(mp));
-               xfs_trans_cancel(tp);
                 return error;
         }
  
@@ -1764,8 +1757,6 @@ xfs_inactive_ifree(
         struct xfs_trans        *tp;
         int                     error;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-
         /*
          * The ifree transaction might need to allocate blocks for record
          * insertion to the finobt. We don't want to fail here at ENOSPC, so
@@ -1781,9 +1772,8 @@ xfs_inactive_ifree(
          * now remains allocated and sits on the unlinked list until the fs is
          * repaired.
          */
-       tp->t_flags |= XFS_TRANS_RESERVE;
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
-                                 XFS_IFREE_SPACE_RES(mp), 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
+                       XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
         if (error) {
                 if (error == -ENOSPC) {
                         xfs_warn_ratelimited(mp,
@@ -1792,7 +1782,6 @@ xfs_inactive_ifree(
                 } else {
                         ASSERT(XFS_FORCED_SHUTDOWN(mp));
                 }
-               xfs_trans_cancel(tp);
                 return error;
         }
  
@@ -2525,11 +2514,6 @@ xfs_remove(
         if (error)
                 goto std_return;
  
-       if (is_dir)
-               tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
-       else
-               tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
-
         /*
          * We try to get the real space reservation first,
          * allowing for directory btree deletion(s) implying
@@ -2540,14 +2524,15 @@ xfs_remove(
          * block from the directory.
          */
         resblks = XFS_REMOVE_SPACE_RES(mp);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
         if (error == -ENOSPC) {
                 resblks = 0;
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
+                               &tp);
         }
         if (error) {
                 ASSERT(error != -ENOSPC);
-               goto out_trans_cancel;
+               goto std_return;
         }
  
         xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
@@ -2855,6 +2840,7 @@ xfs_rename_alloc_whiteout(
          * and flag it as linkable.
          */
         drop_nlink(VFS_I(tmpfile));
+       xfs_setup_iops(tmpfile);
         xfs_finish_inode_setup(tmpfile);
         VFS_I(tmpfile)->i_state |= I_LINKABLE;
  
@@ -2910,15 +2896,15 @@ xfs_rename(
         xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
                                 inodes, &num_inodes);
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
         spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
         if (error == -ENOSPC) {
                 spaceres = 0;
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
+                               &tp);
         }
         if (error)
-               goto out_trans_cancel;
+               goto out_release_wip;
  
         /*
          * Attach the dquots to the inodes
@@ -3155,6 +3141,7 @@ out_bmap_cancel:
         xfs_bmap_cancel(&free_list);
  out_trans_cancel:
         xfs_trans_cancel(tp);
+out_release_wip:
         if (wip)
                 IRELE(wip);
         return error;
@@ -3162,16 +3149,16 @@ out_trans_cancel:
  
  STATIC int
  xfs_iflush_cluster(
-       xfs_inode_t     *ip,
-       xfs_buf_t       *bp)
+       struct xfs_inode        *ip,
+       struct xfs_buf          *bp)
  {
-       xfs_mount_t             *mp = ip->i_mount;
+       struct xfs_mount        *mp = ip->i_mount;
         struct xfs_perag        *pag;
         unsigned long           first_index, mask;
         unsigned long           inodes_per_cluster;
-       int                     ilist_size;
-       xfs_inode_t             **ilist;
-       xfs_inode_t             *iq;
+       int                     cilist_size;
+       struct xfs_inode        **cilist;
+       struct xfs_inode        *cip;
         int                     nr_found;
         int                     clcount = 0;
         int                     bufwasdelwri;
@@ -3180,23 +3167,23 @@ xfs_iflush_cluster(
         pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
  
         inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-       ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
-       ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
-       if (!ilist)
+       cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+       cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
+       if (!cilist)
                 goto out_put;
  
         mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
         first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
         rcu_read_lock();
         /* really need a gang lookup range call here */
-       nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+       nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
                                         first_index, inodes_per_cluster);
         if (nr_found == 0)
                 goto out_free;
  
         for (i = 0; i < nr_found; i++) {
-               iq = ilist[i];
-               if (iq == ip)
+               cip = cilist[i];
+               if (cip == ip)
                         continue;
  
                 /*
@@ -3205,20 +3192,30 @@ xfs_iflush_cluster(
                  * We need to check under the i_flags_lock for a valid inode
                  * here. Skip it if it is not valid or the wrong inode.
                  */
-               spin_lock(&ip->i_flags_lock);
-               if (!ip->i_ino ||
-                   (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
-                       spin_unlock(&ip->i_flags_lock);
+               spin_lock(&cip->i_flags_lock);
+               if (!cip->i_ino ||
+                   __xfs_iflags_test(cip, XFS_ISTALE)) {
+                       spin_unlock(&cip->i_flags_lock);
                         continue;
                 }
-               spin_unlock(&ip->i_flags_lock);
+
+               /*
+                * Once we fall off the end of the cluster, no point checking
+                * any more inodes in the list because they will also all be
+                * outside the cluster.
+                */
+               if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) {
+                       spin_unlock(&cip->i_flags_lock);
+                       break;
+               }
+               spin_unlock(&cip->i_flags_lock);
  
                 /*
                  * Do an un-protected check to see if the inode is dirty and
                  * is a candidate for flushing.  These checks will be repeated
                  * later after the appropriate locks are acquired.
                  */
-               if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+               if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)
                         continue;
  
                 /*
@@ -3226,15 +3223,28 @@ xfs_iflush_cluster(
                  * then this inode cannot be flushed and is skipped.
                  */
  
-               if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+               if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED))
+                       continue;
+               if (!xfs_iflock_nowait(cip)) {
+                       xfs_iunlock(cip, XFS_ILOCK_SHARED);
                         continue;
-               if (!xfs_iflock_nowait(iq)) {
-                       xfs_iunlock(iq, XFS_ILOCK_SHARED);
+               }
+               if (xfs_ipincount(cip)) {
+                       xfs_ifunlock(cip);
+                       xfs_iunlock(cip, XFS_ILOCK_SHARED);
                         continue;
                 }
-               if (xfs_ipincount(iq)) {
-                       xfs_ifunlock(iq);
-                       xfs_iunlock(iq, XFS_ILOCK_SHARED);
+
+
+               /*
+                * Check the inode number again, just to be certain we are not
+                * racing with freeing in xfs_reclaim_inode(). See the comments
+                * in that function for more information as to why the initial
+                * check is not sufficient.
+                */
+               if (!cip->i_ino) {
+                       xfs_ifunlock(cip);
+                       xfs_iunlock(cip, XFS_ILOCK_SHARED);
                         continue;
                 }
  
@@ -3242,18 +3252,18 @@ xfs_iflush_cluster(
                  * arriving here means that this inode can be flushed.  First
                  * re-check that it's dirty before flushing.
                  */
-               if (!xfs_inode_clean(iq)) {
+               if (!xfs_inode_clean(cip)) {
                         int     error;
-                       error = xfs_iflush_int(iq, bp);
+                       error = xfs_iflush_int(cip, bp);
                         if (error) {
-                               xfs_iunlock(iq, XFS_ILOCK_SHARED);
+                               xfs_iunlock(cip, XFS_ILOCK_SHARED);
                                 goto cluster_corrupt_out;
                         }
                         clcount++;
                 } else {
-                       xfs_ifunlock(iq);
+                       xfs_ifunlock(cip);
                 }
-               xfs_iunlock(iq, XFS_ILOCK_SHARED);
+               xfs_iunlock(cip, XFS_ILOCK_SHARED);
         }
  
         if (clcount) {
@@ -3263,7 +3273,7 @@ xfs_iflush_cluster(
  
  out_free:
         rcu_read_unlock();
-       kmem_free(ilist);
+       kmem_free(cilist);
  out_put:
         xfs_perag_put(pag);
         return 0;
@@ -3306,8 +3316,8 @@ cluster_corrupt_out:
         /*
          * Unlocks the flush lock
          */
-       xfs_iflush_abort(iq, false);
-       kmem_free(ilist);
+       xfs_iflush_abort(cip, false);
+       kmem_free(cilist);
         xfs_perag_put(pag);
         return -EFSCORRUPTED;
  }
@@ -3327,7 +3337,7 @@ xfs_iflush(
         struct xfs_buf          **bpp)
  {
         struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_buf          *bp;
+       struct xfs_buf          *bp = NULL;
         struct xfs_dinode       *dip;
         int                     error;
  
@@ -3369,14 +3379,22 @@ xfs_iflush(
         }
  
         /*
-        * Get the buffer containing the on-disk inode.
+        * Get the buffer containing the on-disk inode. We are doing a try-lock
+        * operation here, so we may get an EAGAIN error. In that case, we
+        * simply want to return with the inode still dirty.
+        *
+        * If we get any other error, we effectively have a corruption situation
+        * and we cannot flush the inode, so we treat it the same as failing
+        * xfs_iflush_int().
          */
         error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
                                0);
-       if (error || !bp) {
+       if (error == -EAGAIN) {
                 xfs_ifunlock(ip);
                 return error;
         }
+       if (error)
+               goto corrupt_out;
  
         /*
          * First flush out the inode that xfs_iflush was called with.
@@ -3404,7 +3422,8 @@ xfs_iflush(
         return 0;
  
  corrupt_out:
-       xfs_buf_relse(bp);
+       if (bp)
+               xfs_buf_relse(bp);
         xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
  cluster_corrupt_out:
         error = -EFSCORRUPTED;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index 43e1d51b15eb84ca34e978166025b74d30e5b573..e52d7c7aeb5b7773558a4d218afbb2a69cdaafd4 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -440,6 +440,9 @@ loff_t      __xfs_seek_hole_data(struct inode *inode, loff_t start,
  
  
  /* from xfs_iops.c */
+extern void xfs_setup_inode(struct xfs_inode *ip);
+extern void xfs_setup_iops(struct xfs_inode *ip);
+
  /*
   * When setting up a newly allocated inode, we need to call
   * xfs_finish_inode_setup() once the inode is fully instantiated at
@@ -447,7 +450,6 @@ loff_t      __xfs_seek_hole_data(struct inode *inode, loff_t start,
   * before we've completed instantiation. Otherwise we can do it
   * the moment the inode lookup is complete.
   */
-extern void xfs_setup_inode(struct xfs_inode *ip);
  static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
  {
         xfs_iflags_clear(ip, XFS_INEW);
@@ -458,6 +460,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
  static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
  {
         xfs_setup_inode(ip);
+       xfs_setup_iops(ip);
         xfs_finish_inode_setup(ip);
  }
  
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c

index c48b5b18d771fab685e23c03613a1c6e762efcb4..a1b07612224c95ea56391474a4bb790fec07fab6 100644 (file)
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -210,7 +210,7 @@ xfs_inode_item_format_data_fork(
                          */
                         data_bytes = roundup(ip->i_df.if_bytes, 4);
                         ASSERT(ip->i_df.if_real_bytes == 0 ||
-                              ip->i_df.if_real_bytes == data_bytes);
+                              ip->i_df.if_real_bytes >= data_bytes);
                         ASSERT(ip->i_df.if_u1.if_data != NULL);
                         ASSERT(ip->i_d.di_size > 0);
                         xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
@@ -305,7 +305,7 @@ xfs_inode_item_format_attr_fork(
                          */
                         data_bytes = roundup(ip->i_afp->if_bytes, 4);
                         ASSERT(ip->i_afp->if_real_bytes == 0 ||
-                              ip->i_afp->if_real_bytes == data_bytes);
+                              ip->i_afp->if_real_bytes >= data_bytes);
                         ASSERT(ip->i_afp->if_u1.if_data != NULL);
                         xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
                                         ip->i_afp->if_u1.if_data,
@@ -479,6 +479,8 @@ STATIC uint
  xfs_inode_item_push(
         struct xfs_log_item     *lip,
         struct list_head        *buffer_list)
+               __releases(&lip->li_ailp->xa_lock)
+               __acquires(&lip->li_ailp->xa_lock)
  {
         struct xfs_inode_log_item *iip = INODE_ITEM(lip);
         struct xfs_inode        *ip = iip->ili_inode;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index bcb6c19ce3ea4fea69c536343c07ea222de3155d..dbca7375deefa3f7d2499f516697ffdd28178546 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -277,7 +277,6 @@ xfs_readlink_by_handle(
  {
         struct dentry           *dentry;
         __u32                   olen;
-       void                    *link;
         int                     error;
  
         if (!capable(CAP_SYS_ADMIN))
@@ -288,7 +287,7 @@ xfs_readlink_by_handle(
                 return PTR_ERR(dentry);
  
         /* Restrict this handle operation to symlinks only. */
-       if (!d_is_symlink(dentry)) {
+       if (!d_inode(dentry)->i_op->readlink) {
                 error = -EINVAL;
                 goto out_dput;
         }
@@ -298,21 +297,8 @@ xfs_readlink_by_handle(
                 goto out_dput;
         }
  
-       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
-       if (!link) {
-               error = -ENOMEM;
-               goto out_dput;
-       }
-
-       error = xfs_readlink(XFS_I(d_inode(dentry)), link);
-       if (error)
-               goto out_kfree;
-       error = readlink_copy(hreq->ohandle, olen, link);
-       if (error)
-               goto out_kfree;
+       error = d_inode(dentry)->i_op->readlink(dentry, hreq->ohandle, olen);
  
- out_kfree:
-       kfree(link);
   out_dput:
         dput(dentry);
         return error;
@@ -334,12 +320,10 @@ xfs_set_dmattrs(
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       if (error)
                 return error;
-       }
+
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  
@@ -1141,10 +1125,10 @@ xfs_ioctl_setattr_get_trans(
         if (XFS_FORCED_SHUTDOWN(mp))
                 goto out_unlock;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       /* Nothing to cancel on failure; leave via the common unlock path. */
         if (error)
-               goto out_cancel;
+               goto out_unlock;
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index d81bdc080370e474f52b7fbd68c327cf576e74ea..58391355a44df71b7973ae132bb4b5adc2f77a9b 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -132,6 +132,7 @@ xfs_iomap_write_direct(
         int             error;
         int             lockmode;
         int             bmapi_flags = XFS_BMAPI_PREALLOC;
+       uint            tflags = 0;
  
         rt = XFS_IS_REALTIME_INODE(ip);
         extsz = xfs_get_extsz_hint(ip);
@@ -191,11 +192,6 @@ xfs_iomap_write_direct(
         if (error)
                 return error;
  
-       /*
-        * Allocate and setup the transaction
-        */
-       tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-
         /*
          * For DAX, we do not allocate unwritten extents, but instead we zero
          * the block before we commit the transaction.  Ideally we'd like to do
@@ -209,23 +205,17 @@ xfs_iomap_write_direct(
          * the reserve block pool for bmbt block allocation if there is no space
          * left but we need to do unwritten extent conversion.
          */
-
         if (IS_DAX(VFS_I(ip))) {
                 bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
                 if (ISUNWRITTEN(imap)) {
-                       tp->t_flags |= XFS_TRANS_RESERVE;
+                       tflags |= XFS_TRANS_RESERVE;
                         resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
                 }
         }
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
-                                 resblks, resrtextents);
-       /*
-        * Check for running out of space, note: need lock to return
-        */
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, resrtextents,
+                       tflags, &tp);
+       if (error)
                 return error;
-       }
  
         lockmode = XFS_ILOCK_EXCL;
         xfs_ilock(ip, lockmode);
@@ -726,15 +716,13 @@ xfs_iomap_write_allocate(
  
                 nimaps = 0;
                 while (nimaps == 0) {
-                       tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
-                       tp->t_flags |= XFS_TRANS_RESERVE;
                         nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
-                       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
-                                                 nres, 0);
-                       if (error) {
-                               xfs_trans_cancel(tp);
+
+                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres,
+                                       0, XFS_TRANS_RESERVE, &tp);
+                       if (error)
                                 return error;
-                       }
+
                         xfs_ilock(ip, XFS_ILOCK_EXCL);
                         xfs_trans_ijoin(tp, ip, 0);
  
@@ -878,25 +866,18 @@ xfs_iomap_write_unwritten(
  
         do {
                 /*
-                * set up a transaction to convert the range of extents
+                * Set up a transaction to convert the range of extents
                  * from unwritten to real. Do allocations in a loop until
                  * we have covered the range passed in.
                  *
-                * Note that we open code the transaction allocation here
-                * to pass KM_NOFS--we can't risk to recursing back into
-                * the filesystem here as we might be asked to write out
-                * the same inode that we complete here and might deadlock
-                * on the iolock.
+                * Note that we can't risk recursing back into the filesystem
+                * here as we might be asked to write out the same inode that we
+                * complete here and might deadlock on the iolock.
                  */
-               sb_start_intwrite(mp->m_super);
-               tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
-               tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
-                                         resblks, 0);
-               if (error) {
-                       xfs_trans_cancel(tp);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
+                               XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+               if (error)
                         return error;
-               }
  
                 xfs_ilock(ip, XFS_ILOCK_EXCL);
                 xfs_trans_ijoin(tp, ip, 0);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index fb7dc61f4a29d7cee3d4683c675c4551e7669e52..c5d4eba6972eb17327be763e7bd914c5f4da3a59 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -181,6 +181,8 @@ xfs_generic_create(
         }
  #endif
  
+       xfs_setup_iops(ip);
+
         if (tmpfile)
                 d_tmpfile(dentry, inode);
         else
@@ -368,6 +370,8 @@ xfs_vn_symlink(
         if (unlikely(error))
                 goto out_cleanup_inode;
  
+       xfs_setup_iops(cip);
+
         d_instantiate(dentry, inode);
         xfs_finish_inode_setup(cip);
         return 0;
@@ -442,6 +446,16 @@ xfs_vn_get_link(
         return ERR_PTR(error);
  }
  
+STATIC const char *
+xfs_vn_get_link_inline(
+       struct dentry           *dentry,
+       struct inode            *inode,
+       struct delayed_call     *done)
+{
+       ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE);
+       return XFS_I(inode)->i_df.if_u1.if_data;
+}
+
  STATIC int
  xfs_vn_getattr(
         struct vfsmount         *mnt,
@@ -599,12 +613,12 @@ xfs_setattr_nonsize(
                         return error;
         }
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
         if (error)
-               goto out_trans_cancel;
+               goto out_dqrele;
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, 0);
  
         /*
          * Change file ownership.  Must be the owner or privileged.
@@ -633,12 +647,10 @@ xfs_setattr_nonsize(
                                                 NULL, capable(CAP_FOWNER) ?
                                                 XFS_QMOPT_FORCE_RES : 0);
                         if (error)      /* out of quota */
-                               goto out_unlock;
+                               goto out_cancel;
                 }
         }
  
-       xfs_trans_ijoin(tp, ip, 0);
-
         /*
          * Change file ownership.  Must be the owner or privileged.
          */
@@ -722,10 +734,11 @@ xfs_setattr_nonsize(
  
         return 0;
  
-out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_trans_cancel:
+out_cancel:
         xfs_trans_cancel(tp);
+       /* ip was joined with no lock flags, so the ILOCK is dropped here. */
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_dqrele:
         xfs_qm_dqrele(udqp);
         xfs_qm_dqrele(gdqp);
         return error;
@@ -834,7 +845,7 @@ xfs_setattr_size(
          * We have to do all the page cache truncate work outside the
          * transaction context as the "lock" order is page lock->log space
          * reservation as defined by extent allocation in the writeback path.
-        * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
+        * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
          * having already truncated the in-memory version of the file (i.e. made
          * user visible changes). There's not much we can do about this, except
          * to hope that the caller sees ENOMEM and retries the truncate
@@ -849,10 +860,9 @@ xfs_setattr_size(
                 return error;
         truncate_setsize(inode, newsize);
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
         if (error)
-               goto out_trans_cancel;
+               return error;
  
         lock_flags |= XFS_ILOCK_EXCL;
         xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -971,12 +981,9 @@ xfs_vn_update_time(
  
         trace_xfs_update_time(ip);
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+       if (error)
                 return error;
-       }
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         if (flags & S_CTIME)
@@ -1167,6 +1174,18 @@ static const struct inode_operations xfs_symlink_inode_operations = {
         .update_time            = xfs_vn_update_time,
  };
  
+static const struct inode_operations xfs_inline_symlink_inode_operations = {
+       .readlink               = generic_readlink,
+       .get_link               = xfs_vn_get_link_inline,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+       .update_time            = xfs_vn_update_time,
+};
+
  STATIC void
  xfs_diflags_to_iflags(
         struct inode            *inode,
@@ -1193,7 +1212,7 @@ xfs_diflags_to_iflags(
  }
  
  /*
- * Initialize the Linux inode and set up the operation vectors.
+ * Initialize the Linux inode.
   *
   * When reading existing inodes from disk this is called directly from xfs_iget,
   * when creating a new inode it is called from xfs_ialloc after setting up the
@@ -1232,32 +1251,12 @@ xfs_setup_inode(
         i_size_write(inode, ip->i_d.di_size);
         xfs_diflags_to_iflags(inode, ip);
  
-       ip->d_ops = ip->i_mount->m_nondir_inode_ops;
-       lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFREG:
-               inode->i_op = &xfs_inode_operations;
-               inode->i_fop = &xfs_file_operations;
-               inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       case S_IFDIR:
+       if (S_ISDIR(inode->i_mode)) {
                 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
-               if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
-                       inode->i_op = &xfs_dir_ci_inode_operations;
-               else
-                       inode->i_op = &xfs_dir_inode_operations;
-               inode->i_fop = &xfs_dir_file_operations;
                 ip->d_ops = ip->i_mount->m_dir_inode_ops;
-               break;
-       case S_IFLNK:
-               inode->i_op = &xfs_symlink_inode_operations;
-               if (!(ip->i_df.if_flags & XFS_IFINLINE))
-                       inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       default:
-               inode->i_op = &xfs_inode_operations;
-               init_special_inode(inode, inode->i_mode, inode->i_rdev);
-               break;
+       } else {
+               ip->d_ops = ip->i_mount->m_nondir_inode_ops;
+               lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
         }
  
         /*
@@ -1277,3 +1276,35 @@ xfs_setup_inode(
                 cache_no_acl(inode);
         }
  }
+
+void
+xfs_setup_iops(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = &ip->i_vnode;
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFREG:
+               inode->i_op = &xfs_inode_operations;
+               inode->i_fop = &xfs_file_operations;
+               inode->i_mapping->a_ops = &xfs_address_space_operations;
+               break;
+       case S_IFDIR:
+               if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+                       inode->i_op = &xfs_dir_ci_inode_operations;
+               else
+                       inode->i_op = &xfs_dir_inode_operations;
+               inode->i_fop = &xfs_dir_file_operations;
+               break;
+       case S_IFLNK:
+               if (ip->i_df.if_flags & XFS_IFINLINE)
+                       inode->i_op = &xfs_inline_symlink_inode_operations;
+               else
+                       inode->i_op = &xfs_symlink_inode_operations;
+               break;
+       default:
+               inode->i_op = &xfs_inode_operations;
+               init_special_inode(inode, inode->i_mode, inode->i_rdev);
+               break;
+       }
+}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index b49ccf5c1d7564402c39671c4e67b7cf92ab8082..bde02f1fba7323a2186edcd8462df8d2b9273fc5 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -435,8 +435,7 @@ xfs_log_reserve(
         int                     cnt,
         struct xlog_ticket      **ticp,
         __uint8_t               client,
-       bool                    permanent,
-       uint                    t_type)
+       bool                    permanent)
  {
         struct xlog             *log = mp->m_log;
         struct xlog_ticket      *tic;
@@ -456,7 +455,6 @@ xfs_log_reserve(
         if (!tic)
                 return -ENOMEM;
  
-       tic->t_trans_type = t_type;
         *ticp = tic;
  
         xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
@@ -823,8 +821,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
         } while (iclog != first_iclog);
  #endif
         if (! (XLOG_FORCED_SHUTDOWN(log))) {
-               error = xfs_log_reserve(mp, 600, 1, &tic,
-                                       XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
+               error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
                 if (!error) {
                         /* the data section must be 32 bit size aligned */
                         struct {
@@ -2032,58 +2029,8 @@ xlog_print_tic_res(
             REG_TYPE_STR(ICREATE, "inode create")
         };
  #undef REG_TYPE_STR
-#define TRANS_TYPE_STR(type)   [XFS_TRANS_##type] = #type
-       static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
-           TRANS_TYPE_STR(SETATTR_NOT_SIZE),
-           TRANS_TYPE_STR(SETATTR_SIZE),
-           TRANS_TYPE_STR(INACTIVE),
-           TRANS_TYPE_STR(CREATE),
-           TRANS_TYPE_STR(CREATE_TRUNC),
-           TRANS_TYPE_STR(TRUNCATE_FILE),
-           TRANS_TYPE_STR(REMOVE),
-           TRANS_TYPE_STR(LINK),
-           TRANS_TYPE_STR(RENAME),
-           TRANS_TYPE_STR(MKDIR),
-           TRANS_TYPE_STR(RMDIR),
-           TRANS_TYPE_STR(SYMLINK),
-           TRANS_TYPE_STR(SET_DMATTRS),
-           TRANS_TYPE_STR(GROWFS),
-           TRANS_TYPE_STR(STRAT_WRITE),
-           TRANS_TYPE_STR(DIOSTRAT),
-           TRANS_TYPE_STR(WRITEID),
-           TRANS_TYPE_STR(ADDAFORK),
-           TRANS_TYPE_STR(ATTRINVAL),
-           TRANS_TYPE_STR(ATRUNCATE),
-           TRANS_TYPE_STR(ATTR_SET),
-           TRANS_TYPE_STR(ATTR_RM),
-           TRANS_TYPE_STR(ATTR_FLAG),
-           TRANS_TYPE_STR(CLEAR_AGI_BUCKET),
-           TRANS_TYPE_STR(SB_CHANGE),
-           TRANS_TYPE_STR(DUMMY1),
-           TRANS_TYPE_STR(DUMMY2),
-           TRANS_TYPE_STR(QM_QUOTAOFF),
-           TRANS_TYPE_STR(QM_DQALLOC),
-           TRANS_TYPE_STR(QM_SETQLIM),
-           TRANS_TYPE_STR(QM_DQCLUSTER),
-           TRANS_TYPE_STR(QM_QINOCREATE),
-           TRANS_TYPE_STR(QM_QUOTAOFF_END),
-           TRANS_TYPE_STR(FSYNC_TS),
-           TRANS_TYPE_STR(GROWFSRT_ALLOC),
-           TRANS_TYPE_STR(GROWFSRT_ZERO),
-           TRANS_TYPE_STR(GROWFSRT_FREE),
-           TRANS_TYPE_STR(SWAPEXT),
-           TRANS_TYPE_STR(CHECKPOINT),
-           TRANS_TYPE_STR(ICREATE),
-           TRANS_TYPE_STR(CREATE_TMPFILE)
-       };
-#undef TRANS_TYPE_STR
  
         xfs_warn(mp, "xlog_write: reservation summary:");
-       xfs_warn(mp, "  trans type  = %s (%u)",
-                ((ticket->t_trans_type <= 0 ||
-                  ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
-                 "bad-trans-type" : trans_type_str[ticket->t_trans_type]),
-                ticket->t_trans_type);
         xfs_warn(mp, "  unit res    = %d bytes",
                  ticket->t_unit_res);
         xfs_warn(mp, "  current res = %d bytes",
@@ -3378,7 +3325,7 @@ xfs_log_force(
  {
         int     error;
  
-       trace_xfs_log_force(mp, 0);
+       trace_xfs_log_force(mp, 0, _RET_IP_);
         error = _xfs_log_force(mp, flags, NULL);
         if (error)
                 xfs_warn(mp, "%s: error %d returned.", __func__, error);
@@ -3527,7 +3474,7 @@ xfs_log_force_lsn(
  {
         int     error;
  
-       trace_xfs_log_force(mp, lsn);
+       trace_xfs_log_force(mp, lsn, _RET_IP_);
         error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
         if (error)
                 xfs_warn(mp, "%s: error %d returned.", __func__, error);
@@ -3709,7 +3656,6 @@ xlog_ticket_alloc(
         tic->t_tid              = prandom_u32();
         tic->t_clientid         = client;
         tic->t_flags            = XLOG_TIC_INITED;
-       tic->t_trans_type       = 0;
         if (permanent)
                 tic->t_flags |= XLOG_TIC_PERM_RESERV;
  
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h

index aa533a7d50f2186f051b09c7dcfd78b431571d15..80ba0c047090165cb6353bcf32ae560fe82ef418 100644 (file)
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -161,8 +161,7 @@ int   xfs_log_reserve(struct xfs_mount *mp,
                           int              count,
                           struct xlog_ticket **ticket,
                           __uint8_t        clientid,
-                         bool             permanent,
-                         uint             t_type);
+                         bool             permanent);
  int      xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
  int      xfs_log_unmount_write(struct xfs_mount *mp);
  void      xfs_log_unmount(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index 4e7649351f5a25ab062396818861c9d7c2ee0d61..5e54e7955ea638a7c8fbbd885080fbf0005282f7 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -51,7 +51,6 @@ xlog_cil_ticket_alloc(
  
         tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
                                 KM_SLEEP|KM_NOFS);
-       tic->t_trans_type = XFS_TRANS_CHECKPOINT;
  
         /*
          * set the current reservation to zero so we know to steal the basic
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h

index ed8896310c00b64ff768a207a81e9f58b7836fc5..765f084759b5d5be555de8f71ec7556d55782f01 100644 (file)
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -175,7 +175,6 @@ typedef struct xlog_ticket {
         char               t_cnt;        /* current count                : 1  */
         char               t_clientid;   /* who does this belong to;     : 1  */
         char               t_flags;      /* properties of reservation    : 1  */
-       uint               t_trans_type; /* transaction type             : 4  */
  
          /* reservation array fields */
         uint               t_res_num;                    /* num in array : 4 */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index 396565f4324764058b979cf5e4c5bd96744f8ef8..83599784384686c2cb306bd2c2843422d5a2966c 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3843,7 +3843,7 @@ xlog_recover_add_to_cont_trans(
         old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
         old_len = item->ri_buf[item->ri_cnt-1].i_len;
  
-       ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
+       ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP);
         memcpy(&ptr[old_len], dp, len);
         item->ri_buf[item->ri_cnt-1].i_len += len;
         item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -4205,10 +4205,9 @@ xlog_recover_process_efi(
                 }
         }
  
-       tp = xfs_trans_alloc(mp, 0);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
         if (error)
-               goto abort_error;
+               return error;
         efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
  
         for (i = 0; i < efip->efi_format.efi_nextents; i++) {
@@ -4355,10 +4354,9 @@ xlog_recover_clear_agi_bucket(
         int             offset;
         int             error;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_clearagi, 0, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
         if (error)
-               goto out_abort;
+               goto out_error;
  
         error = xfs_read_agi(mp, tp, agno, &agibp);
         if (error)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index cfd4210dd01500203c5c90e2fe064442fe0998ac..e39b02351b4a257e92f9e2506a8af8f31aecc688 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -89,7 +89,6 @@ xfs_uuid_mount(
         if (hole < 0) {
                 xfs_uuid_table = kmem_realloc(xfs_uuid_table,
                         (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
-                       xfs_uuid_table_size  * sizeof(*xfs_uuid_table),
                         KM_SLEEP);
                 hole = xfs_uuid_table_size++;
         }
@@ -681,6 +680,9 @@ xfs_mountfs(
  
         xfs_set_maxicount(mp);
  
+       /* enable fail_at_unmount as default */
+       mp->m_fail_unmount = 1;
+
         error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
         if (error)
                 goto out;
@@ -690,10 +692,15 @@ xfs_mountfs(
         if (error)
                 goto out_remove_sysfs;
  
-       error = xfs_uuid_mount(mp);
+       error = xfs_error_sysfs_init(mp);
         if (error)
                 goto out_del_stats;
  
+
+       error = xfs_uuid_mount(mp);
+       if (error)
+               goto out_remove_error_sysfs;
+
         /*
          * Set the minimum read and write sizes
          */
@@ -957,6 +964,7 @@ xfs_mountfs(
         cancel_delayed_work_sync(&mp->m_reclaim_work);
         xfs_reclaim_inodes(mp, SYNC_WAIT);
   out_log_dealloc:
+       mp->m_flags |= XFS_MOUNT_UNMOUNTING;
         xfs_log_mount_cancel(mp);
   out_fail_wait:
         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@ -968,6 +976,8 @@ xfs_mountfs(
         xfs_da_unmount(mp);
   out_remove_uuid:
         xfs_uuid_unmount(mp);
+ out_remove_error_sysfs:
+       xfs_error_sysfs_del(mp);
   out_del_stats:
         xfs_sysfs_del(&mp->m_stats.xs_kobj);
   out_remove_sysfs:
@@ -1005,6 +1015,14 @@ xfs_unmountfs(
          */
         xfs_log_force(mp, XFS_LOG_SYNC);
  
+       /*
+        * We now need to tell the world we are unmounting. This will allow
+        * us to detect that the filesystem is going away and we should error
+        * out anything that we have been retrying in the background. This will
+        * prevent neverending retries in AIL pushing from hanging the unmount.
+        */
+       mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+
         /*
          * Flush all pending changes from the AIL.
          */
@@ -1056,6 +1074,7 @@ xfs_unmountfs(
  #endif
         xfs_free_perag(mp);
  
+       xfs_error_sysfs_del(mp);
         xfs_sysfs_del(&mp->m_stats.xs_kobj);
         xfs_sysfs_del(&mp->m_kobj);
  }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index eafe257b357addf83152f0c0ca53128e320abc94..c1b798c7212618462ee2130814eac8544c691bf0 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -37,6 +37,32 @@ enum {
         XFS_LOWSP_MAX,
  };
  
+/*
+ * Error Configuration
+ *
+ * Error classes define the subsystem the configuration belongs to.
+ * Error numbers define the errors that are configurable.
+ */
+enum {
+       XFS_ERR_METADATA,
+       XFS_ERR_CLASS_MAX,
+};
+enum {
+       XFS_ERR_DEFAULT,
+       XFS_ERR_EIO,
+       XFS_ERR_ENOSPC,
+       XFS_ERR_ENODEV,
+       XFS_ERR_ERRNO_MAX,
+};
+
+#define XFS_ERR_RETRY_FOREVER  -1
+
+struct xfs_error_cfg {
+       struct xfs_kobj kobj;
+       int             max_retries;
+       unsigned long   retry_timeout;  /* in jiffies, 0 = no timeout */
+};
+
  typedef struct xfs_mount {
         struct super_block      *m_super;
         xfs_tid_t               m_tid;          /* next unused tid for fs */
@@ -127,6 +153,9 @@ typedef struct xfs_mount {
         int64_t                 m_low_space[XFS_LOWSP_MAX];
                                                 /* low free space thresholds */
         struct xfs_kobj         m_kobj;
+       struct xfs_kobj         m_error_kobj;
+       struct xfs_kobj         m_error_meta_kobj;
+       struct xfs_error_cfg    m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
         struct xstats           m_stats;        /* per-fs stats */
  
         struct workqueue_struct *m_buf_workqueue;
@@ -148,6 +177,7 @@ typedef struct xfs_mount {
          */
         __uint32_t              m_generation;
  
+       bool                    m_fail_unmount;
  #ifdef DEBUG
         /*
          * DEBUG mode instrumentation to test and/or trigger delayed allocation
@@ -166,6 +196,7 @@ typedef struct xfs_mount {
  #define XFS_MOUNT_WSYNC                (1ULL << 0)     /* for nfs - all metadata ops
                                                    must be synchronous except
                                                    for space allocations */
+#define XFS_MOUNT_UNMOUNTING   (1ULL << 1)     /* filesystem is unmounting */
  #define XFS_MOUNT_WAS_CLEAN    (1ULL << 3)
  #define XFS_MOUNT_FS_SHUTDOWN  (1ULL << 4)     /* atomic stop of all filesystem
                                                    operations, typically for
@@ -364,4 +395,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
  int    xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
                         xfs_off_t count_fsb);
  
+struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
+               int error_class, int error);
+
  #endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c

index 51ddaf2c2b8c96648d70f364d45d6bb95eca3947..d5b756669fb5f43763b1b634b1348a5d52941743 100644 (file)
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -308,12 +308,9 @@ xfs_fs_commit_blocks(
                         goto out_drop_iolock;
         }
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       if (error)
                 goto out_drop_iolock;
-       }
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c

index be125e1758c1a5e4df36cfb8ec6e3e3643adc534..a60d9e2739d14a2ebcff8ee7dcedae7f5177bf3c 100644 (file)
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -783,13 +783,10 @@ xfs_qm_qino_alloc(
                 }
         }
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create,
-                                 XFS_QM_QINOCREATE_SPACE_RES(mp), 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
+                       XFS_QM_QINOCREATE_SPACE_RES(mp), 0, 0, &tp);
+       if (error)
                 return error;
-       }
  
         if (need_alloc) {
                 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c

index f4d0e0a8f517c65913b8d45f383450384576b39e..475a3882a81fef1cedaabf33383d04c1ee8d3e4b 100644 (file)
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -236,10 +236,8 @@ xfs_qm_scall_trunc_qfile(
  
         xfs_ilock(ip, XFS_IOLOCK_EXCL);
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
         if (error) {
-               xfs_trans_cancel(tp);
                 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                 goto out_put;
         }
@@ -436,12 +434,9 @@ xfs_qm_scall_setqlim(
         defq = xfs_get_defquota(dqp, q);
         xfs_dqunlock(dqp);
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_setqlim, 0, 0, 0, &tp);
+       if (error)
                 goto out_rele;
-       }
  
         xfs_dqlock(dqp);
         xfs_trans_dqjoin(tp, dqp);
@@ -569,13 +564,9 @@ xfs_qm_log_quotaoff_end(
         int                     error;
         xfs_qoff_logitem_t      *qoffi;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
-
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
+       if (error)
                 return error;
-       }
  
         qoffi = xfs_trans_get_qoff_item(tp, startqoff,
                                         flags & XFS_ALL_QUOTA_ACCT);
@@ -603,12 +594,9 @@ xfs_qm_log_quotaoff(
  
         *qoffstartp = NULL;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
+       if (error)
                 goto out;
-       }
  
         qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
         xfs_trans_log_quotaoff_item(tp, qoffi);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c

index abf44435d04a3f4b898e21a00e45ee8ae607738a..3938b37d1043bb6fd98879fa4783b6bbec8cfef6 100644 (file)
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -780,15 +780,14 @@ xfs_growfs_rt_alloc(
          * Allocate space to the file, as necessary.
          */
         while (oblocks < nblocks) {
-               tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
                 resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
                 /*
                  * Reserve space & log for one extent added to the file.
                  */
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc,
-                                         resblks, 0);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, resblks,
+                               0, 0, &tp);
                 if (error)
-                       goto out_trans_cancel;
+                       return error;
                 /*
                  * Lock the inode.
                  */
@@ -823,14 +822,13 @@ xfs_growfs_rt_alloc(
                 for (bno = map.br_startoff, fsbno = map.br_startblock;
                      bno < map.br_startoff + map.br_blockcount;
                      bno++, fsbno++) {
-                       tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO);
                         /*
                          * Reserve log for one block zeroing.
                          */
-                       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero,
-                                                 0, 0);
+                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero,
+                                       0, 0, 0, &tp);
                         if (error)
-                               goto out_trans_cancel;
+                               return error;
                         /*
                          * Lock the bitmap inode.
                          */
@@ -994,11 +992,10 @@ xfs_growfs_rt(
                 /*
                  * Start a transaction, get the log reservation.
                  */
-               tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtfree,
-                                         0, 0);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtfree, 0, 0, 0,
+                               &tp);
                 if (error)
-                       goto error_cancel;
+                       break;
                 /*
                  * Lock out other callers by grabbing the bitmap inode lock.
                  */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index 187e14b696c200bac8d78745fce25c449b6f35f6..416421d7ff10a2f9192650f717df32e7374a70bc 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -58,8 +58,7 @@
  #include <linux/parser.h>
  
  static const struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_ioend_zone;
-mempool_t *xfs_ioend_pool;
+struct bio_set *xfs_ioend_bioset;
  
  static struct kset *xfs_kset;          /* top-level xfs sysfs dir */
  #ifdef DEBUG
@@ -350,6 +349,7 @@ xfs_parseargs(
                 case Opt_pqnoenforce:
                         mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
                         mp->m_qflags &= ~XFS_PQUOTA_ENFD;
+                       break;
                 case Opt_gquota:
                 case Opt_grpquota:
                         mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
@@ -928,7 +928,7 @@ xfs_fs_alloc_inode(
  
  /*
   * Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
+ * the linux inode, we can inactivate and reclaim the inode.
   */
  STATIC void
  xfs_fs_destroy_inode(
@@ -938,9 +938,14 @@ xfs_fs_destroy_inode(
  
         trace_xfs_destroy_inode(ip);
  
-       XFS_STATS_INC(ip->i_mount, vn_reclaim);
+       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+       XFS_STATS_INC(ip->i_mount, vn_rele);
+       XFS_STATS_INC(ip->i_mount, vn_remove);
+
+       xfs_inactive(ip);
  
         ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+       XFS_STATS_INC(ip->i_mount, vn_reclaim);
  
         /*
          * We should never get here with one of the reclaim flags already set.
@@ -987,24 +992,6 @@ xfs_fs_inode_init_once(
                      "xfsino", ip->i_ino);
  }
  
-STATIC void
-xfs_fs_evict_inode(
-       struct inode            *inode)
-{
-       xfs_inode_t             *ip = XFS_I(inode);
-
-       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-
-       trace_xfs_evict_inode(ip);
-
-       truncate_inode_pages_final(&inode->i_data);
-       clear_inode(inode);
-       XFS_STATS_INC(ip->i_mount, vn_rele);
-       XFS_STATS_INC(ip->i_mount, vn_remove);
-
-       xfs_inactive(ip);
-}
-
  /*
   * We do an unlocked check for XFS_IDONTCACHE here because we are already
   * serialised against cache hits here via the inode->i_lock and igrab() in
@@ -1276,6 +1263,16 @@ xfs_fs_remount(
                         return -EINVAL;
                 }
  
+               if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+                   xfs_sb_has_ro_compat_feature(sbp,
+                                       XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+                       xfs_warn(mp,
+"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
+                               (sbp->sb_features_ro_compat &
+                                       XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+                       return -EINVAL;
+               }
+
                 mp->m_flags &= ~XFS_MOUNT_RDONLY;
  
                 /*
@@ -1663,7 +1660,6 @@ xfs_fs_free_cached_objects(
  static const struct super_operations xfs_super_operations = {
         .alloc_inode            = xfs_fs_alloc_inode,
         .destroy_inode          = xfs_fs_destroy_inode,
-       .evict_inode            = xfs_fs_evict_inode,
         .drop_inode             = xfs_fs_drop_inode,
         .put_super              = xfs_fs_put_super,
         .sync_fs                = xfs_fs_sync_fs,
@@ -1688,20 +1684,15 @@ MODULE_ALIAS_FS("xfs");
  STATIC int __init
  xfs_init_zones(void)
  {
-
-       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-       if (!xfs_ioend_zone)
+       xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
+                       offsetof(struct xfs_ioend, io_inline_bio));
+       if (!xfs_ioend_bioset)
                 goto out;
  
-       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-                                                 xfs_ioend_zone);
-       if (!xfs_ioend_pool)
-               goto out_destroy_ioend_zone;
-
         xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
                                                 "xfs_log_ticket");
         if (!xfs_log_ticket_zone)
-               goto out_destroy_ioend_pool;
+               goto out_free_ioend_bioset;
  
         xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
                                                 "xfs_bmap_free_item");
@@ -1797,10 +1788,8 @@ xfs_init_zones(void)
         kmem_zone_destroy(xfs_bmap_free_item_zone);
   out_destroy_log_ticket_zone:
         kmem_zone_destroy(xfs_log_ticket_zone);
- out_destroy_ioend_pool:
-       mempool_destroy(xfs_ioend_pool);
- out_destroy_ioend_zone:
-       kmem_zone_destroy(xfs_ioend_zone);
+ out_free_ioend_bioset:
+       bioset_free(xfs_ioend_bioset);
   out:
         return -ENOMEM;
  }
@@ -1826,9 +1815,7 @@ xfs_destroy_zones(void)
         kmem_zone_destroy(xfs_btree_cur_zone);
         kmem_zone_destroy(xfs_bmap_free_item_zone);
         kmem_zone_destroy(xfs_log_ticket_zone);
-       mempool_destroy(xfs_ioend_pool);
-       kmem_zone_destroy(xfs_ioend_zone);
-
+       bioset_free(xfs_ioend_bioset);
  }
  
  STATIC int __init
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c

index b44284c1adda15c647ddcc2feebf0a24302bdcbc..08a46c6181fdb698bf6b6deed28e21fa6c01ce7d 100644 (file)
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -131,6 +131,8 @@ xfs_readlink(
  
         trace_xfs_readlink(ip);
  
+       ASSERT(!(ip->i_df.if_flags & XFS_IFINLINE));
+
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
  
@@ -150,12 +152,7 @@ xfs_readlink(
         }
  
  
-       if (ip->i_df.if_flags & XFS_IFINLINE) {
-               memcpy(link, ip->i_df.if_u1.if_data, pathlen);
-               link[pathlen] = '\0';
-       } else {
-               error = xfs_readlink_bmap(ip, link);
-       }
+       error = xfs_readlink_bmap(ip, link);
  
   out:
         xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -221,7 +218,6 @@ xfs_symlink(
         if (error)
                 return error;
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
         /*
          * The symlink will fit into the inode data fork?
          * There can't be any attributes so we get the whole variable part.
@@ -231,13 +227,15 @@ xfs_symlink(
         else
                 fs_blocks = xfs_symlink_blocks(mp, pathlen);
         resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0);
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
         if (error == -ENOSPC && fs_blocks == 0) {
                 resblks = 0;
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
+                               &tp);
         }
         if (error)
-               goto out_trans_cancel;
+               goto out_release_inode;
  
         xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
                       XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
@@ -302,19 +300,11 @@ xfs_symlink(
          * If the symlink will fit into the inode, write it inline.
          */
         if (pathlen <= XFS_IFORK_DSIZE(ip)) {
-               xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
-               memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
-               ip->i_d.di_size = pathlen;
-
-               /*
-                * The inode was initially created in extent format.
-                */
-               ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
-               ip->i_df.if_flags |= XFS_IFINLINE;
+               xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen);
  
+               ip->i_d.di_size = pathlen;
                 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
                 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
-
         } else {
                 int     offset;
  
@@ -455,12 +445,9 @@ xfs_inactive_symlink_rmt(
          */
         ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
  
-       tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       if (error)
                 return error;
-       }
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, 0);
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c

index 6ced4f1434948d3757077c732a93d6fac88ea0cc..4c2c5508620819ea4075fe65c1b7390a47246add 100644 (file)
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -17,10 +17,11 @@
   */
  
  #include "xfs.h"
-#include "xfs_sysfs.h"
+#include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
+#include "xfs_sysfs.h"
  #include "xfs_log.h"
  #include "xfs_log_priv.h"
  #include "xfs_stats.h"
@@ -362,3 +363,291 @@ struct kobj_type xfs_log_ktype = {
         .sysfs_ops = &xfs_sysfs_ops,
         .default_attrs = xfs_log_attrs,
  };
+
+/*
+ * Metadata IO error configuration
+ *
+ * The sysfs structure here is:
+ *     ...xfs/<dev>/error/<class>/<errno>/<error_attrs>
+ *
+ * where <class> allows us to discriminate between data IO and metadata IO,
+ * and any other future type of IO (e.g. special inode or directory error
+ * handling) we care to support.
+ */
+
+/*
+ * Convert a sysfs kobject into the struct xfs_error_cfg that contains it.
+ *
+ * to_kobj() yields the struct xfs_kobj for @kobject, and container_of() then
+ * steps out to the enclosing xfs_error_cfg through its "kobj" member.  The
+ * result is only meaningful when @kobject really is the kobject of an
+ * xfs_error_cfg; nothing here checks that.
+ */
+static inline struct xfs_error_cfg *
+to_error_cfg(struct kobject *kobject)
+{
+       struct xfs_kobj *kobj = to_kobj(kobject);
+       return container_of(kobj, struct xfs_error_cfg, kobj);
+}
+
+/*
+ * Convert a sysfs kobject into the struct xfs_mount that contains it.
+ *
+ * The lookup goes through the mount's "m_error_kobj" member, so the result is
+ * only meaningful when @kobject is that member's kobject; nothing here checks
+ * that.  Used by the fail_at_unmount attribute handlers.
+ */
+static inline struct xfs_mount *
+err_to_mp(struct kobject *kobject)
+{
+       struct xfs_kobj *kobj = to_kobj(kobject);
+       return container_of(kobj, struct xfs_mount, m_error_kobj);
+}
+
+/*
+ * sysfs "show" handler for the max_retries attribute.
+ *
+ * Formats the max_retries value of the error configuration that owns
+ * @kobject into @buf as a signed decimal followed by a newline, bounded by
+ * PAGE_SIZE, and returns snprintf()'s result.
+ */
+static ssize_t
+max_retries_show(
+       struct kobject  *kobject,
+       char            *buf)
+{
+       struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", cfg->max_retries);
+}
+
+/*
+ * sysfs "store" handler for the max_retries attribute.
+ *
+ * Parses @buf as an int with kstrtoint() and stores it in the error
+ * configuration that owns @kobject.
+ *
+ * Returns @count on success, the kstrtoint() error if parsing fails, or
+ * -EINVAL if the value is below -1.
+ */
+static ssize_t
+max_retries_store(
+       struct kobject  *kobject,
+       const char      *buf,
+       size_t          count)
+{
+       struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+       int             ret;
+       int             val;
+
+       /* NOTE(review): base 0 presumably auto-detects the radix - confirm. */
+       ret = kstrtoint(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       /*
+        * -1 is the only negative value accepted; presumably it corresponds
+        * to XFS_ERR_RETRY_FOREVER - TODO confirm against its definition.
+        */
+       if (val < -1)
+               return -EINVAL;
+
+       cfg->max_retries = val;
+       return count;
+}
+/* NOTE(review): presumably defines the attribute from the _show/_store pair. */
+XFS_SYSFS_ATTR_RW(max_retries);
+
+/*
+ * sysfs "show" handler for the retry_timeout_seconds attribute.
+ *
+ * The configuration keeps retry_timeout in jiffies (it is written with
+ * msecs_to_jiffies() elsewhere in this file).  Here it is converted to
+ * milliseconds and divided by MSEC_PER_SEC, so the value printed into @buf
+ * is in seconds, followed by a newline.  Returns snprintf()'s result.
+ */
+static ssize_t
+retry_timeout_seconds_show(
+       struct kobject  *kobject,
+       char            *buf)
+{
+       struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+
+       return snprintf(buf, PAGE_SIZE, "%ld\n",
+                       jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC);
+}
+
+/*
+ * sysfs "store" handler for the retry_timeout_seconds attribute.
+ *
+ * Parses @buf as an int number of seconds, converts it to jiffies and stores
+ * it in the error configuration that owns @kobject.
+ *
+ * Returns @count on success, the kstrtoint() error if parsing fails, or
+ * -EINVAL if the value is negative or larger than 86400.
+ */
+static ssize_t
+retry_timeout_seconds_store(
+       struct kobject  *kobject,
+       const char      *buf,
+       size_t          count)
+{
+       struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+       int             ret;
+       int             val;
+
+       ret = kstrtoint(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       /* 1 day timeout maximum */
+       if (val < 0 || val > 86400)
+               return -EINVAL;
+
+       /* seconds -> milliseconds -> jiffies */
+       cfg->retry_timeout = msecs_to_jiffies(val * MSEC_PER_SEC);
+       return count;
+}
+XFS_SYSFS_ATTR_RW(retry_timeout_seconds);
+
+/*
+ * sysfs "show" handler for the fail_at_unmount attribute.
+ *
+ * Unlike the per-errno attributes, this one belongs to the mount: @kobject is
+ * resolved with err_to_mp() and the mount's m_fail_unmount flag is printed
+ * into @buf with "%d" and a trailing newline.  Returns snprintf()'s result.
+ */
+static ssize_t
+fail_at_unmount_show(
+       struct kobject  *kobject,
+       char            *buf)
+{
+       struct xfs_mount        *mp = err_to_mp(kobject);
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount);
+}
+
+/*
+ * sysfs "store" handler for the fail_at_unmount attribute.
+ *
+ * Parses @buf as an int and stores it in the m_fail_unmount flag of the
+ * mount that owns @kobject.
+ *
+ * Returns @count on success, the kstrtoint() error if parsing fails, or
+ * -EINVAL if the value is anything other than 0 or 1.
+ */
+static ssize_t
+fail_at_unmount_store(
+       struct kobject  *kobject,
+       const char      *buf,
+       size_t          count)
+{
+       struct xfs_mount        *mp = err_to_mp(kobject);
+       int             ret;
+       int             val;
+
+       ret = kstrtoint(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       /* the flag is boolean: only 0 and 1 are accepted */
+       if (val < 0 || val > 1)
+               return -EINVAL;
+
+       mp->m_fail_unmount = val;
+       return count;
+}
+XFS_SYSFS_ATTR_RW(fail_at_unmount);
+
+/*
+ * NULL-terminated attribute list used as the default_attrs of
+ * xfs_error_cfg_ktype.  fail_at_unmount is deliberately not listed here: it
+ * is added to the mount's error kobject with an explicit sysfs_create_file()
+ * call in xfs_error_sysfs_init().
+ */
+static struct attribute *xfs_error_attrs[] = {
+       ATTR_LIST(max_retries),
+       ATTR_LIST(retry_timeout_seconds),
+       NULL,
+};
+
+
+/*
+ * kobject type for the per-errno configuration kobjects (the kobj member of
+ * struct xfs_error_cfg); it carries the xfs_error_attrs attribute list.
+ */
+struct kobj_type xfs_error_cfg_ktype = {
+       .release = xfs_sysfs_release,
+       .sysfs_ops = &xfs_sysfs_ops,
+       .default_attrs = xfs_error_attrs,
+};
+
+/*
+ * kobject type for the "error" kobject and the per-class kobject created
+ * under it.  It sets no default_attrs.
+ */
+struct kobj_type xfs_error_ktype = {
+       .release = xfs_sysfs_release,
+       .sysfs_ops = &xfs_sysfs_ops,
+};
+
+/*
+ * Error initialization tables. These need to be ordered in the same
+ * order as the enums used to index the array. All class init tables need to
+ * define a "default" behaviour as the first entry, all other entries can be
+ * empty.
+ *
+ * Each table entry is consumed by xfs_error_sysfs_init_class(), which uses
+ * the name for the sysfs kobject and copies the two values into the matching
+ * struct xfs_error_cfg.
+ */
+struct xfs_error_init {
+       char            *name;          /* sysfs name of the errno entry */
+       int             max_retries;    /* initial cfg->max_retries */
+       int             retry_timeout;  /* in seconds */
+};
+
+/*
+ * Initial settings for the metadata error class, one entry per errno slot.
+ * Entries are positional, so their order has to match the XFS_ERR_* indices
+ * that xfs_error_get_cfg() uses (presumably DEFAULT, EIO, ENOSPC, ENODEV in
+ * that order - confirm against the enum definition).
+ */
+static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = {
+       { .name = "default",
+         .max_retries = XFS_ERR_RETRY_FOREVER,
+         .retry_timeout = 0,
+       },
+       { .name = "EIO",
+         .max_retries = XFS_ERR_RETRY_FOREVER,
+         .retry_timeout = 0,
+       },
+       { .name = "ENOSPC",
+         .max_retries = XFS_ERR_RETRY_FOREVER,
+         .retry_timeout = 0,
+       },
+       /* retry_timeout is omitted here and is therefore zero-initialised */
+       { .name = "ENODEV",
+         .max_retries = 0,
+       },
+};
+
+/*
+ * Create the sysfs kobjects for one error class and seed its configuration.
+ *
+ * @mp:          mount whose m_error_cfg[class][] entries are initialised
+ * @class:       first index into mp->m_error_cfg; asserted to be below
+ *               XFS_ERR_CLASS_MAX
+ * @parent_name: sysfs name for the class kobject
+ * @parent_kobj: the class kobject; it is initialised here with
+ *               mp->m_error_kobj as its parent and then serves as the parent
+ *               of every per-errno kobject
+ * @init:        table of XFS_ERR_ERRNO_MAX entries giving the name and the
+ *               initial values for each errno slot
+ *
+ * Returns 0 on success or the error returned by xfs_sysfs_init().  On
+ * failure every kobject created by this call has been deleted again.
+ */
+static int
+xfs_error_sysfs_init_class(
+       struct xfs_mount        *mp,
+       int                     class,
+       const char              *parent_name,
+       struct xfs_kobj         *parent_kobj,
+       const struct xfs_error_init init[])
+{
+       struct xfs_error_cfg    *cfg;
+       int                     error;
+       int                     i;
+
+       ASSERT(class < XFS_ERR_CLASS_MAX);
+
+       /* the class kobject itself, below the mount's error kobject */
+       error = xfs_sysfs_init(parent_kobj, &xfs_error_ktype,
+                               &mp->m_error_kobj, parent_name);
+       if (error)
+               return error;
+
+       /* one kobject per errno slot, below the class kobject */
+       for (i = 0; i < XFS_ERR_ERRNO_MAX; i++) {
+               cfg = &mp->m_error_cfg[class][i];
+               error = xfs_sysfs_init(&cfg->kobj, &xfs_error_cfg_ktype,
+                                       parent_kobj, init[i].name);
+               if (error)
+                       goto out_error;
+
+               /* the table holds seconds, the config holds jiffies */
+               cfg->max_retries = init[i].max_retries;
+               cfg->retry_timeout = msecs_to_jiffies(
+                                       init[i].retry_timeout * MSEC_PER_SEC);
+       }
+       return 0;
+
+out_error:
+       /* unwind the entries that succeeded */
+       for (i--; i >= 0; i--) {
+               cfg = &mp->m_error_cfg[class][i];
+               xfs_sysfs_del(&cfg->kobj);
+       }
+       xfs_sysfs_del(parent_kobj);
+       return error;
+}
+
+/*
+ * Set up the error configuration sysfs hierarchy for a mount.
+ *
+ * Creates the "error" kobject below mp->m_kobj, adds the fail_at_unmount
+ * attribute file to it, and then creates the "metadata" error class with its
+ * per-errno entries from xfs_error_meta_init.
+ *
+ * Returns 0 on success.  If a later step fails, the "error" kobject is
+ * deleted again and that step's error is returned.
+ */
+int
+xfs_error_sysfs_init(
+       struct xfs_mount        *mp)
+{
+       int                     error;
+
+       /* .../xfs/<dev>/error/ */
+       error = xfs_sysfs_init(&mp->m_error_kobj, &xfs_error_ktype,
+                               &mp->m_kobj, "error");
+       if (error)
+               return error;
+
+       /* fail_at_unmount is per mount, so it lives directly in error/ */
+       error = sysfs_create_file(&mp->m_error_kobj.kobject,
+                                 ATTR_LIST(fail_at_unmount));
+
+       if (error)
+               goto out_error;
+
+       /* .../xfs/<dev>/error/metadata/ */
+       error = xfs_error_sysfs_init_class(mp, XFS_ERR_METADATA,
+                               "metadata", &mp->m_error_meta_kobj,
+                               xfs_error_meta_init);
+       if (error)
+               goto out_error;
+
+       return 0;
+
+out_error:
+       xfs_sysfs_del(&mp->m_error_kobj);
+       return error;
+}
+
+/*
+ * Tear down the error configuration sysfs hierarchy of a mount.
+ *
+ * Deletes the kobject of every m_error_cfg[class][errno] entry first, then
+ * the metadata class kobject, and finally the "error" kobject, i.e. children
+ * before their parents.
+ *
+ * NOTE(review): the loop visits every class up to XFS_ERR_CLASS_MAX, while
+ * xfs_error_sysfs_init() only sets up XFS_ERR_METADATA; presumably that is
+ * the only class defined - confirm if more classes are added.
+ */
+void
+xfs_error_sysfs_del(
+       struct xfs_mount        *mp)
+{
+       struct xfs_error_cfg    *cfg;
+       int                     i, j;
+
+       for (i = 0; i < XFS_ERR_CLASS_MAX; i++) {
+               for (j = 0; j < XFS_ERR_ERRNO_MAX; j++) {
+                       cfg = &mp->m_error_cfg[i][j];
+
+                       xfs_sysfs_del(&cfg->kobj);
+               }
+       }
+       xfs_sysfs_del(&mp->m_error_meta_kobj);
+       xfs_sysfs_del(&mp->m_error_kobj);
+}
+
+/*
+ * Look up the error handling configuration for @error in @error_class.
+ *
+ * @mp:          mount holding the m_error_cfg table
+ * @error_class: first index into mp->m_error_cfg; used without range checking
+ * @error:       errno value, accepted either as a positive number (EIO) or
+ *               in the negative form (-EIO) that kernel error variables
+ *               normally carry
+ *
+ * The sign of @error is normalised before the lookup.  Without that, a
+ * negative errno can never match the positive case labels below, and every
+ * error would silently be given the default configuration.
+ *
+ * Returns the configuration for EIO, ENOSPC or ENODEV when @error is one of
+ * those, and the XFS_ERR_DEFAULT configuration for anything else.  The
+ * returned pointer always refers to a slot of mp->m_error_cfg.
+ */
+struct xfs_error_cfg *
+xfs_error_get_cfg(
+       struct xfs_mount        *mp,
+       int                     error_class,
+       int                     error)
+{
+       struct xfs_error_cfg    *cfg;
+
+       /* accept -Exxx as well as Exxx */
+       if (error < 0)
+               error = -error;
+
+       switch (error) {
+       case EIO:
+               cfg = &mp->m_error_cfg[error_class][XFS_ERR_EIO];
+               break;
+       case ENOSPC:
+               cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENOSPC];
+               break;
+       case ENODEV:
+               cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENODEV];
+               break;
+       default:
+               cfg = &mp->m_error_cfg[error_class][XFS_ERR_DEFAULT];
+               break;
+       }
+
+       return cfg;
+}
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h

index be692e59938db7e8cba4ca4186b1a8f90928887c..d04637181ef21709715d6320bf0569e3d715740a 100644 (file)
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -58,4 +58,7 @@ xfs_sysfs_del(
         wait_for_completion(&kobj->complete);
  }
  
+int    xfs_error_sysfs_init(struct xfs_mount *mp);
+void   xfs_error_sysfs_del(struct xfs_mount *mp);
+
  #endif /* __XFS_SYSFS_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index c8d58426008ed7ef49096097904ed13653a8cfe9..ea94ee0fe5ea2b8e9e089b857d82a30057e0b950 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -364,7 +364,6 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split);
  DEFINE_BUF_EVENT(xfs_buf_get_uncached);
  DEFINE_BUF_EVENT(xfs_bdstrat_shut);
  DEFINE_BUF_EVENT(xfs_buf_item_relse);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone);
  DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
  DEFINE_BUF_EVENT(xfs_buf_error_relse);
  DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
@@ -944,7 +943,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
         TP_ARGS(log, tic),
         TP_STRUCT__entry(
                 __field(dev_t, dev)
-               __field(unsigned, trans_type)
                 __field(char, ocnt)
                 __field(char, cnt)
                 __field(int, curr_res)
@@ -962,7 +960,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
         ),
         TP_fast_assign(
                 __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->trans_type = tic->t_trans_type;
                 __entry->ocnt = tic->t_ocnt;
                 __entry->cnt = tic->t_cnt;
                 __entry->curr_res = tic->t_curr_res;
@@ -980,14 +977,13 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
                 __entry->curr_block = log->l_curr_block;
                 __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
         ),
-       TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
+       TP_printk("dev %d:%d t_ocnt %u t_cnt %u t_curr_res %u "
                   "t_unit_res %u t_flags %s reserveq %s "
                   "writeq %s grant_reserve_cycle %d "
                   "grant_reserve_bytes %d grant_write_cycle %d "
                   "grant_write_bytes %d curr_cycle %d curr_block %d "
                   "tail_cycle %d tail_block %d",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
                   __entry->ocnt,
                   __entry->cnt,
                   __entry->curr_res,
@@ -1053,19 +1049,21 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
  )
  
  TRACE_EVENT(xfs_log_force,
-       TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn),
-       TP_ARGS(mp, lsn),
+       TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn, unsigned long caller_ip),
+       TP_ARGS(mp, lsn, caller_ip),
         TP_STRUCT__entry(
                 __field(dev_t, dev)
                 __field(xfs_lsn_t, lsn)
+               __field(unsigned long, caller_ip)
         ),
         TP_fast_assign(
                 __entry->dev = mp->m_super->s_dev;
                 __entry->lsn = lsn;
+               __entry->caller_ip = caller_ip;
         ),
-       TP_printk("dev %d:%d lsn 0x%llx",
+       TP_printk("dev %d:%d lsn 0x%llx caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->lsn)
+                 __entry->lsn, (void *)__entry->caller_ip)
  )
  
  #define DEFINE_LOG_ITEM_EVENT(name) \
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c

index 20c53666cb4b3272400bc7111d285f104e555d75..5f3d33d16e6706b9db55cb3919c35feea29408ca 100644 (file)
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -46,47 +46,6 @@ xfs_trans_init(
         xfs_trans_resv_calc(mp, M_RES(mp));
  }
  
-/*
- * This routine is called to allocate a transaction structure.
- * The type parameter indicates the type of the transaction.  These
- * are enumerated in xfs_trans.h.
- *
- * Dynamically allocate the transaction structure from the transaction
- * zone, initialize it, and return it to the caller.
- */
-xfs_trans_t *
-xfs_trans_alloc(
-       xfs_mount_t     *mp,
-       uint            type)
-{
-       xfs_trans_t     *tp;
-
-       sb_start_intwrite(mp->m_super);
-       tp = _xfs_trans_alloc(mp, type, KM_SLEEP);
-       tp->t_flags |= XFS_TRANS_FREEZE_PROT;
-       return tp;
-}
-
-xfs_trans_t *
-_xfs_trans_alloc(
-       xfs_mount_t     *mp,
-       uint            type,
-       xfs_km_flags_t  memflags)
-{
-       xfs_trans_t     *tp;
-
-       WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
-       atomic_inc(&mp->m_active_trans);
-
-       tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
-       tp->t_magic = XFS_TRANS_HEADER_MAGIC;
-       tp->t_type = type;
-       tp->t_mountp = mp;
-       INIT_LIST_HEAD(&tp->t_items);
-       INIT_LIST_HEAD(&tp->t_busy);
-       return tp;
-}
-
  /*
   * Free the transaction structure.  If there is more clean up
   * to do when the structure is freed, add it here.
@@ -99,7 +58,7 @@ xfs_trans_free(
         xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
  
         atomic_dec(&tp->t_mountp->m_active_trans);
-       if (tp->t_flags & XFS_TRANS_FREEZE_PROT)
+       if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
                 sb_end_intwrite(tp->t_mountp->m_super);
         xfs_trans_free_dqinfo(tp);
         kmem_zone_free(xfs_trans_zone, tp);
@@ -125,7 +84,6 @@ xfs_trans_dup(
          * Initialize the new transaction structure.
          */
         ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
-       ntp->t_type = tp->t_type;
         ntp->t_mountp = tp->t_mountp;
         INIT_LIST_HEAD(&ntp->t_items);
         INIT_LIST_HEAD(&ntp->t_busy);
@@ -135,9 +93,9 @@ xfs_trans_dup(
  
         ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
                        (tp->t_flags & XFS_TRANS_RESERVE) |
-                      (tp->t_flags & XFS_TRANS_FREEZE_PROT);
+                      (tp->t_flags & XFS_TRANS_NO_WRITECOUNT);
         /* We gave our writer reference to the new transaction */
-       tp->t_flags &= ~XFS_TRANS_FREEZE_PROT;
+       tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
         ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
         ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
         tp->t_blk_res = tp->t_blk_res_used;
@@ -165,7 +123,7 @@ xfs_trans_dup(
   * This does not do quota reservations. That typically is done by the
   * caller afterwards.
   */
-int
+static int
  xfs_trans_reserve(
         struct xfs_trans        *tp,
         struct xfs_trans_res    *resp,
@@ -219,7 +177,7 @@ xfs_trans_reserve(
                                                 resp->tr_logres,
                                                 resp->tr_logcount,
                                                 &tp->t_ticket, XFS_TRANSACTION,
-                                               permanent, tp->t_type);
+                                               permanent);
                 }
  
                 if (error)
@@ -268,6 +226,42 @@ undo_blocks:
         return error;
  }
  
+/*
+ * Allocate a transaction and reserve its resources in a single call.
+ *
+ * @mp:        mount the transaction belongs to
+ * @resp:      log reservation, passed on to xfs_trans_reserve()
+ * @blocks:    block reservation, passed on to xfs_trans_reserve()
+ * @rtextents: realtime extent reservation, passed on to xfs_trans_reserve()
+ * @flags:     initial t_flags; XFS_TRANS_NO_WRITECOUNT and XFS_TRANS_NOFS
+ *             are also acted on here
+ * @tpp:       set to the new transaction on success
+ *
+ * Returns 0 with *tpp set on success.  If the reservation fails, the
+ * transaction is cancelled here, *tpp is left untouched and the error is
+ * returned, so callers must not cancel anything on that path.
+ */
+int
+xfs_trans_alloc(
+       struct xfs_mount        *mp,
+       struct xfs_trans_res    *resp,
+       uint                    blocks,
+       uint                    rtextents,
+       uint                    flags,
+       struct xfs_trans        **tpp)
+{
+       struct xfs_trans        *tp;
+       int                     error;
+
+       /*
+        * Paired with the sb_end_intwrite() in xfs_trans_free(), which is
+        * skipped under the same flag.
+        */
+       if (!(flags & XFS_TRANS_NO_WRITECOUNT))
+               sb_start_intwrite(mp->m_super);
+
+       WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
+       /* dropped again by the atomic_dec() in xfs_trans_free() */
+       atomic_inc(&mp->m_active_trans);
+
+       /*
+        * NOTE(review): the result is used without a NULL check; presumably
+        * KM_NOFS and KM_SLEEP allocations cannot fail - confirm.
+        */
+       tp = kmem_zone_zalloc(xfs_trans_zone,
+               (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
+       tp->t_magic = XFS_TRANS_HEADER_MAGIC;
+       tp->t_flags = flags;
+       tp->t_mountp = mp;
+       INIT_LIST_HEAD(&tp->t_items);
+       INIT_LIST_HEAD(&tp->t_busy);
+
+       error = xfs_trans_reserve(tp, resp, blocks, rtextents);
+       if (error) {
+               xfs_trans_cancel(tp);
+               return error;
+       }
+
+       *tpp = tp;
+       return 0;
+}
+
  /*
   * Record the indicated change to the given field for application
   * to the file system's superblock when the transaction commits.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h

index e7c49cf43fbc85c183e1728966d4f4506b94eaf6..9a462e892e4f33f9f50508b6ba8f33bfb348a420 100644 (file)
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -90,7 +90,6 @@ void  xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
   */
  typedef struct xfs_trans {
         unsigned int            t_magic;        /* magic number */
-       unsigned int            t_type;         /* transaction type */
         unsigned int            t_log_res;      /* amt of log space resvd */
         unsigned int            t_log_count;    /* count for perm log res */
         unsigned int            t_blk_res;      /* # of blocks resvd */
@@ -148,10 +147,9 @@ typedef struct xfs_trans {
  /*
   * XFS transaction mechanism exported interfaces.
   */
-xfs_trans_t    *xfs_trans_alloc(struct xfs_mount *, uint);
-xfs_trans_t    *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t);
-int            xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *,
-                                 uint, uint);
+int            xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
+                       uint blocks, uint rtextents, uint flags,
+                       struct xfs_trans **tpp);
  void           xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
  
  struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c

index d111f691f313fdc980a9d9e9f6519d9bd30e06a0..ec58ff094b1dfbfcd634f5aad4b0fae2da8cc086 100644 (file)
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -146,7 +146,7 @@ __xfs_xattr_put_listent(
         arraytop = context->count + prefix_len + namelen + 1;
         if (arraytop > context->firstu) {
                 context->count = -1;    /* insufficient space */
-               return 1;
+               return 0;
         }
         offset = (char *)context->alist + context->count;
         strncpy(offset, prefix, prefix_len);
@@ -166,8 +166,7 @@ xfs_xattr_put_listent(
         int             flags,
         unsigned char   *name,
         int             namelen,
-       int             valuelen,
-       unsigned char   *value)
+       int             valuelen)
  {
         char *prefix;
         int prefix_len;
@@ -221,11 +220,15 @@ xfs_xattr_put_listent(
  }
  
  ssize_t
-xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+xfs_vn_listxattr(
+       struct dentry   *dentry,
+       char            *data,
+       size_t          size)
  {
         struct xfs_attr_list_context context;
         struct attrlist_cursor_kern cursor = { 0 };
-       struct inode            *inode = d_inode(dentry);
+       struct inode    *inode = d_inode(dentry);
+       int             error;
  
         /*
          * First read the regular on-disk attributes.
@@ -239,7 +242,9 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
         context.firstu = context.bufsize;
         context.put_listent = xfs_xattr_put_listent;
  
-       xfs_attr_list_int(&context);
+       error = xfs_attr_list_int(&context);
+       if (error)
+               return error;
         if (context.count < 0)
                 return -ERANGE;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)
fs/namei.c		patch \| blob \| blame \| history
fs/xfs/kmem.c		patch \| blob \| blame \| history
fs/xfs/kmem.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_attr.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_bmap.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_dir2_sf.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_inode_fork.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_inode_fork.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_log_format.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_sb.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_shared.h		patch \| blob \| blame \| history
fs/xfs/xfs_aops.c		patch \| blob \| blame \| history
fs/xfs/xfs_aops.h		patch \| blob \| blame \| history
fs/xfs/xfs_attr.h		patch \| blob \| blame \| history
fs/xfs/xfs_attr_inactive.c		patch \| blob \| blame \| history
fs/xfs/xfs_attr_list.c		patch \| blob \| blame \| history
fs/xfs/xfs_bmap_util.c		patch \| blob \| blame \| history
fs/xfs/xfs_buf.c		patch \| blob \| blame \| history
fs/xfs/xfs_buf.h		patch \| blob \| blame \| history
fs/xfs/xfs_buf_item.c		patch \| blob \| blame \| history
fs/xfs/xfs_dquot.c		patch \| blob \| blame \| history
fs/xfs/xfs_file.c		patch \| blob \| blame \| history
fs/xfs/xfs_fsops.c		patch \| blob \| blame \| history
fs/xfs/xfs_icache.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.h		patch \| blob \| blame \| history
fs/xfs/xfs_inode_item.c		patch \| blob \| blame \| history
fs/xfs/xfs_ioctl.c		patch \| blob \| blame \| history
fs/xfs/xfs_iomap.c		patch \| blob \| blame \| history
fs/xfs/xfs_iops.c		patch \| blob \| blame \| history
fs/xfs/xfs_log.c		patch \| blob \| blame \| history
fs/xfs/xfs_log.h		patch \| blob \| blame \| history
fs/xfs/xfs_log_cil.c		patch \| blob \| blame \| history
fs/xfs/xfs_log_priv.h		patch \| blob \| blame \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| blame \| history
fs/xfs/xfs_mount.c		patch \| blob \| blame \| history
fs/xfs/xfs_mount.h		patch \| blob \| blame \| history
fs/xfs/xfs_pnfs.c		patch \| blob \| blame \| history
fs/xfs/xfs_qm.c		patch \| blob \| blame \| history
fs/xfs/xfs_qm_syscalls.c		patch \| blob \| blame \| history
fs/xfs/xfs_rtalloc.c		patch \| blob \| blame \| history
fs/xfs/xfs_super.c		patch \| blob \| blame \| history
fs/xfs/xfs_symlink.c		patch \| blob \| blame \| history
fs/xfs/xfs_sysfs.c		patch \| blob \| blame \| history
fs/xfs/xfs_sysfs.h		patch \| blob \| blame \| history
fs/xfs/xfs_trace.h		patch \| blob \| blame \| history
fs/xfs/xfs_trans.c		patch \| blob \| blame \| history
fs/xfs/xfs_trans.h		patch \| blob \| blame \| history
fs/xfs/xfs_xattr.c		patch \| blob \| blame \| history