NFSv4.1: Deal with wraparound issues when updating the layout stateid
[deliverable/linux.git] / fs / nfs / pnfs.c
index 7ac5be36f132546baf8568642ea0ae702529cc2f..f1387e87513ff619bcea36117fc29795106ec559 100644 (file)
@@ -200,36 +200,35 @@ static struct pnfs_layout_hdr *
 pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
 {
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
-       return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) :
-               kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
+       return ld->alloc_layout_hdr(ino, gfp_flags);
 }
 
 static void
 pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
 {
-       struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
+       struct nfs_server *server = NFS_SERVER(lo->plh_inode);
+       struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+       if (!list_empty(&lo->plh_layouts)) {
+               struct nfs_client *clp = server->nfs_client;
+
+               spin_lock(&clp->cl_lock);
+               list_del_init(&lo->plh_layouts);
+               spin_unlock(&clp->cl_lock);
+       }
        put_rpccred(lo->plh_lc_cred);
-       return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
+       return ld->free_layout_hdr(lo);
 }
 
 static void
-destroy_layout_hdr(struct pnfs_layout_hdr *lo)
+pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
 {
        struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
        dprintk("%s: freeing layout cache %p\n", __func__, lo);
-       BUG_ON(!list_empty(&lo->plh_layouts));
        nfsi->layout = NULL;
        /* Reset MDS Threshold I/O counters */
        nfsi->write_io = 0;
        nfsi->read_io = 0;
-       pnfs_free_layout_hdr(lo);
-}
-
-static void
-pnfs_put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
-{
-       if (atomic_dec_and_test(&lo->plh_refcount))
-               destroy_layout_hdr(lo);
 }
 
 void
@@ -238,8 +237,9 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
        struct inode *inode = lo->plh_inode;
 
        if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
-               destroy_layout_hdr(lo);
+               pnfs_detach_layout_hdr(lo);
                spin_unlock(&inode->i_lock);
+               pnfs_free_layout_hdr(lo);
        }
 }
 
@@ -314,13 +314,11 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
        lseg->pls_layout = lo;
 }
 
-static void free_lseg(struct pnfs_layout_segment *lseg)
+static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
 {
        struct inode *ino = lseg->pls_layout->plh_inode;
 
        NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
-       /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
-       pnfs_put_layout_hdr(NFS_I(ino)->layout);
 }
 
 static void
@@ -331,8 +329,10 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
 
        WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
        list_del_init(&lseg->pls_list);
+       /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
+       atomic_dec(&lo->plh_refcount);
        if (list_empty(&lo->plh_segs))
-               set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags);
+               clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
        rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 }
 
@@ -351,12 +351,11 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
        lo = lseg->pls_layout;
        inode = lo->plh_inode;
        if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
-               LIST_HEAD(free_me);
-
+               pnfs_get_layout_hdr(lo);
                pnfs_layout_remove_lseg(lo, lseg);
                spin_unlock(&inode->i_lock);
-               list_add(&lseg->pls_list, &free_me);
-               pnfs_free_lseg_list(&free_me);
+               pnfs_free_lseg(lseg);
+               pnfs_put_layout_hdr(lo);
        }
 }
 EXPORT_SYMBOL_GPL(pnfs_put_lseg);
@@ -464,10 +463,8 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 
        dprintk("%s:Begin lo %p\n", __func__, lo);
 
-       if (list_empty(&lo->plh_segs)) {
-               set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags);
+       if (list_empty(&lo->plh_segs))
                return 0;
-       }
        list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
                if (!recall_range ||
                    should_free_lseg(&lseg->pls_range, recall_range)) {
@@ -487,25 +484,13 @@ void
 pnfs_free_lseg_list(struct list_head *free_me)
 {
        struct pnfs_layout_segment *lseg, *tmp;
-       struct pnfs_layout_hdr *lo;
 
        if (list_empty(free_me))
                return;
 
-       lo = list_first_entry(free_me, struct pnfs_layout_segment,
-                             pls_list)->pls_layout;
-
-       if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
-               struct nfs_client *clp;
-
-               clp = NFS_SERVER(lo->plh_inode)->nfs_client;
-               spin_lock(&clp->cl_lock);
-               list_del_init(&lo->plh_layouts);
-               spin_unlock(&clp->cl_lock);
-       }
        list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
                list_del(&lseg->pls_list);
-               free_lseg(lseg);
+               pnfs_free_lseg(lseg);
        }
 }
 
@@ -564,6 +549,15 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
        }
 }
 
+/*
+ * Return true if seqid s1 is newer than s2. The difference is taken in
+ * u32 so wraparound is well defined, then interpreted as a signed distance.
+ */
+static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
+{
+       return (s32)(s1 - s2) > 0;
+}
+
 /* update lo->plh_stateid with new if is more recent */
 void
 pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
@@ -573,7 +567,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
 
        oldseq = be32_to_cpu(lo->plh_stateid.seqid);
        newseq = be32_to_cpu(new->seqid);
-       if ((int)(newseq - oldseq) > 0) {
+       if (list_empty(&lo->plh_segs) || pnfs_seqid_is_newer(newseq, oldseq)) {
                nfs4_stateid_copy(&lo->plh_stateid, new);
                if (update_barrier) {
                        u32 new_barrier = be32_to_cpu(new->seqid);
@@ -603,7 +597,6 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
            (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0)
                return true;
        return lo->plh_block_lgets ||
-               test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
                test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
                (list_empty(&lo->plh_segs) &&
                 (atomic_read(&lo->plh_outstanding) > lget));
@@ -708,7 +701,7 @@ _pnfs_return_layout(struct inode *ino)
 
        spin_lock(&ino->i_lock);
        lo = nfsi->layout;
-       if (!lo || pnfs_test_layout_returned(lo)) {
+       if (!lo) {
                spin_unlock(&ino->i_lock);
                dprintk("NFS: %s no layout to return\n", __func__);
                goto out;
@@ -726,7 +719,6 @@ _pnfs_return_layout(struct inode *ino)
                goto out;
        }
        lo->plh_block_lgets++;
-       pnfs_mark_layout_returned(lo);
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
 
@@ -735,9 +727,9 @@ _pnfs_return_layout(struct inode *ino)
        lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
        if (unlikely(lrp == NULL)) {
                status = -ENOMEM;
-               pnfs_layout_io_set_failed(lo, IOMODE_RW);
-               pnfs_layout_io_set_failed(lo, IOMODE_READ);
-               pnfs_clear_layout_returned(lo);
+               spin_lock(&ino->i_lock);
+               lo->plh_block_lgets--;
+               spin_unlock(&ino->i_lock);
                pnfs_put_layout_hdr(lo);
                goto out;
        }
@@ -792,8 +784,12 @@ void pnfs_roc_release(struct inode *ino)
        spin_lock(&ino->i_lock);
        lo = NFS_I(ino)->layout;
        lo->plh_block_lgets--;
-       pnfs_put_layout_hdr_locked(lo);
-       spin_unlock(&ino->i_lock);
+       if (atomic_dec_and_test(&lo->plh_refcount)) {
+               pnfs_detach_layout_hdr(lo);
+               spin_unlock(&ino->i_lock);
+               pnfs_free_layout_hdr(lo);
+       } else
+               spin_unlock(&ino->i_lock);
 }
 
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
@@ -867,7 +863,6 @@ pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
 
        dprintk("%s:Begin\n", __func__);
 
-       assert_spin_locked(&lo->plh_inode->i_lock);
        list_for_each_entry(lp, &lo->plh_segs, pls_list) {
                if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
                        continue;
@@ -921,21 +916,19 @@ pnfs_find_alloc_layout(struct inode *ino,
 
        dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
 
-       assert_spin_locked(&ino->i_lock);
-       if (nfsi->layout) {
-               if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
-                       return NULL;
-               pnfs_get_layout_hdr(nfsi->layout);
-               return nfsi->layout;
-       }
+       if (nfsi->layout != NULL)
+               goto out_existing;
        spin_unlock(&ino->i_lock);
        new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
        spin_lock(&ino->i_lock);
 
-       if (likely(nfsi->layout == NULL))       /* Won the race? */
+       if (likely(nfsi->layout == NULL)) {     /* Won the race? */
                nfsi->layout = new;
-       else
-               pnfs_free_layout_hdr(new);
+               return new;
+       }
+       pnfs_free_layout_hdr(new);
+out_existing:
+       pnfs_get_layout_hdr(nfsi->layout);
        return nfsi->layout;
 }
 
@@ -978,7 +971,6 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
 
        dprintk("%s:Begin\n", __func__);
 
-       assert_spin_locked(&lo->plh_inode->i_lock);
        list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
                    is_matching_lseg(&lseg->pls_range, range)) {
@@ -1128,9 +1120,6 @@ pnfs_update_layout(struct inode *ino,
        if (list_empty(&lo->plh_segs))
                first = true;
 
-       /* Enable LAYOUTRETURNs */
-       pnfs_clear_layout_returned(lo);
-
        spin_unlock(&ino->i_lock);
        if (first) {
                /* The lo must be on the clp list if there is any
@@ -1151,11 +1140,6 @@ pnfs_update_layout(struct inode *ino,
                arg.length = PAGE_CACHE_ALIGN(arg.length);
 
        lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
-       if (!lseg && first) {
-               spin_lock(&clp->cl_lock);
-               list_del_init(&lo->plh_layouts);
-               spin_unlock(&clp->cl_lock);
-       }
        atomic_dec(&lo->plh_outstanding);
 out_put_layout_hdr:
        pnfs_put_layout_hdr(lo);
@@ -1206,6 +1190,10 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
                dprintk("%s forget reply due to state\n", __func__);
                goto out_forget_reply;
        }
+
+       /* Done processing layoutget. Set the layout stateid */
+       pnfs_set_layout_stateid(lo, &res->stateid, false);
+
        init_lseg(lo, lseg);
        lseg->pls_range = res->range;
        pnfs_get_lseg(lseg);
@@ -1216,8 +1204,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
                set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
        }
 
-       /* Done processing layoutget. Set the layout stateid */
-       pnfs_set_layout_stateid(lo, &res->stateid, false);
        spin_unlock(&ino->i_lock);
        return lseg;
 out:
This page took 0.029426 seconds and 5 git commands to generate.