Merge branch 'stable/for-jens-3.8' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorJens Axboe <axboe@kernel.dk>
Mon, 12 Nov 2012 16:18:47 +0000 (09:18 -0700)
committerJens Axboe <axboe@kernel.dk>
Mon, 12 Nov 2012 16:18:47 +0000 (09:18 -0700)
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c

index 280a13846e6cb06e1f774dd927598c1f3bfa5a8c..a05961683efa24179a90e04da93d7f2ec2013345 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/bitmap.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -79,6 +80,7 @@ struct pending_req {
        unsigned short          operation;
        int                     status;
        struct list_head        free_list;
+       DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -98,6 +100,36 @@ struct xen_blkbk {
 
 static struct xen_blkbk *blkbk;
 
+/*
+ * Maximum number of grant pages that can be mapped in blkback.
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
+ * pages that blkback will persistently map.
+ * Currently, this is:
+ * RING_SIZE = 32 (for all known ring types)
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
+ * sizeof(struct persistent_gnt) = 48
+ * So the maximum memory used to store the grants is:
+ * 32 * 11 * 48 = 16896 bytes
+ */
+static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+{
+       switch (protocol) {
+       case BLKIF_PROTOCOL_NATIVE:
+               return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
+                          BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       case BLKIF_PROTOCOL_X86_32:
+               return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
+                          BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       case BLKIF_PROTOCOL_X86_64:
+               return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
+                          BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       default:
+               BUG();
+       }
+       return 0;
+}
+
+
 /*
  * Little helpful macro to figure out the index and virtual address of the
 pending_pages[..]. For each 'pending_req' we have up to
@@ -129,6 +161,57 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 static void make_response(struct xen_blkif *blkif, u64 id,
                          unsigned short op, int st);
 
+#define foreach_grant(pos, rbtree, node) \
+       for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
+            &(pos)->node != NULL; \
+            (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
+
+
+static void add_persistent_gnt(struct rb_root *root,
+                              struct persistent_gnt *persistent_gnt)
+{
+       struct rb_node **new = &(root->rb_node), *parent = NULL;
+       struct persistent_gnt *this;
+
+       /* Figure out where to put new node */
+       while (*new) {
+               this = container_of(*new, struct persistent_gnt, node);
+
+               parent = *new;
+               if (persistent_gnt->gnt < this->gnt)
+                       new = &((*new)->rb_left);
+               else if (persistent_gnt->gnt > this->gnt)
+                       new = &((*new)->rb_right);
+               else {
+                       pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
+                       BUG();
+               }
+       }
+
+       /* Add new node and rebalance tree. */
+       rb_link_node(&(persistent_gnt->node), parent, new);
+       rb_insert_color(&(persistent_gnt->node), root);
+}
+
+static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+                                                grant_ref_t gref)
+{
+       struct persistent_gnt *data;
+       struct rb_node *node = root->rb_node;
+
+       while (node) {
+               data = container_of(node, struct persistent_gnt, node);
+
+               if (gref < data->gnt)
+                       node = node->rb_left;
+               else if (gref > data->gnt)
+                       node = node->rb_right;
+               else
+                       return data;
+       }
+       return NULL;
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
@@ -275,6 +358,11 @@ int xen_blkif_schedule(void *arg)
 {
        struct xen_blkif *blkif = arg;
        struct xen_vbd *vbd = &blkif->vbd;
+       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct persistent_gnt *persistent_gnt;
+       int ret = 0;
+       int segs_to_unmap = 0;
 
        xen_blkif_get(blkif);
 
@@ -302,6 +390,36 @@ int xen_blkif_schedule(void *arg)
                        print_stats(blkif);
        }
 
+       /* Free all persistent grant pages */
+       if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) {
+               foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) {
+                       BUG_ON(persistent_gnt->handle ==
+                               BLKBACK_INVALID_HANDLE);
+                       gnttab_set_unmap_op(&unmap[segs_to_unmap],
+                           (unsigned long) pfn_to_kaddr(page_to_pfn(
+                               persistent_gnt->page)),
+                           GNTMAP_host_map,
+                           persistent_gnt->handle);
+
+                       pages[segs_to_unmap] = persistent_gnt->page;
+                       rb_erase(&persistent_gnt->node,
+                               &blkif->persistent_gnts);
+                       kfree(persistent_gnt);
+                       blkif->persistent_gnt_c--;
+
+                       if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
+                               !rb_next(&persistent_gnt->node)) {
+                               ret = gnttab_unmap_refs(unmap, NULL, pages,
+                                                       segs_to_unmap);
+                               BUG_ON(ret);
+                               segs_to_unmap = 0;
+                       }
+               }
+       }
+
+       BUG_ON(blkif->persistent_gnt_c != 0);
+       BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+
        if (log_stats)
                print_stats(blkif);
 
@@ -328,6 +446,8 @@ static void xen_blkbk_unmap(struct pending_req *req)
        int ret;
 
        for (i = 0; i < req->nr_pages; i++) {
+               if (!test_bit(i, req->unmap_seg))
+                       continue;
                handle = pending_handle(req, i);
                if (handle == BLKBACK_INVALID_HANDLE)
                        continue;
@@ -344,12 +464,26 @@ static void xen_blkbk_unmap(struct pending_req *req)
 
 static int xen_blkbk_map(struct blkif_request *req,
                         struct pending_req *pending_req,
-                        struct seg_buf seg[])
+                        struct seg_buf seg[],
+                        struct page *pages[])
 {
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-       int i;
+       struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct persistent_gnt *persistent_gnt = NULL;
+       struct xen_blkif *blkif = pending_req->blkif;
+       phys_addr_t addr = 0;
+       int i, j;
+       bool new_map;
        int nseg = req->u.rw.nr_segments;
+       int segs_to_map = 0;
        int ret = 0;
+       int use_persistent_gnts;
+
+       use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
+
+       BUG_ON(blkif->persistent_gnt_c >
+                  max_mapped_grant_pages(pending_req->blkif->blk_protocol));
 
        /*
         * Fill out preq.nr_sects with proper amount of sectors, and setup
@@ -359,36 +493,146 @@ static int xen_blkbk_map(struct blkif_request *req,
        for (i = 0; i < nseg; i++) {
                uint32_t flags;
 
-               flags = GNTMAP_host_map;
-               if (pending_req->operation != BLKIF_OP_READ)
-                       flags |= GNTMAP_readonly;
-               gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
-                                 req->u.rw.seg[i].gref,
-                                 pending_req->blkif->domid);
+               if (use_persistent_gnts)
+                       persistent_gnt = get_persistent_gnt(
+                               &blkif->persistent_gnts,
+                               req->u.rw.seg[i].gref);
+
+               if (persistent_gnt) {
+                       /*
+                        * We are using persistent grants and
+                        * the grant is already mapped
+                        */
+                       new_map = false;
+               } else if (use_persistent_gnts &&
+                          blkif->persistent_gnt_c <
+                          max_mapped_grant_pages(blkif->blk_protocol)) {
+                       /*
+                        * We are using persistent grants, the grant is
+                        * not mapped but we have room for it
+                        */
+                       new_map = true;
+                       persistent_gnt = kmalloc(
+                               sizeof(struct persistent_gnt),
+                               GFP_KERNEL);
+                       if (!persistent_gnt)
+                               return -ENOMEM;
+                       persistent_gnt->page = alloc_page(GFP_KERNEL);
+                       if (!persistent_gnt->page) {
+                               kfree(persistent_gnt);
+                               return -ENOMEM;
+                       }
+                       persistent_gnt->gnt = req->u.rw.seg[i].gref;
+                       persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
+
+                       pages_to_gnt[segs_to_map] =
+                               persistent_gnt->page;
+                       addr = (unsigned long) pfn_to_kaddr(
+                               page_to_pfn(persistent_gnt->page));
+
+                       add_persistent_gnt(&blkif->persistent_gnts,
+                               persistent_gnt);
+                       blkif->persistent_gnt_c++;
+                       pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+                                persistent_gnt->gnt, blkif->persistent_gnt_c,
+                                max_mapped_grant_pages(blkif->blk_protocol));
+               } else {
+                       /*
+                        * We are either using persistent grants and
+                        * hit the maximum limit of grants mapped,
+                        * or we are not using persistent grants.
+                        */
+                       if (use_persistent_gnts &&
+                               !blkif->vbd.overflow_max_grants) {
+                               blkif->vbd.overflow_max_grants = 1;
+                               pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+                                        blkif->domid, blkif->vbd.handle);
+                       }
+                       new_map = true;
+                       pages[i] = blkbk->pending_page(pending_req, i);
+                       addr = vaddr(pending_req, i);
+                       pages_to_gnt[segs_to_map] =
+                               blkbk->pending_page(pending_req, i);
+               }
+
+               if (persistent_gnt) {
+                       pages[i] = persistent_gnt->page;
+                       persistent_gnts[i] = persistent_gnt;
+               } else {
+                       persistent_gnts[i] = NULL;
+               }
+
+               if (new_map) {
+                       flags = GNTMAP_host_map;
+                       if (!persistent_gnt &&
+                           (pending_req->operation != BLKIF_OP_READ))
+                               flags |= GNTMAP_readonly;
+                       gnttab_set_map_op(&map[segs_to_map++], addr,
+                                         flags, req->u.rw.seg[i].gref,
+                                         blkif->domid);
+               }
        }
 
-       ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
-       BUG_ON(ret);
+       if (segs_to_map) {
+               ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+               BUG_ON(ret);
+       }
 
        /*
         * Now swizzle the MFN in our domain with the MFN from the other domain
         * so that when we access vaddr(pending_req,i) it has the contents of
         * the page from the other domain.
         */
-       for (i = 0; i < nseg; i++) {
-               if (unlikely(map[i].status != 0)) {
-                       pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-                       map[i].handle = BLKBACK_INVALID_HANDLE;
-                       ret |= 1;
+       bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+       for (i = 0, j = 0; i < nseg; i++) {
+               if (!persistent_gnts[i] ||
+                   persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
+                       /* This is a newly mapped grant */
+                       BUG_ON(j >= segs_to_map);
+                       if (unlikely(map[j].status != 0)) {
+                               pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+                               map[j].handle = BLKBACK_INVALID_HANDLE;
+                               ret |= 1;
+                               if (persistent_gnts[i]) {
+                                       rb_erase(&persistent_gnts[i]->node,
+                                                &blkif->persistent_gnts);
+                                       blkif->persistent_gnt_c--;
+                                       kfree(persistent_gnts[i]);
+                                       persistent_gnts[i] = NULL;
+                               }
+                       }
+               }
+               if (persistent_gnts[i]) {
+                       if (persistent_gnts[i]->handle ==
+                           BLKBACK_INVALID_HANDLE) {
+                               /*
+                                * If this is a new persistent grant
+                                * save the handle
+                                */
+                               persistent_gnts[i]->handle = map[j].handle;
+                               persistent_gnts[i]->dev_bus_addr =
+                                       map[j++].dev_bus_addr;
+                       }
+                       pending_handle(pending_req, i) =
+                               persistent_gnts[i]->handle;
+
+                       if (ret)
+                               continue;
+
+                       seg[i].buf = persistent_gnts[i]->dev_bus_addr |
+                               (req->u.rw.seg[i].first_sect << 9);
+               } else {
+                       pending_handle(pending_req, i) = map[j].handle;
+                       bitmap_set(pending_req->unmap_seg, i, 1);
+
+                       if (ret) {
+                               j++;
+                               continue;
+                       }
+
+                       seg[i].buf = map[j++].dev_bus_addr |
+                               (req->u.rw.seg[i].first_sect << 9);
                }
-
-               pending_handle(pending_req, i) = map[i].handle;
-
-               if (ret)
-                       continue;
-
-               seg[i].buf  = map[i].dev_bus_addr |
-                       (req->u.rw.seg[i].first_sect << 9);
        }
        return ret;
 }
@@ -591,6 +835,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        int operation;
        struct blk_plug plug;
        bool drain = false;
+       struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
        switch (req->operation) {
        case BLKIF_OP_READ:
@@ -677,7 +922,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
         * the hypercall to unmap the grants - that is all done in
         * xen_blkbk_unmap.
         */
-       if (xen_blkbk_map(req, pending_req, seg))
+       if (xen_blkbk_map(req, pending_req, seg, pages))
                goto fail_flush;
 
        /*
@@ -689,7 +934,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        for (i = 0; i < nseg; i++) {
                while ((bio == NULL) ||
                       (bio_add_page(bio,
-                                    blkbk->pending_page(pending_req, i),
+                                    pages[i],
                                     seg[i].nsec << 9,
                                     seg[i].buf & ~PAGE_MASK) == 0)) {
 
index 9ad3b5ec1dc1c521085db47a7928cc8cf1179701..ae7951f0e268e739a007243328589346ca12af5d 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/io.h>
+#include <linux/rbtree.h>
 #include <asm/setup.h>
 #include <asm/pgalloc.h>
 #include <asm/hypervisor.h>
@@ -160,10 +161,22 @@ struct xen_vbd {
        sector_t                size;
        bool                    flush_support;
        bool                    discard_secure;
+
+       unsigned int            feature_gnt_persistent:1;
+       unsigned int            overflow_max_grants:1;
 };
 
 struct backend_info;
 
+
+struct persistent_gnt {
+       struct page *page;
+       grant_ref_t gnt;
+       grant_handle_t handle;
+       uint64_t dev_bus_addr;
+       struct rb_node node;
+};
+
 struct xen_blkif {
        /* Unique identifier for this interface. */
        domid_t                 domid;
@@ -190,6 +203,10 @@ struct xen_blkif {
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;
 
+       /* tree to store persistent grants */
+       struct rb_root          persistent_gnts;
+       unsigned int            persistent_gnt_c;
+
        /* statistics */
        unsigned long           st_print;
        int                     st_rd_req;
index 4f66171c668354b490f1284aec48278b8676bd84..a03ecbb0044c6ab193ebb6b0c8f2a3ba29ba8b90 100644 (file)
@@ -118,6 +118,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        atomic_set(&blkif->drain, 0);
        blkif->st_print = jiffies;
        init_waitqueue_head(&blkif->waiting_to_free);
+       blkif->persistent_gnts.rb_node = NULL;
 
        return blkif;
 }
@@ -673,6 +674,13 @@ again:
 
        xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
 
+       err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
+       if (err) {
+               xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
+                                dev->nodename);
+               goto abort;
+       }
+
        err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                            (unsigned long long)vbd_sz(&be->blkif->vbd));
        if (err) {
@@ -721,6 +729,7 @@ static int connect_ring(struct backend_info *be)
        struct xenbus_device *dev = be->dev;
        unsigned long ring_ref;
        unsigned int evtchn;
+       unsigned int pers_grants;
        char protocol[64] = "";
        int err;
 
@@ -750,8 +759,18 @@ static int connect_ring(struct backend_info *be)
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -1;
        }
-       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+       err = xenbus_gather(XBT_NIL, dev->otherend,
+                           "feature-persistent", "%u",
+                           &pers_grants, NULL);
+       if (err)
+               pers_grants = 0;
+
+       be->blkif->vbd.feature_gnt_persistent = pers_grants;
+       be->blkif->vbd.overflow_max_grants = 0;
+
+       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
+               ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+               pers_grants ? "persistent grants" : "");
 
        /* Map the shared frame, irq etc. */
        err = xen_blkif_map(be->blkif, ring_ref, evtchn);
index 007db8986e84d3df91eb0a00707f14d4c02d0ed3..f1de806b0a636b8677e081616a0a7cf9ee78f13f 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
+#include <linux/llist.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -64,10 +65,17 @@ enum blkif_state {
        BLKIF_STATE_SUSPENDED,
 };
 
+struct grant {
+       grant_ref_t gref;
+       unsigned long pfn;
+       struct llist_node node;
+};
+
 struct blk_shadow {
        struct blkif_request req;
        struct request *request;
        unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
@@ -97,6 +105,8 @@ struct blkfront_info
        struct work_struct work;
        struct gnttab_free_callback callback;
        struct blk_shadow shadow[BLK_RING_SIZE];
+       struct llist_head persistent_gnts;
+       unsigned int persistent_gnts_c;
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -104,6 +114,7 @@ struct blkfront_info
        unsigned int feature_secdiscard:1;
        unsigned int discard_granularity;
        unsigned int discard_alignment;
+       unsigned int feature_persistent:1;
        int is_ready;
 };
 
@@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req)
        unsigned long id;
        unsigned int fsect, lsect;
        int i, ref;
+
+       /*
+        * Used to store if we are able to queue the request by just using
+        * existing persistent grants, or if we have to get new grants,
+        * as there are not sufficiently many free.
+        */
+       bool new_persistent_gnts;
        grant_ref_t gref_head;
+       struct page *granted_page;
+       struct grant *gnt_list_entry = NULL;
        struct scatterlist *sg;
 
        if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                return 1;
 
-       if (gnttab_alloc_grant_references(
-               BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
-               gnttab_request_free_callback(
-                       &info->callback,
-                       blkif_restart_queue_callback,
-                       info,
-                       BLKIF_MAX_SEGMENTS_PER_REQUEST);
-               return 1;
-       }
+       /* Check if we have enough grants to allocate a request */
+       if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+               new_persistent_gnts = 1;
+               if (gnttab_alloc_grant_references(
+                   BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+                   &gref_head) < 0) {
+                       gnttab_request_free_callback(
+                               &info->callback,
+                               blkif_restart_queue_callback,
+                               info,
+                               BLKIF_MAX_SEGMENTS_PER_REQUEST);
+                       return 1;
+               }
+       } else
+               new_persistent_gnts = 0;
 
        /* Fill out a communications ring structure. */
        ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
@@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req)
                       BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
                for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
-                       buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
                        fsect = sg->offset >> 9;
                        lsect = fsect + (sg->length >> 9) - 1;
-                       /* install a grant reference. */
-                       ref = gnttab_claim_grant_reference(&gref_head);
-                       BUG_ON(ref == -ENOSPC);
 
-                       gnttab_grant_foreign_access_ref(
-                                       ref,
+                       if (info->persistent_gnts_c) {
+                               BUG_ON(llist_empty(&info->persistent_gnts));
+                               gnt_list_entry = llist_entry(
+                                       llist_del_first(&info->persistent_gnts),
+                                       struct grant, node);
+
+                               ref = gnt_list_entry->gref;
+                               buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+                               info->persistent_gnts_c--;
+                       } else {
+                               ref = gnttab_claim_grant_reference(&gref_head);
+                               BUG_ON(ref == -ENOSPC);
+
+                               gnt_list_entry =
+                                       kmalloc(sizeof(struct grant),
+                                                        GFP_ATOMIC);
+                               if (!gnt_list_entry)
+                                       return -ENOMEM;
+
+                               granted_page = alloc_page(GFP_ATOMIC);
+                               if (!granted_page) {
+                                       kfree(gnt_list_entry);
+                                       return -ENOMEM;
+                               }
+
+                               gnt_list_entry->pfn =
+                                       page_to_pfn(granted_page);
+                               gnt_list_entry->gref = ref;
+
+                               buffer_mfn = pfn_to_mfn(page_to_pfn(
+                                                               granted_page));
+                               gnttab_grant_foreign_access_ref(ref,
                                        info->xbdev->otherend_id,
-                                       buffer_mfn,
-                                       rq_data_dir(req));
+                                       buffer_mfn, 0);
+                       }
+
+                       info->shadow[id].grants_used[i] = gnt_list_entry;
+
+                       if (rq_data_dir(req)) {
+                               char *bvec_data;
+                               void *shared_data;
+
+                               BUG_ON(sg->offset + sg->length > PAGE_SIZE);
+
+                               shared_data = kmap_atomic(
+                                       pfn_to_page(gnt_list_entry->pfn));
+                               bvec_data = kmap_atomic(sg_page(sg));
+
+                               /*
+                                * this does not wipe data stored outside the
+                                * range sg->offset..sg->offset+sg->length.
+                                * Therefore, blkback *could* see data from
+                                * previous requests. This is OK as long as
+                                * persistent grants are shared with just one
+                                * domain. It may need refactoring if this
+                                * changes
+                                */
+                               memcpy(shared_data + sg->offset,
+                                      bvec_data   + sg->offset,
+                                      sg->length);
+
+                               kunmap_atomic(bvec_data);
+                               kunmap_atomic(shared_data);
+                       }
 
                        info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
                        ring_req->u.rw.seg[i] =
@@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req)
        /* Keep a private copy so we can reissue requests when recovering. */
        info->shadow[id].req = *ring_req;
 
-       gnttab_free_grant_references(gref_head);
+       if (new_persistent_gnts)
+               gnttab_free_grant_references(gref_head);
 
        return 0;
 }
@@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
        blk_queue_flush(info->rq, info->feature_flush);
-       printk(KERN_INFO "blkfront: %s: %s: %s\n",
+       printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
               info->gd->disk_name,
               info->flush_op == BLKIF_OP_WRITE_BARRIER ?
                "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
                "flush diskcache" : "barrier or flush"),
-              info->feature_flush ? "enabled" : "disabled");
+              info->feature_flush ? "enabled" : "disabled",
+              info->feature_persistent ? "using persistent grants" : "");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work)
 
 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+       struct llist_node *all_gnts;
+       struct grant *persistent_gnt;
+
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&info->io_lock);
        info->connected = suspend ?
@@ -714,6 +800,17 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        /* No more blkif_request(). */
        if (info->rq)
                blk_stop_queue(info->rq);
+
+       /* Remove all persistent grants */
+       if (info->persistent_gnts_c) {
+               all_gnts = llist_del_all(&info->persistent_gnts);
+               llist_for_each_entry(persistent_gnt, all_gnts, node) {
+                       gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+                       kfree(persistent_gnt);
+               }
+               info->persistent_gnts_c = 0;
+       }
+
        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
        spin_unlock_irq(&info->io_lock);
@@ -734,13 +831,43 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 
 }
 
-static void blkif_completion(struct blk_shadow *s)
+static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+                            struct blkif_response *bret)
 {
        int i;
-       /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
-        * flag. */
-       for (i = 0; i < s->req.u.rw.nr_segments; i++)
-               gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+       struct bio_vec *bvec;
+       struct req_iterator iter;
+       unsigned long flags;
+       char *bvec_data;
+       void *shared_data;
+       unsigned int offset = 0;
+
+       if (bret->operation == BLKIF_OP_READ) {
+               /*
+                * Copy the data received from the backend into the bvec.
+                * Since bv_offset can be different than 0, and bv_len different
+                * than PAGE_SIZE, we have to keep track of the current offset,
+                * to be sure we are copying the data from the right shared page.
+                */
+               rq_for_each_segment(bvec, s->request, iter) {
+                       BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
+                       i = offset >> PAGE_SHIFT;
+                       BUG_ON(i >= s->req.u.rw.nr_segments);
+                       shared_data = kmap_atomic(
+                               pfn_to_page(s->grants_used[i]->pfn));
+                       bvec_data = bvec_kmap_irq(bvec, &flags);
+                       memcpy(bvec_data, shared_data + bvec->bv_offset,
+                               bvec->bv_len);
+                       bvec_kunmap_irq(bvec_data, &flags);
+                       kunmap_atomic(shared_data);
+                       offset += bvec->bv_len;
+               }
+       }
+       /* Add the persistent grant into the list of free grants */
+       for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+               llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+               info->persistent_gnts_c++;
+       }
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -783,7 +910,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                req  = info->shadow[id].request;
 
                if (bret->operation != BLKIF_OP_DISCARD)
-                       blkif_completion(&info->shadow[id]);
+                       blkif_completion(&info->shadow[id], info, bret);
 
                if (add_id_to_freelist(info, id)) {
                        WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
@@ -942,6 +1069,11 @@ again:
                message = "writing protocol";
                goto abort_transaction;
        }
+       err = xenbus_printf(xbt, dev->nodename,
+                           "feature-persistent", "%u", 1);
+       if (err)
+               dev_warn(&dev->dev,
+                        "writing persistent grants feature to xenbus");
 
        err = xenbus_transaction_end(xbt, 0);
        if (err) {
@@ -1029,6 +1161,8 @@ static int blkfront_probe(struct xenbus_device *dev,
        spin_lock_init(&info->io_lock);
        info->xbdev = dev;
        info->vdevice = vdevice;
+       init_llist_head(&info->persistent_gnts);
+       info->persistent_gnts_c = 0;
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);
 
@@ -1093,7 +1227,7 @@ static int blkif_recover(struct blkfront_info *info)
                                        req->u.rw.seg[j].gref,
                                        info->xbdev->otherend_id,
                                        pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
-                                       rq_data_dir(info->shadow[req->u.rw.id].request));
+                                       0);
                }
                info->shadow[req->u.rw.id].req = *req;
 
@@ -1225,7 +1359,7 @@ static void blkfront_connect(struct blkfront_info *info)
        unsigned long sector_size;
        unsigned int binfo;
        int err;
-       int barrier, flush, discard;
+       int barrier, flush, discard, persistent;
 
        switch (info->connected) {
        case BLKIF_STATE_CONNECTED:
@@ -1303,6 +1437,14 @@ static void blkfront_connect(struct blkfront_info *info)
        if (!err && discard)
                blkfront_setup_discard(info);
 
+       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                           "feature-persistent", "%u", &persistent,
+                           NULL);
+       if (err)
+               info->feature_persistent = 0;
+       else
+               info->feature_persistent = persistent;
+
        err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
This page took 0.064544 seconds and 5 git commands to generate.