xen-blkback: use balloon pages for all mappings
drivers/block/xen-blkback/blkback.c
/******************************************************************************
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/block/xen-blkfront.c
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/bitmap.h>

#include <xen/events.h>
#include <xen/page.h>
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <xen/balloon.h>
#include "common.h"

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
 *
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
static int xen_blkif_reqs = 64;
module_param_named(reqs, xen_blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

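/*
 * Illustrative example (an assumption, not taken from this file): with the
 * backend built as the xen-blkback module, and since the parameter
 * permission is 0 (no sysfs entry), 'reqs' can only be set at load time,
 * e.g.:
 *
 *   modprobe xen-blkback reqs=128
 */
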
/*
 * Maximum number of unused free pages to keep in the internal buffer.
 * Setting this to a value too low will reduce memory used in each backend,
 * but can have a performance penalty.
 *
 * A sane value is xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST, but can
 * be set to a lower value that might degrade performance on some intensive
 * IO workloads.
 */

static int xen_blkif_max_buffer_pages = 704;
module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
MODULE_PARM_DESC(max_buffer_pages,
"Maximum number of free pages to keep in each block backend buffer");

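/*
 * Illustrative example: the parameter is writable (0644), so it can be
 * changed at run time through the module's sysfs parameters directory
 * (the exact path depends on how the backend is named when built, e.g.
 * /sys/module/xen_blkback/parameters/ when loaded as xen-blkback):
 *
 *   echo 1024 > /sys/module/xen_blkback/parameters/max_buffer_pages
 */
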
/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats;
module_param(log_stats, int, 0644);

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements the
 * pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
	struct xen_blkif	*blkif;
	u64			id;
	int			nr_pages;
	atomic_t		pendcnt;
	unsigned short		operation;
	int			status;
	struct list_head	free_list;
	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	struct page		*pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

#define BLKBACK_INVALID_HANDLE (~0)

/* Number of free pages to remove on each call to free_xenballooned_pages */
#define NUM_BATCH_FREE_PAGES 10

struct xen_blkbk {
	struct pending_req	*pending_reqs;
	/* List of all 'pending_req' available */
	struct list_head	pending_free;
	/* And its spinlock. */
	spinlock_t		pending_free_lock;
	wait_queue_head_t	pending_free_wq;
	/* And the grant handles that are available. */
	grant_handle_t		*pending_grant_handles;
};

static struct xen_blkbk *blkbk;

/*
 * Maximum number of grant pages that can be mapped in blkback.
 * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
 * pages that blkback will persistently map.
 * Currently, this is:
 * RING_SIZE = 32 (for all known ring types)
 * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
 * sizeof(struct persistent_gnt) = 48
 * So the maximum memory used to store the grants is:
 * 32 * 11 * 48 = 16896 bytes
 */
static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
{
	switch (protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
			BLKIF_MAX_SEGMENTS_PER_REQUEST;
	case BLKIF_PROTOCOL_X86_32:
		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
			BLKIF_MAX_SEGMENTS_PER_REQUEST;
	case BLKIF_PROTOCOL_X86_64:
		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
			BLKIF_MAX_SEGMENTS_PER_REQUEST;
	default:
		BUG();
	}
	return 0;
}


/*
 * Little helper to figure out the index into the pending_grant_handles[..]
 * array for a request segment. For each 'pending_req' we have up to
 * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) segments, so 'seg' runs from 0
 * through 10.
 */
static inline int vaddr_pagenr(struct pending_req *req, int seg)
{
	return (req - blkbk->pending_reqs) *
		BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
}

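/*
 * Pool of free pages used for grant mappings. Pages are taken from (and
 * returned to) a per-backend list protected by free_pages_lock; when the
 * list is empty, fresh pages are obtained from the balloon via
 * alloc_xenballooned_pages(), and the pool is trimmed back down to
 * xen_blkif_max_buffer_pages by the xen_blkif_schedule() thread.
 */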
static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
{
	unsigned long flags;

	spin_lock_irqsave(&blkif->free_pages_lock, flags);
	if (list_empty(&blkif->free_pages)) {
		BUG_ON(blkif->free_pages_num != 0);
		spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
		return alloc_xenballooned_pages(1, page, false);
	}
	BUG_ON(blkif->free_pages_num == 0);
	page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
	list_del(&page[0]->lru);
	blkif->free_pages_num--;
	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);

	return 0;
}

static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
				  int num)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&blkif->free_pages_lock, flags);
	for (i = 0; i < num; i++)
		list_add(&page[i]->lru, &blkif->free_pages);
	blkif->free_pages_num += num;
	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
}

static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
{
	/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
	struct page *page[NUM_BATCH_FREE_PAGES];
	unsigned int num_pages = 0;
	unsigned long flags;

	spin_lock_irqsave(&blkif->free_pages_lock, flags);
	while (blkif->free_pages_num > num) {
		BUG_ON(list_empty(&blkif->free_pages));
		page[num_pages] = list_first_entry(&blkif->free_pages,
						   struct page, lru);
		list_del(&page[num_pages]->lru);
		blkif->free_pages_num--;
		if (++num_pages == NUM_BATCH_FREE_PAGES) {
			spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
			free_xenballooned_pages(num_pages, page);
			spin_lock_irqsave(&blkif->free_pages_lock, flags);
			num_pages = 0;
		}
	}
	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
	if (num_pages != 0)
		free_xenballooned_pages(num_pages, page);
}

#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))

#define pending_handle(_req, _seg) \
	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])


static int do_block_io_op(struct xen_blkif *blkif);
static int dispatch_rw_block_io(struct xen_blkif *blkif,
				struct blkif_request *req,
				struct pending_req *pending_req);
static void make_response(struct xen_blkif *blkif, u64 id,
			  unsigned short op, int st);

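/*
 * Walk an rb tree of persistent grants while allowing the current node to
 * be erased: the next node is fetched before the loop body runs, so calling
 * rb_erase() on 'pos' inside the body is safe.
 */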
#define foreach_grant_safe(pos, n, rbtree, node) \
	for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \
	     (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL; \
	     &(pos)->node != NULL; \
	     (pos) = container_of(n, typeof(*(pos)), node), \
	     (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)


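/*
 * Insert a persistently mapped grant into the per-device rb tree, keyed by
 * grant reference. Returns -EINVAL if the gref is already present.
 */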
static int add_persistent_gnt(struct rb_root *root,
			      struct persistent_gnt *persistent_gnt)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;
	struct persistent_gnt *this;

	/* Figure out where to put new node */
	while (*new) {
		this = container_of(*new, struct persistent_gnt, node);

		parent = *new;
		if (persistent_gnt->gnt < this->gnt)
			new = &((*new)->rb_left);
		else if (persistent_gnt->gnt > this->gnt)
			new = &((*new)->rb_right);
		else {
			pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n");
			return -EINVAL;
		}
	}

	/* Add new node and rebalance tree. */
	rb_link_node(&(persistent_gnt->node), parent, new);
	rb_insert_color(&(persistent_gnt->node), root);
	return 0;
}

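/*
 * Look up a grant reference in the rb tree of persistent grants; returns
 * NULL if the gref is not persistently mapped.
 */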
static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
						 grant_ref_t gref)
{
	struct persistent_gnt *data;
	struct rb_node *node = root->rb_node;

	while (node) {
		data = container_of(node, struct persistent_gnt, node);

		if (gref < data->gnt)
			node = node->rb_left;
		else if (gref > data->gnt)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

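/*
 * Unmap and free every persistent grant in the tree, batching the unmap
 * operations in groups of up to BLKIF_MAX_SEGMENTS_PER_REQUEST and
 * returning the underlying pages to the free page pool.
 */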
static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
				 unsigned int num)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct persistent_gnt *persistent_gnt;
	struct rb_node *n;
	int ret = 0;
	int segs_to_unmap = 0;

	foreach_grant_safe(persistent_gnt, n, root, node) {
		BUG_ON(persistent_gnt->handle ==
			BLKBACK_INVALID_HANDLE);
		gnttab_set_unmap_op(&unmap[segs_to_unmap],
			(unsigned long) pfn_to_kaddr(page_to_pfn(
				persistent_gnt->page)),
			GNTMAP_host_map,
			persistent_gnt->handle);

		pages[segs_to_unmap] = persistent_gnt->page;

		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
			!rb_next(&persistent_gnt->node)) {
			ret = gnttab_unmap_refs(unmap, NULL, pages,
				segs_to_unmap);
			BUG_ON(ret);
			put_free_pages(blkif, pages, segs_to_unmap);
			segs_to_unmap = 0;
		}

		rb_erase(&persistent_gnt->node, root);
		kfree(persistent_gnt);
		num--;
	}
	BUG_ON(num != 0);
}

/*
 * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
 */
static struct pending_req *alloc_req(void)
{
	struct pending_req *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
	if (!list_empty(&blkbk->pending_free)) {
		req = list_entry(blkbk->pending_free.next, struct pending_req,
				 free_list);
		list_del(&req->free_list);
	}
	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
	return req;
}

/*
 * Return the 'pending_req' structure back to the freepool. We also
 * wake up the thread if it was waiting for a free one.
 */
static void free_req(struct pending_req *req)
{
	unsigned long flags;
	int was_empty;

	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
	was_empty = list_empty(&blkbk->pending_free);
	list_add(&req->free_list, &blkbk->pending_free);
	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
	if (was_empty)
		wake_up(&blkbk->pending_free_wq);
}

/*
 * Routines for managing virtual block devices (vbds).
 */
static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
			     int operation)
{
	struct xen_vbd *vbd = &blkif->vbd;
	int rc = -EACCES;

	if ((operation != READ) && vbd->readonly)
		goto out;

	if (likely(req->nr_sects)) {
		blkif_sector_t end = req->sector_number + req->nr_sects;

		if (unlikely(end < req->sector_number))
			goto out;
		if (unlikely(end > vbd_sz(vbd)))
			goto out;
	}

	req->dev  = vbd->pdevice;
	req->bdev = vbd->bdev;
	rc = 0;

 out:
	return rc;
}

static void xen_vbd_resize(struct xen_blkif *blkif)
{
	struct xen_vbd *vbd = &blkif->vbd;
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be);
	unsigned long long new_size = vbd_sz(vbd);

	pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n",
		blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
	pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size);
	vbd->size = new_size;
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		pr_warn(DRV_PFX "Error starting transaction");
		return;
	}
	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(vbd));
	if (err) {
		pr_warn(DRV_PFX "Error writing new size");
		goto abort;
	}
	/*
	 * Write the current state; we will use this to synchronize
	 * the front-end. If the current state is "connected" the
	 * front-end will get the new size information online.
	 */
	err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
	if (err) {
		pr_warn(DRV_PFX "Error writing the state");
		goto abort;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		pr_warn(DRV_PFX "Error ending transaction");
	return;
abort:
	xenbus_transaction_end(xbt, 1);
}

/*
 * Notification from the guest OS.
 */
static void blkif_notify_work(struct xen_blkif *blkif)
{
	blkif->waiting_reqs = 1;
	wake_up(&blkif->wq);
}

irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
{
	blkif_notify_work(dev_id);
	return IRQ_HANDLED;
}

/*
 * SCHEDULER FUNCTIONS
 */

static void print_stats(struct xen_blkif *blkif)
{
	pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
		 " | ds %4llu | pg: %4u/%4u\n",
		 current->comm, blkif->st_oo_req,
		 blkif->st_rd_req, blkif->st_wr_req,
		 blkif->st_f_req, blkif->st_ds_req,
		 blkif->persistent_gnt_c,
		 max_mapped_grant_pages(blkif->blk_protocol));
	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
	blkif->st_rd_req = 0;
	blkif->st_wr_req = 0;
	blkif->st_oo_req = 0;
	blkif->st_ds_req = 0;
}

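/*
 * Main loop of the per-device kernel thread: wait for requests (or a
 * pending vbd resize), process the ring, trim the free page pool back to
 * xen_blkif_max_buffer_pages, and on shutdown release all buffer pages and
 * persistent grants.
 */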
int xen_blkif_schedule(void *arg)
{
	struct xen_blkif *blkif = arg;
	struct xen_vbd *vbd = &blkif->vbd;

	xen_blkif_get(blkif);

	while (!kthread_should_stop()) {
		if (try_to_freeze())
			continue;
		if (unlikely(vbd->size != vbd_sz(vbd)))
			xen_vbd_resize(blkif);

		wait_event_interruptible(
			blkif->wq,
			blkif->waiting_reqs || kthread_should_stop());
		wait_event_interruptible(
			blkbk->pending_free_wq,
			!list_empty(&blkbk->pending_free) ||
			kthread_should_stop());

		blkif->waiting_reqs = 0;
		smp_mb(); /* clear flag *before* checking for work */

		if (do_block_io_op(blkif))
			blkif->waiting_reqs = 1;

		/* Shrink if we have more than xen_blkif_max_buffer_pages */
		shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);

		if (log_stats && time_after(jiffies, blkif->st_print))
			print_stats(blkif);
	}

	/* Since we are shutting down remove all pages from the buffer */
	shrink_free_pagepool(blkif, 0 /* All */);

	/* Free all persistent grant pages */
	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
		free_persistent_gnts(blkif, &blkif->persistent_gnts,
			blkif->persistent_gnt_c);

	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
	blkif->persistent_gnt_c = 0;

	if (log_stats)
		print_stats(blkif);

	blkif->xenblkd = NULL;
	xen_blkif_put(blkif);

	return 0;
}

struct seg_buf {
	unsigned int offset;
	unsigned int nsec;
};
/*
 * Unmap the grant references, and also remove the M2P over-rides
 * used in the 'pending_req'.
 */
static void xen_blkbk_unmap(struct pending_req *req)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int i, invcount = 0;
	grant_handle_t handle;
	struct xen_blkif *blkif = req->blkif;
	int ret;

	for (i = 0; i < req->nr_pages; i++) {
		if (!test_bit(i, req->unmap_seg))
			continue;
		handle = pending_handle(req, i);
		pages[invcount] = req->pages[i];
		if (handle == BLKBACK_INVALID_HANDLE)
			continue;
		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[invcount]),
				    GNTMAP_host_map, handle);
		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
		invcount++;
	}

	ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
	BUG_ON(ret);
	put_free_pages(blkif, pages, invcount);
}

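/*
 * Map the grant references of a request: reuse persistently mapped grants
 * where possible, otherwise grab a page from the free page pool and map the
 * grant into it (adding it to the persistent tree if there is still room).
 * Segments that end up with a regular (non-persistent) mapping are flagged
 * in pending_req->unmap_seg so that xen_blkbk_unmap() tears them down on
 * completion.
 */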
static int xen_blkbk_map(struct blkif_request *req,
			 struct pending_req *pending_req,
			 struct seg_buf seg[],
			 struct page *pages[])
{
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct persistent_gnt *persistent_gnt = NULL;
	struct xen_blkif *blkif = pending_req->blkif;
	phys_addr_t addr = 0;
	int i, seg_idx, new_map_idx;
	int nseg = req->u.rw.nr_segments;
	int segs_to_map = 0;
	int ret = 0;
	int use_persistent_gnts;

	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);

	BUG_ON(blkif->persistent_gnt_c >
		   max_mapped_grant_pages(pending_req->blkif->blk_protocol));

	/*
	 * Fill out preq.nr_sects with the proper amount of sectors, and set
	 * up map[..] with the PFN of the page in our domain with the
	 * corresponding grant reference for each page.
	 */
	for (i = 0; i < nseg; i++) {
		uint32_t flags;

		if (use_persistent_gnts)
			persistent_gnt = get_persistent_gnt(
				&blkif->persistent_gnts,
				req->u.rw.seg[i].gref);

		if (persistent_gnt) {
			/*
			 * We are using persistent grants and
			 * the grant is already mapped
			 */
			pages[i] = persistent_gnt->page;
			persistent_gnts[i] = persistent_gnt;
		} else {
			if (get_free_page(blkif, &pages[i]))
				goto out_of_memory;
			addr = vaddr(pages[i]);
			pages_to_gnt[segs_to_map] = pages[i];
			persistent_gnts[i] = NULL;
			flags = GNTMAP_host_map;
			if (!use_persistent_gnts &&
			    (pending_req->operation != BLKIF_OP_READ))
				flags |= GNTMAP_readonly;
			gnttab_set_map_op(&map[segs_to_map++], addr,
					  flags, req->u.rw.seg[i].gref,
					  blkif->domid);
		}
	}

	if (segs_to_map) {
		ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
		BUG_ON(ret);
	}

	/*
	 * Now swizzle the MFN in our domain with the MFN from the other domain
	 * so that when we access vaddr(pending_req,i) it has the contents of
	 * the page from the other domain.
	 */
	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	for (seg_idx = 0, new_map_idx = 0; seg_idx < nseg; seg_idx++) {
		if (!persistent_gnts[seg_idx]) {
			/* This is a newly mapped grant */
			BUG_ON(new_map_idx >= segs_to_map);
			if (unlikely(map[new_map_idx].status != 0)) {
				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
				pending_handle(pending_req, seg_idx) = BLKBACK_INVALID_HANDLE;
				ret |= 1;
				new_map_idx++;
				/*
				 * No need to set unmap_seg bit, since
				 * we can not unmap this grant because
				 * the handle is invalid.
				 */
				continue;
			}
			pending_handle(pending_req, seg_idx) = map[new_map_idx].handle;
		} else {
			/* This grant is persistent and already mapped */
			goto next;
		}
		if (use_persistent_gnts &&
		    blkif->persistent_gnt_c <
		    max_mapped_grant_pages(blkif->blk_protocol)) {
			/*
			 * We are using persistent grants, the grant is
			 * not mapped but we have room for it
			 */
			persistent_gnt = kmalloc(sizeof(struct persistent_gnt),
						 GFP_KERNEL);
			if (!persistent_gnt) {
				/*
				 * If we don't have enough memory to
				 * allocate the persistent_gnt struct
				 * map this grant non-persistently
				 */
				goto next_unmap;
			}
			persistent_gnt->gnt = map[new_map_idx].ref;
			persistent_gnt->handle = map[new_map_idx].handle;
			persistent_gnt->page = pages[seg_idx];
			if (add_persistent_gnt(&blkif->persistent_gnts,
					       persistent_gnt)) {
				kfree(persistent_gnt);
				persistent_gnt = NULL;
				goto next_unmap;
			}
			blkif->persistent_gnt_c++;
			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
				 persistent_gnt->gnt, blkif->persistent_gnt_c,
				 max_mapped_grant_pages(blkif->blk_protocol));
			new_map_idx++;
			goto next;
		}
		if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
			blkif->vbd.overflow_max_grants = 1;
			pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
				 blkif->domid, blkif->vbd.handle);
		}
next_unmap:
		/*
		 * We could not map this grant persistently, so use it as
		 * a non-persistent grant.
		 */
		bitmap_set(pending_req->unmap_seg, seg_idx, 1);
		new_map_idx++;
next:
		seg[seg_idx].offset = (req->u.rw.seg[seg_idx].first_sect << 9);
	}
	return ret;

out_of_memory:
	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
	put_free_pages(blkif, pages_to_gnt, segs_to_map);
	return -ENOMEM;
}

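/*
 * Handle a BLKIF_OP_DISCARD request by forwarding it to the underlying
 * block device via blkdev_issue_discard(), honouring the secure-discard
 * flag when the vbd supports it, and respond on the ring directly (no
 * pending_req is used).
 */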
static int dispatch_discard_io(struct xen_blkif *blkif,
			       struct blkif_request *req)
{
	int err = 0;
	int status = BLKIF_RSP_OKAY;
	struct block_device *bdev = blkif->vbd.bdev;
	unsigned long secure;

	blkif->st_ds_req++;

	xen_blkif_get(blkif);
	secure = (blkif->vbd.discard_secure &&
		  (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
		 BLKDEV_DISCARD_SECURE : 0;

	err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
				   req->u.discard.nr_sectors,
				   GFP_KERNEL, secure);

	if (err == -EOPNOTSUPP) {
		pr_debug(DRV_PFX "discard op failed, not supported\n");
		status = BLKIF_RSP_EOPNOTSUPP;
	} else if (err)
		status = BLKIF_RSP_ERROR;

	make_response(blkif, req->u.discard.id, req->operation, status);
	xen_blkif_put(blkif);
	return err;
}

static int dispatch_other_io(struct xen_blkif *blkif,
			     struct blkif_request *req,
			     struct pending_req *pending_req)
{
	free_req(pending_req);
	make_response(blkif, req->u.other.id, req->operation,
		      BLKIF_RSP_EOPNOTSUPP);
	return -EIO;
}

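/*
 * Wait for all outstanding I/O on this interface to complete; used to
 * emulate BLKIF_OP_WRITE_BARRIER by draining before issuing the flush.
 */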
static void xen_blk_drain_io(struct xen_blkif *blkif)
{
	atomic_set(&blkif->drain, 1);
	do {
		/* The initial value is one, and one refcnt taken at the
		 * start of the xen_blkif_schedule thread. */
		if (atomic_read(&blkif->refcnt) <= 2)
			break;
		wait_for_completion_interruptible_timeout(
				&blkif->drain_complete, HZ);

		if (!atomic_read(&blkif->drain))
			break;
	} while (!kthread_should_stop());
	atomic_set(&blkif->drain, 0);
}

/*
 * Completion callback on the bios. Called via bio->bi_end_io().
 */

static void __end_block_io_op(struct pending_req *pending_req, int error)
{
	/* An error fails the entire request. */
	if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
	    (error == -EOPNOTSUPP)) {
		pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
	} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
		   (error == -EOPNOTSUPP)) {
		pr_debug(DRV_PFX "write barrier op failed, not supported\n");
		xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
	} else if (error) {
		pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
			 " error=%d\n", error);
		pending_req->status = BLKIF_RSP_ERROR;
	}

	/*
	 * If all of the bios have completed it is time to unmap
	 * the grant references associated with 'request' and provide
	 * the proper response on the ring.
	 */
	if (atomic_dec_and_test(&pending_req->pendcnt)) {
		xen_blkbk_unmap(pending_req);
		make_response(pending_req->blkif, pending_req->id,
			      pending_req->operation, pending_req->status);
		xen_blkif_put(pending_req->blkif);
		if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
			if (atomic_read(&pending_req->blkif->drain))
				complete(&pending_req->blkif->drain_complete);
		}
		free_req(pending_req);
	}
}

/*
 * bio callback.
 */
static void end_block_io_op(struct bio *bio, int error)
{
	__end_block_io_op(bio->bi_private, error);
	bio_put(bio);
}



/*
 * Function to copy from the ring buffer the 'struct blkif_request'
 * (which has the sectors we want, number of them, grant references, etc),
 * and transmute it to the block API to hand it over to the proper block disk.
 */
static int
__do_block_io_op(struct xen_blkif *blkif)
{
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	struct blkif_request req;
	struct pending_req *pending_req;
	RING_IDX rc, rp;
	int more_to_do = 0;

	rc = blk_rings->common.req_cons;
	rp = blk_rings->common.sring->req_prod;
	rmb(); /* Ensure we see queued requests up to 'rp'. */

	while (rc != rp) {

		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
			break;

		if (kthread_should_stop()) {
			more_to_do = 1;
			break;
		}

		pending_req = alloc_req();
		if (NULL == pending_req) {
			blkif->st_oo_req++;
			more_to_do = 1;
			break;
		}

		switch (blkif->blk_protocol) {
		case BLKIF_PROTOCOL_NATIVE:
			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
			break;
		case BLKIF_PROTOCOL_X86_32:
			blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
			break;
		case BLKIF_PROTOCOL_X86_64:
			blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
			break;
		default:
			BUG();
		}
		blk_rings->common.req_cons = ++rc; /* before make_response() */

		/* Apply all sanity checks to /private copy/ of request. */
		barrier();

		switch (req.operation) {
		case BLKIF_OP_READ:
		case BLKIF_OP_WRITE:
		case BLKIF_OP_WRITE_BARRIER:
		case BLKIF_OP_FLUSH_DISKCACHE:
			if (dispatch_rw_block_io(blkif, &req, pending_req))
				goto done;
			break;
		case BLKIF_OP_DISCARD:
			free_req(pending_req);
			if (dispatch_discard_io(blkif, &req))
				goto done;
			break;
		default:
			if (dispatch_other_io(blkif, &req, pending_req))
				goto done;
			break;
		}

		/* Yield point for this unbounded loop. */
		cond_resched();
	}
done:
	return more_to_do;
}

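/*
 * Process requests until the ring is empty, using
 * RING_FINAL_CHECK_FOR_REQUESTS to close the race with the frontend
 * posting a new request just as we stop looking.
 */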
static int
do_block_io_op(struct xen_blkif *blkif)
{
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	int more_to_do;

	do {
		more_to_do = __do_block_io_op(blkif);
		if (more_to_do)
			break;

		RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
	} while (more_to_do);

	return more_to_do;
}
/*
 * Transmute the 'struct blkif_request' to a proper 'struct bio'
 * and call 'submit_bio' to pass it to the underlying storage.
 */
static int dispatch_rw_block_io(struct xen_blkif *blkif,
				struct blkif_request *req,
				struct pending_req *pending_req)
{
	struct phys_req preq;
	struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int nseg;
	struct bio *bio = NULL;
	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int i, nbio = 0;
	int operation;
	struct blk_plug plug;
	bool drain = false;
	struct page **pages = pending_req->pages;

	switch (req->operation) {
	case BLKIF_OP_READ:
		blkif->st_rd_req++;
		operation = READ;
		break;
	case BLKIF_OP_WRITE:
		blkif->st_wr_req++;
		operation = WRITE_ODIRECT;
		break;
	case BLKIF_OP_WRITE_BARRIER:
		drain = true;
	case BLKIF_OP_FLUSH_DISKCACHE:
		blkif->st_f_req++;
		operation = WRITE_FLUSH;
		break;
	default:
		operation = 0; /* make gcc happy */
		goto fail_response;
		break;
	}

	/* Check that the number of segments is sane. */
	nseg = req->u.rw.nr_segments;

	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
			 nseg);
		/* Haven't submitted any bio's yet. */
		goto fail_response;
	}

	preq.sector_number = req->u.rw.sector_number;
	preq.nr_sects      = 0;

	pending_req->blkif     = blkif;
	pending_req->id        = req->u.rw.id;
	pending_req->operation = req->operation;
	pending_req->status    = BLKIF_RSP_OKAY;
	pending_req->nr_pages  = nseg;

	for (i = 0; i < nseg; i++) {
		seg[i].nsec = req->u.rw.seg[i].last_sect -
			req->u.rw.seg[i].first_sect + 1;
		if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
			goto fail_response;
		preq.nr_sects += seg[i].nsec;

	}

	if (xen_vbd_translate(&preq, blkif, operation) != 0) {
		pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",
			 operation == READ ? "read" : "write",
			 preq.sector_number,
			 preq.sector_number + preq.nr_sects,
			 blkif->vbd.pdevice);
		goto fail_response;
	}

	/*
	 * This check _MUST_ be done after xen_vbd_translate as the preq.bdev
	 * is set there.
	 */
	for (i = 0; i < nseg; i++) {
		if (((int)preq.sector_number|(int)seg[i].nsec) &
		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
			pr_debug(DRV_PFX "Misaligned I/O request from domain %d",
				 blkif->domid);
			goto fail_response;
		}
	}

	/* Wait on all outstanding I/O's and once that has been completed
	 * issue the WRITE_FLUSH.
	 */
	if (drain)
		xen_blk_drain_io(pending_req->blkif);

	/*
	 * If we have failed at this point, we need to undo the M2P override,
	 * set gnttab_set_unmap_op on all of the grant references and perform
	 * the hypercall to unmap the grants - that is all done in
	 * xen_blkbk_unmap.
	 */
	if (xen_blkbk_map(req, pending_req, seg, pages))
		goto fail_flush;

	/*
	 * This corresponding xen_blkif_put is done in __end_block_io_op, or
	 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
	 */
	xen_blkif_get(blkif);

	for (i = 0; i < nseg; i++) {
		while ((bio == NULL) ||
		       (bio_add_page(bio,
				     pages[i],
				     seg[i].nsec << 9,
				     seg[i].offset) == 0)) {

			bio = bio_alloc(GFP_KERNEL, nseg-i);
			if (unlikely(bio == NULL))
				goto fail_put_bio;

			biolist[nbio++] = bio;
			bio->bi_bdev    = preq.bdev;
			bio->bi_private = pending_req;
			bio->bi_end_io  = end_block_io_op;
			bio->bi_sector  = preq.sector_number;
		}

		preq.sector_number += seg[i].nsec;
	}

	/* This will be hit if the operation was a flush or discard. */
	if (!bio) {
		BUG_ON(operation != WRITE_FLUSH);

		bio = bio_alloc(GFP_KERNEL, 0);
		if (unlikely(bio == NULL))
			goto fail_put_bio;

		biolist[nbio++] = bio;
		bio->bi_bdev    = preq.bdev;
		bio->bi_private = pending_req;
		bio->bi_end_io  = end_block_io_op;
	}

	atomic_set(&pending_req->pendcnt, nbio);
	blk_start_plug(&plug);

	for (i = 0; i < nbio; i++)
		submit_bio(operation, biolist[i]);

	/* Let the I/Os go.. */
	blk_finish_plug(&plug);

	if (operation == READ)
		blkif->st_rd_sect += preq.nr_sects;
	else if (operation & WRITE)
		blkif->st_wr_sect += preq.nr_sects;

	return 0;

 fail_flush:
	xen_blkbk_unmap(pending_req);
 fail_response:
	/* Haven't submitted any bio's yet. */
	make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
	free_req(pending_req);
	msleep(1); /* back off a bit */
	return -EIO;

 fail_put_bio:
	for (i = 0; i < nbio; i++)
		bio_put(biolist[i]);
	atomic_set(&pending_req->pendcnt, 1);
	__end_block_io_op(pending_req, -EINVAL);
	msleep(1); /* back off a bit */
	return -EIO;
}



/*
 * Put a response on the ring on how the operation fared.
 */
static void make_response(struct xen_blkif *blkif, u64 id,
			  unsigned short op, int st)
{
	struct blkif_response  resp;
	unsigned long     flags;
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	int notify;

	resp.id        = id;
	resp.operation = op;
	resp.status    = st;

	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
	/* Place on the response ring for the relevant domain. */
	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	case BLKIF_PROTOCOL_X86_32:
		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	case BLKIF_PROTOCOL_X86_64:
		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
		       &resp, sizeof(resp));
		break;
	default:
		BUG();
	}
	blk_rings->common.rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
	if (notify)
		notify_remote_via_irq(blkif->irq);
}

static int __init xen_blkif_init(void)
{
	int i, mmap_pages;
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
	if (!blkbk) {
		pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
		return -ENOMEM;
	}

	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;

	blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
					xen_blkif_reqs, GFP_KERNEL);
	blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
					mmap_pages, GFP_KERNEL);

	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles) {
		rc = -ENOMEM;
		goto out_of_memory;
	}

	for (i = 0; i < mmap_pages; i++) {
		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
	}
	rc = xen_blkif_interface_init();
	if (rc)
		goto failed_init;

	INIT_LIST_HEAD(&blkbk->pending_free);
	spin_lock_init(&blkbk->pending_free_lock);
	init_waitqueue_head(&blkbk->pending_free_wq);

	for (i = 0; i < xen_blkif_reqs; i++)
		list_add_tail(&blkbk->pending_reqs[i].free_list,
			      &blkbk->pending_free);

	rc = xen_blkif_xenbus_init();
	if (rc)
		goto failed_init;

	return 0;

 out_of_memory:
	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
 failed_init:
	kfree(blkbk->pending_reqs);
	kfree(blkbk->pending_grant_handles);
	kfree(blkbk);
	blkbk = NULL;
	return rc;
}

module_init(xen_blkif_init);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vbd");