/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;
		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(u16 *val, const u16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(&avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(&head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}

/* Copy some bytes to/from the iovec.  Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;
		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	return done;
}

/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;
	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}

/* No reason for this code to be inline. */
static int move_to_indirect(int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	if (unlikely(desc->len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", desc->len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & VRING_DESC_F_NEXT)
		*up_next = desc->next;
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = desc->len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}

static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp);
	else {
		new = kmalloc(new_num * sizeof(struct iovec), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}

static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}

static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}

static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(void *dst, const void *src, size_t len))
{
	int err, count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	if (riov)
		riov->i = riov->used = 0;
	else if (wiov)
		wiov->i = wiov->used = 0;
	else
		/* You must want something! */
		BUG();

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(&desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
			/* Make sure it's OK, and get offset. */
			len = desc.len;
			if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != desc.len)) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(desc.addr + range.offset);
			err = move_to_indirect(&up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (count++ == vrh->vring.num) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & VRING_DESC_F_WRITE)
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->i)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = desc.len;
		if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(desc.addr + range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != desc.len)) {
			desc.len -= len;
			desc.addr += len;
			goto again;
		}

		if (desc.flags & VRING_DESC_F_NEXT) {
			i = desc.next;
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}

static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(u16 *p, u16 val),
				    int (*putused)(struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(&used_ring->ring[off], used, part);
		if (!err)
			err = putused(&used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(&used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(&vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}

434 | ||
435 | static inline int __vringh_need_notify(struct vringh *vrh, | |
436 | int (*getu16)(u16 *val, const u16 *p)) | |
437 | { | |
438 | bool notify; | |
439 | u16 used_event; | |
440 | int err; | |
441 | ||
442 | /* Flush out used index update. This is paired with the | |
443 | * barrier that the Guest executes when enabling | |
444 | * interrupts. */ | |
445 | virtio_mb(vrh->weak_barriers); | |
446 | ||
447 | /* Old-style, without event indices. */ | |
448 | if (!vrh->event_indices) { | |
449 | u16 flags; | |
450 | err = getu16(&flags, &vrh->vring.avail->flags); | |
451 | if (err) { | |
452 | vringh_bad("Failed to get flags at %p", | |
453 | &vrh->vring.avail->flags); | |
454 | return err; | |
455 | } | |
456 | return (!(flags & VRING_AVAIL_F_NO_INTERRUPT)); | |
457 | } | |
458 | ||
459 | /* Modern: we know when other side wants to know. */ | |
460 | err = getu16(&used_event, &vring_used_event(&vrh->vring)); | |
461 | if (err) { | |
462 | vringh_bad("Failed to get used event idx at %p", | |
463 | &vring_used_event(&vrh->vring)); | |
464 | return err; | |
465 | } | |
466 | ||
467 | /* Just in case we added so many that we wrap. */ | |
468 | if (unlikely(vrh->completed > 0xffff)) | |
469 | notify = true; | |
470 | else | |
471 | notify = vring_need_event(used_event, | |
472 | vrh->last_used_idx + vrh->completed, | |
473 | vrh->last_used_idx); | |
474 | ||
475 | vrh->last_used_idx += vrh->completed; | |
476 | vrh->completed = 0; | |
477 | return notify; | |
478 | } | |
479 | ||
static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(u16 *val, const u16 *p),
					  int (*putu16)(u16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(&vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(&vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(&avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}

static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(u16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(&vrh->vring.used->flags, VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}

/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(u16 *val, const u16 *p)
{
	return get_user(*val, (__force u16 __user *)p);
}

static inline int putu16_user(u16 *p, u16 val)
{
	return put_user(val, (__force u16 __user *)p);
}

static inline int copydesc_user(void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(void *src, void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}

/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u32 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc __user *desc,
		     struct vring_avail __user *avail,
		     struct vring_used __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);
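
/*
 * Example (illustrative sketch, not part of this file's build): setting up
 * a vringh over a guest ring that the caller has already mapped into its
 * own address space.  The guest_desc/guest_avail/guest_used pointers,
 * ring_features and RING_NUM are hypothetical and come from whatever
 * device setup the caller does.
 *
 *	struct vringh vrh;
 *	int err;
 *
 *	err = vringh_init_user(&vrh, ring_features, RING_NUM, true,
 *			       guest_desc, guest_avail, guest_used);
 *	if (err)
 *		return err;
 *
 * Only num is validated here: checking that the three pointers really
 * refer to accessible guest memory is the caller's job.
 */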
605 | ||
606 | /** | |
607 | * vringh_getdesc_user - get next available descriptor from userspace ring. | |
608 | * @vrh: the userspace vring. | |
609 | * @riov: where to put the readable descriptors (or NULL) | |
610 | * @wiov: where to put the writable descriptors (or NULL) | |
611 | * @getrange: function to call to check ranges. | |
612 | * @head: head index we received, for passing to vringh_complete_user(). | |
613 | * | |
614 | * Returns 0 if there was no descriptor, 1 if there was, or -errno. | |
615 | * | |
616 | * Note that on error return, you can tell the difference between an | |
617 | * invalid ring and a single invalid descriptor: in the former case, | |
618 | * *head will be vrh->vring.num. You may be able to ignore an invalid | |
619 | * descriptor, but there's not much you can do with an invalid ring. | |
620 | * | |
621 | * Note that you may need to clean up riov and wiov, even on error! | |
622 | */ | |
623 | int vringh_getdesc_user(struct vringh *vrh, | |
624 | struct vringh_iov *riov, | |
625 | struct vringh_iov *wiov, | |
626 | bool (*getrange)(struct vringh *vrh, | |
627 | u64 addr, struct vringh_range *r), | |
628 | u16 *head) | |
629 | { | |
630 | int err; | |
631 | ||
632 | *head = vrh->vring.num; | |
633 | err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx); | |
634 | if (err < 0) | |
635 | return err; | |
636 | ||
637 | /* Empty... */ | |
638 | if (err == vrh->vring.num) | |
639 | return 0; | |
640 | ||
641 | /* We need the layouts to be the identical for this to work */ | |
642 | BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov)); | |
643 | BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) != | |
644 | offsetof(struct vringh_iov, iov)); | |
645 | BUILD_BUG_ON(offsetof(struct vringh_kiov, i) != | |
646 | offsetof(struct vringh_iov, i)); | |
647 | BUILD_BUG_ON(offsetof(struct vringh_kiov, used) != | |
648 | offsetof(struct vringh_iov, used)); | |
649 | BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) != | |
650 | offsetof(struct vringh_iov, max_num)); | |
651 | BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); | |
652 | BUILD_BUG_ON(offsetof(struct iovec, iov_base) != | |
653 | offsetof(struct kvec, iov_base)); | |
654 | BUILD_BUG_ON(offsetof(struct iovec, iov_len) != | |
655 | offsetof(struct kvec, iov_len)); | |
656 | BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base) | |
657 | != sizeof(((struct kvec *)NULL)->iov_base)); | |
658 | BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len) | |
659 | != sizeof(((struct kvec *)NULL)->iov_len)); | |
660 | ||
661 | *head = err; | |
662 | err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov, | |
663 | (struct vringh_kiov *)wiov, | |
664 | range_check, getrange, GFP_KERNEL, copydesc_user); | |
665 | if (err) | |
666 | return err; | |
667 | ||
668 | return 1; | |
669 | } | |
670 | EXPORT_SYMBOL(vringh_getdesc_user); | |
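
/*
 * Example (illustrative sketch): a getrange callback for the simple case
 * of one contiguous guest memory region.  The region_gpa/region_len/
 * region_va variables are hypothetical; a real callback would look @addr
 * up in whatever memory map the caller maintains.  range_check() adds
 * r->offset to a descriptor's guest address to form the pointer it hands
 * to the userspace accessors, so offset must convert a guest address into
 * our mapped virtual address.
 *
 *	static bool my_getrange(struct vringh *vrh, u64 addr,
 *				struct vringh_range *r)
 *	{
 *		if (addr < region_gpa || addr > region_gpa + region_len - 1)
 *			return false;
 *		r->start = region_gpa;
 *		r->end_incl = region_gpa + region_len - 1;
 *		r->offset = (u64)(unsigned long)region_va - region_gpa;
 *		return true;
 *	}
 */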
671 | ||
672 | /** | |
673 | * vringh_iov_pull_user - copy bytes from vring_iov. | |
674 | * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume) | |
675 | * @dst: the place to copy. | |
676 | * @len: the maximum length to copy. | |
677 | * | |
678 | * Returns the bytes copied <= len or a negative errno. | |
679 | */ | |
680 | ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len) | |
681 | { | |
682 | return vringh_iov_xfer((struct vringh_kiov *)riov, | |
683 | dst, len, xfer_from_user); | |
684 | } | |
685 | EXPORT_SYMBOL(vringh_iov_pull_user); | |
686 | ||
687 | /** | |
688 | * vringh_iov_push_user - copy bytes into vring_iov. | |
689 | * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume) | |
690 | * @dst: the place to copy. | |
691 | * @len: the maximum length to copy. | |
692 | * | |
693 | * Returns the bytes copied <= len or a negative errno. | |
694 | */ | |
695 | ssize_t vringh_iov_push_user(struct vringh_iov *wiov, | |
696 | const void *src, size_t len) | |
697 | { | |
698 | return vringh_iov_xfer((struct vringh_kiov *)wiov, | |
699 | (void *)src, len, xfer_to_user); | |
700 | } | |
701 | EXPORT_SYMBOL(vringh_iov_push_user); | |
702 | ||
703 | /** | |
704 | * vringh_abandon_user - we've decided not to handle the descriptor(s). | |
705 | * @vrh: the vring. | |
706 | * @num: the number of descriptors to put back (ie. num | |
707 | * vringh_get_user() to undo). | |
708 | * | |
709 | * The next vringh_get_user() will return the old descriptor(s) again. | |
710 | */ | |
711 | void vringh_abandon_user(struct vringh *vrh, unsigned int num) | |
712 | { | |
713 | /* We only update vring_avail_event(vr) when we want to be notified, | |
714 | * so we haven't changed that yet. */ | |
715 | vrh->last_avail_idx -= num; | |
716 | } | |
717 | EXPORT_SYMBOL(vringh_abandon_user); | |
718 | ||
719 | /** | |
720 | * vringh_complete_user - we've finished with descriptor, publish it. | |
721 | * @vrh: the vring. | |
722 | * @head: the head as filled in by vringh_getdesc_user. | |
723 | * @len: the length of data we have written. | |
724 | * | |
725 | * You should check vringh_need_notify_user() after one or more calls | |
726 | * to this function. | |
727 | */ | |
728 | int vringh_complete_user(struct vringh *vrh, u16 head, u32 len) | |
729 | { | |
730 | struct vring_used_elem used; | |
731 | ||
732 | used.id = head; | |
733 | used.len = len; | |
734 | return __vringh_complete(vrh, &used, 1, putu16_user, putused_user); | |
735 | } | |
736 | EXPORT_SYMBOL(vringh_complete_user); | |
737 | ||
738 | /** | |
739 | * vringh_complete_multi_user - we've finished with many descriptors. | |
740 | * @vrh: the vring. | |
741 | * @used: the head, length pairs. | |
742 | * @num_used: the number of used elements. | |
743 | * | |
744 | * You should check vringh_need_notify_user() after one or more calls | |
745 | * to this function. | |
746 | */ | |
747 | int vringh_complete_multi_user(struct vringh *vrh, | |
748 | const struct vring_used_elem used[], | |
749 | unsigned num_used) | |
750 | { | |
751 | return __vringh_complete(vrh, used, num_used, | |
752 | putu16_user, putused_user); | |
753 | } | |
754 | EXPORT_SYMBOL(vringh_complete_multi_user); | |
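
/*
 * Example (illustrative sketch): batching completions so the used index is
 * only published once.  heads[], lens[], n and BATCH are hypothetical
 * results of earlier vringh_getdesc_user() calls.
 *
 *	struct vring_used_elem used[BATCH];
 *	unsigned int j;
 *
 *	for (j = 0; j < n; j++) {
 *		used[j].id = heads[j];
 *		used[j].len = lens[j];
 *	}
 *	err = vringh_complete_multi_user(&vrh, used, n);
 */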
755 | ||
756 | /** | |
757 | * vringh_notify_enable_user - we want to know if something changes. | |
758 | * @vrh: the vring. | |
759 | * | |
760 | * This always enables notifications, but returns false if there are | |
761 | * now more buffers available in the vring. | |
762 | */ | |
763 | bool vringh_notify_enable_user(struct vringh *vrh) | |
764 | { | |
765 | return __vringh_notify_enable(vrh, getu16_user, putu16_user); | |
766 | } | |
767 | EXPORT_SYMBOL(vringh_notify_enable_user); | |
768 | ||
769 | /** | |
770 | * vringh_notify_disable_user - don't tell us if something changes. | |
771 | * @vrh: the vring. | |
772 | * | |
773 | * This is our normal running state: we disable and then only enable when | |
774 | * we're going to sleep. | |
775 | */ | |
776 | void vringh_notify_disable_user(struct vringh *vrh) | |
777 | { | |
778 | __vringh_notify_disable(vrh, putu16_user); | |
779 | } | |
780 | EXPORT_SYMBOL(vringh_notify_disable_user); | |
781 | ||
782 | /** | |
783 | * vringh_need_notify_user - must we tell the other side about used buffers? | |
784 | * @vrh: the vring we've called vringh_complete_user() on. | |
785 | * | |
786 | * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. | |
787 | */ | |
788 | int vringh_need_notify_user(struct vringh *vrh) | |
789 | { | |
790 | return __vringh_need_notify(vrh, getu16_user); | |
791 | } | |
792 | EXPORT_SYMBOL(vringh_need_notify_user); | |
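
/*
 * Example (illustrative sketch): a minimal service loop over a userspace
 * ring, echoing each readable buffer back into its writable part.  buf,
 * my_getrange and notify_guest() are hypothetical; vringh_iov_init() and
 * vringh_iov_cleanup() are the helpers from linux/vringh.h.
 *
 *	struct vringh_iov riov, wiov;
 *	ssize_t len;
 *	u16 head;
 *	int err;
 *
 *	vringh_iov_init(&riov, NULL, 0);	// vringh allocates as needed
 *	vringh_iov_init(&wiov, NULL, 0);
 *
 *	while ((err = vringh_getdesc_user(&vrh, &riov, &wiov,
 *					  my_getrange, &head)) == 1) {
 *		len = vringh_iov_pull_user(&riov, buf, sizeof(buf));
 *		if (len < 0)
 *			break;
 *		len = vringh_iov_push_user(&wiov, buf, len);
 *		if (len < 0)
 *			break;
 *		if (vringh_complete_user(&vrh, head, len))
 *			break;
 *	}
 *	vringh_iov_cleanup(&riov);
 *	vringh_iov_cleanup(&wiov);
 *
 *	if (vringh_need_notify_user(&vrh) > 0)
 *		notify_guest();
 */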
793 | ||
794 | /* Kernelspace access helpers. */ | |
795 | static inline int getu16_kern(u16 *val, const u16 *p) | |
796 | { | |
797 | *val = ACCESS_ONCE(*p); | |
798 | return 0; | |
799 | } | |
800 | ||
801 | static inline int putu16_kern(u16 *p, u16 val) | |
802 | { | |
803 | ACCESS_ONCE(*p) = val; | |
804 | return 0; | |
805 | } | |
806 | ||
807 | static inline int copydesc_kern(void *dst, const void *src, size_t len) | |
808 | { | |
809 | memcpy(dst, src, len); | |
810 | return 0; | |
811 | } | |
812 | ||
813 | static inline int putused_kern(struct vring_used_elem *dst, | |
814 | const struct vring_used_elem *src, | |
815 | unsigned int num) | |
816 | { | |
817 | memcpy(dst, src, num * sizeof(*dst)); | |
818 | return 0; | |
819 | } | |
820 | ||
821 | static inline int xfer_kern(void *src, void *dst, size_t len) | |
822 | { | |
823 | memcpy(dst, src, len); | |
824 | return 0; | |
825 | } | |
826 | ||
827 | /** | |
828 | * vringh_init_kern - initialize a vringh for a kernelspace vring. | |
829 | * @vrh: the vringh to initialize. | |
830 | * @features: the feature bits for this ring. | |
831 | * @num: the number of elements. | |
832 | * @weak_barriers: true if we only need memory barriers, not I/O. | |
833 | * @desc: the userpace descriptor pointer. | |
834 | * @avail: the userpace avail pointer. | |
835 | * @used: the userpace used pointer. | |
836 | * | |
837 | * Returns an error if num is invalid. | |
838 | */ | |
839 | int vringh_init_kern(struct vringh *vrh, u32 features, | |
840 | unsigned int num, bool weak_barriers, | |
841 | struct vring_desc *desc, | |
842 | struct vring_avail *avail, | |
843 | struct vring_used *used) | |
844 | { | |
845 | /* Sane power of 2 please! */ | |
846 | if (!num || num > 0xffff || (num & (num - 1))) { | |
847 | vringh_bad("Bad ring size %u", num); | |
848 | return -EINVAL; | |
849 | } | |
850 | ||
851 | vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX)); | |
852 | vrh->weak_barriers = weak_barriers; | |
853 | vrh->completed = 0; | |
854 | vrh->last_avail_idx = 0; | |
855 | vrh->last_used_idx = 0; | |
856 | vrh->vring.num = num; | |
857 | vrh->vring.desc = desc; | |
858 | vrh->vring.avail = avail; | |
859 | vrh->vring.used = used; | |
860 | return 0; | |
861 | } | |
862 | EXPORT_SYMBOL(vringh_init_kern); | |
863 | ||
864 | /** | |
865 | * vringh_getdesc_kern - get next available descriptor from kernelspace ring. | |
866 | * @vrh: the kernelspace vring. | |
867 | * @riov: where to put the readable descriptors (or NULL) | |
868 | * @wiov: where to put the writable descriptors (or NULL) | |
869 | * @head: head index we received, for passing to vringh_complete_kern(). | |
870 | * @gfp: flags for allocating larger riov/wiov. | |
871 | * | |
872 | * Returns 0 if there was no descriptor, 1 if there was, or -errno. | |
873 | * | |
874 | * Note that on error return, you can tell the difference between an | |
875 | * invalid ring and a single invalid descriptor: in the former case, | |
876 | * *head will be vrh->vring.num. You may be able to ignore an invalid | |
877 | * descriptor, but there's not much you can do with an invalid ring. | |
878 | * | |
879 | * Note that you may need to clean up riov and wiov, even on error! | |
880 | */ | |
881 | int vringh_getdesc_kern(struct vringh *vrh, | |
882 | struct vringh_kiov *riov, | |
883 | struct vringh_kiov *wiov, | |
884 | u16 *head, | |
885 | gfp_t gfp) | |
886 | { | |
887 | int err; | |
888 | ||
889 | err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx); | |
890 | if (err < 0) | |
891 | return err; | |
892 | ||
893 | /* Empty... */ | |
894 | if (err == vrh->vring.num) | |
895 | return 0; | |
896 | ||
897 | *head = err; | |
898 | err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL, | |
899 | gfp, copydesc_kern); | |
900 | if (err) | |
901 | return err; | |
902 | ||
903 | return 1; | |
904 | } | |
905 | EXPORT_SYMBOL(vringh_getdesc_kern); | |
906 | ||
907 | /** | |
908 | * vringh_iov_pull_kern - copy bytes from vring_iov. | |
909 | * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume) | |
910 | * @dst: the place to copy. | |
911 | * @len: the maximum length to copy. | |
912 | * | |
913 | * Returns the bytes copied <= len or a negative errno. | |
914 | */ | |
915 | ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len) | |
916 | { | |
917 | return vringh_iov_xfer(riov, dst, len, xfer_kern); | |
918 | } | |
919 | EXPORT_SYMBOL(vringh_iov_pull_kern); | |
920 | ||
921 | /** | |
922 | * vringh_iov_push_kern - copy bytes into vring_iov. | |
923 | * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume) | |
924 | * @dst: the place to copy. | |
925 | * @len: the maximum length to copy. | |
926 | * | |
927 | * Returns the bytes copied <= len or a negative errno. | |
928 | */ | |
929 | ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, | |
930 | const void *src, size_t len) | |
931 | { | |
932 | return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern); | |
933 | } | |
934 | EXPORT_SYMBOL(vringh_iov_push_kern); | |
935 | ||
936 | /** | |
937 | * vringh_abandon_kern - we've decided not to handle the descriptor(s). | |
938 | * @vrh: the vring. | |
939 | * @num: the number of descriptors to put back (ie. num | |
940 | * vringh_get_kern() to undo). | |
941 | * | |
942 | * The next vringh_get_kern() will return the old descriptor(s) again. | |
943 | */ | |
944 | void vringh_abandon_kern(struct vringh *vrh, unsigned int num) | |
945 | { | |
946 | /* We only update vring_avail_event(vr) when we want to be notified, | |
947 | * so we haven't changed that yet. */ | |
948 | vrh->last_avail_idx -= num; | |
949 | } | |
950 | EXPORT_SYMBOL(vringh_abandon_kern); | |
951 | ||
952 | /** | |
953 | * vringh_complete_kern - we've finished with descriptor, publish it. | |
954 | * @vrh: the vring. | |
955 | * @head: the head as filled in by vringh_getdesc_kern. | |
956 | * @len: the length of data we have written. | |
957 | * | |
958 | * You should check vringh_need_notify_kern() after one or more calls | |
959 | * to this function. | |
960 | */ | |
961 | int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len) | |
962 | { | |
963 | struct vring_used_elem used; | |
964 | ||
965 | used.id = head; | |
966 | used.len = len; | |
967 | ||
968 | return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern); | |
969 | } | |
970 | EXPORT_SYMBOL(vringh_complete_kern); | |
971 | ||
972 | /** | |
973 | * vringh_notify_enable_kern - we want to know if something changes. | |
974 | * @vrh: the vring. | |
975 | * | |
976 | * This always enables notifications, but returns false if there are | |
977 | * now more buffers available in the vring. | |
978 | */ | |
979 | bool vringh_notify_enable_kern(struct vringh *vrh) | |
980 | { | |
981 | return __vringh_notify_enable(vrh, getu16_kern, putu16_kern); | |
982 | } | |
983 | EXPORT_SYMBOL(vringh_notify_enable_kern); | |
984 | ||
985 | /** | |
986 | * vringh_notify_disable_kern - don't tell us if something changes. | |
987 | * @vrh: the vring. | |
988 | * | |
989 | * This is our normal running state: we disable and then only enable when | |
990 | * we're going to sleep. | |
991 | */ | |
992 | void vringh_notify_disable_kern(struct vringh *vrh) | |
993 | { | |
994 | __vringh_notify_disable(vrh, putu16_kern); | |
995 | } | |
996 | EXPORT_SYMBOL(vringh_notify_disable_kern); | |
997 | ||
998 | /** | |
999 | * vringh_need_notify_kern - must we tell the other side about used buffers? | |
1000 | * @vrh: the vring we've called vringh_complete_kern() on. | |
1001 | * | |
1002 | * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. | |
1003 | */ | |
1004 | int vringh_need_notify_kern(struct vringh *vrh) | |
1005 | { | |
1006 | return __vringh_need_notify(vrh, getu16_kern); | |
1007 | } | |
1008 | EXPORT_SYMBOL(vringh_need_notify_kern); | |
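
/*
 * Example (illustrative sketch): draining a kernelspace ring set up with
 * vringh_init_kern().  buf, process() and notify_other_side() are
 * hypothetical; vringh_kiov_init() and vringh_kiov_cleanup() are the
 * helpers from linux/vringh.h.  Since these buffers are guest-readable
 * only, we complete them with a written length of 0.
 *
 *	struct vringh_kiov riov;
 *	ssize_t len;
 *	u16 head;
 *	int err;
 *
 *	vringh_kiov_init(&riov, NULL, 0);
 *	while ((err = vringh_getdesc_kern(&vrh, &riov, NULL, &head,
 *					  GFP_KERNEL)) == 1) {
 *		len = vringh_iov_pull_kern(&riov, buf, sizeof(buf));
 *		if (len < 0)
 *			break;
 *		process(buf, len);
 *		if (vringh_complete_kern(&vrh, head, 0))
 *			break;
 *	}
 *	vringh_kiov_cleanup(&riov);
 *
 *	if (vringh_need_notify_kern(&vrh) > 0)
 *		notify_other_side();
 */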

MODULE_LICENSE("GPL");