Commit | Line | Data |
---|---|---|
dd314058 DDG |
1 | /****************************************************************************** |
2 | * gntalloc.c | |
3 | * | |
4 | * Device for creating grant references (in user-space) that may be shared | |
5 | * with other domains. | |
6 | * | |
7 | * This program is distributed in the hope that it will be useful, | |
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
10 | * GNU General Public License for more details. | |
11 | * | |
12 | * You should have received a copy of the GNU General Public License | |
13 | * along with this program; if not, write to the Free Software | |
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
15 | */ | |
16 | ||
17 | /* | |
18 | * This driver exists to allow userspace programs in Linux to allocate kernel | |
19 | * memory that will later be shared with another domain. Without this device, | |
20 | * Linux userspace programs cannot create grant references. | |
21 | * | |
22 | * How this stuff works: | |
23 | * X -> granting a page to Y | |
24 | * Y -> mapping the grant from X | |
25 | * | |
26 | * 1. X uses the gntalloc device to allocate a page of kernel memory, P. | |
27 | * 2. X creates an entry in the grant table that says domid(Y) can access P. | |
28 | * This is done without a hypercall unless the grant table needs expansion. | |
29 | * 3. X gives the grant reference identifier, GREF, to Y. | |
30 | * 4. Y maps the page, either directly into kernel memory for use in a backend | |
31 | * driver, or via the gntdev device to map into the address space of an | |
32 | * application running in Y. This is the first point at which Xen does any | |
33 | * tracking of the page. | |
34 | * 5. A program in X mmap()s a segment of the gntalloc device that corresponds | |
35 | * to the shared page, and can now communicate with Y over the shared page. | |
36 | * | |
37 | * | |
38 | * NOTE TO USERSPACE LIBRARIES: | |
39 | * The grant allocation and mmap()ing are, naturally, two separate operations. | |
40 | * You set up the sharing by calling the create ioctl() and then the mmap(). | |
41 | * Teardown requires munmap() and either close() or ioctl(). | |
42 | * | |
43 | * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant | |
44 | * reference, this device can be used to consume kernel memory by leaving grant | |
45 | * references mapped by another domain when an application exits. Therefore, | |
46 | * there is a global limit on the number of pages that can be allocated. When | |
47 | * all references to the page are unmapped, it will be freed during the next | |
48 | * grant operation. | |
49 | */ | |
50 | ||
51 | #include <linux/atomic.h> | |
52 | #include <linux/module.h> | |
53 | #include <linux/miscdevice.h> | |
54 | #include <linux/kernel.h> | |
55 | #include <linux/init.h> | |
56 | #include <linux/slab.h> | |
57 | #include <linux/fs.h> | |
58 | #include <linux/device.h> | |
59 | #include <linux/mm.h> | |
60 | #include <linux/uaccess.h> | |
61 | #include <linux/types.h> | |
62 | #include <linux/list.h> | |
63 | ||
64 | #include <xen/xen.h> | |
65 | #include <xen/page.h> | |
66 | #include <xen/grant_table.h> | |
67 | #include <xen/gntalloc.h> | |
68 | ||
69 | static int limit = 1024; | |
70 | module_param(limit, int, 0644); | |
71 | MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " | |
72 | "the gntalloc device"); | |
73 | ||
74 | static LIST_HEAD(gref_list); | |
75 | static DEFINE_SPINLOCK(gref_lock); | |
76 | static int gref_size; | |
77 | ||
78 | /* Metadata on a grant reference. */ | |
79 | struct gntalloc_gref { | |
80 | struct list_head next_gref; /* list entry gref_list */ | |
81 | struct list_head next_file; /* list entry file->list, if open */ | |
82 | struct page *page; /* The shared page */ | |
83 | uint64_t file_index; /* File offset for mmap() */ | |
84 | unsigned int users; /* Use count - when zero, waiting on Xen */ | |
85 | grant_ref_t gref_id; /* The grant reference number */ | |
86 | }; | |
87 | ||
88 | struct gntalloc_file_private_data { | |
89 | struct list_head list; | |
90 | uint64_t index; | |
91 | }; | |
92 | ||
93 | static void __del_gref(struct gntalloc_gref *gref); | |
94 | ||
95 | static void do_cleanup(void) | |
96 | { | |
97 | struct gntalloc_gref *gref, *n; | |
98 | list_for_each_entry_safe(gref, n, &gref_list, next_gref) { | |
99 | if (!gref->users) | |
100 | __del_gref(gref); | |
101 | } | |
102 | } | |
103 | ||
104 | static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, | |
105 | uint32_t *gref_ids, struct gntalloc_file_private_data *priv) | |
106 | { | |
107 | int i, rc, readonly; | |
108 | LIST_HEAD(queue_gref); | |
109 | LIST_HEAD(queue_file); | |
110 | struct gntalloc_gref *gref; | |
111 | ||
112 | readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); | |
113 | rc = -ENOMEM; | |
114 | for (i = 0; i < op->count; i++) { | |
115 | gref = kzalloc(sizeof(*gref), GFP_KERNEL); | |
116 | if (!gref) | |
117 | goto undo; | |
118 | list_add_tail(&gref->next_gref, &queue_gref); | |
119 | list_add_tail(&gref->next_file, &queue_file); | |
120 | gref->users = 1; | |
121 | gref->file_index = op->index + i * PAGE_SIZE; | |
122 | gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); | |
123 | if (!gref->page) | |
124 | goto undo; | |
125 | ||
126 | /* Grant foreign access to the page. */ | |
127 | gref->gref_id = gnttab_grant_foreign_access(op->domid, | |
128 | pfn_to_mfn(page_to_pfn(gref->page)), readonly); | |
129 | if (gref->gref_id < 0) { | |
130 | rc = gref->gref_id; | |
131 | goto undo; | |
132 | } | |
133 | gref_ids[i] = gref->gref_id; | |
134 | } | |
135 | ||
136 | /* Add to gref lists. */ | |
137 | spin_lock(&gref_lock); | |
138 | list_splice_tail(&queue_gref, &gref_list); | |
139 | list_splice_tail(&queue_file, &priv->list); | |
140 | spin_unlock(&gref_lock); | |
141 | ||
142 | return 0; | |
143 | ||
144 | undo: | |
145 | spin_lock(&gref_lock); | |
146 | gref_size -= (op->count - i); | |
147 | ||
148 | list_for_each_entry(gref, &queue_file, next_file) { | |
149 | /* __del_gref does not remove from queue_file */ | |
150 | __del_gref(gref); | |
151 | } | |
152 | ||
153 | /* It's possible for the target domain to map the just-allocated grant | |
154 | * references by blindly guessing their IDs; if this is done, then | |
155 | * __del_gref will leave them in the queue_gref list. They need to be | |
156 | * added to the global list so that we can free them when they are no | |
157 | * longer referenced. | |
158 | */ | |
159 | if (unlikely(!list_empty(&queue_gref))) | |
160 | list_splice_tail(&queue_gref, &gref_list); | |
161 | spin_unlock(&gref_lock); | |
162 | return rc; | |
163 | } | |
164 | ||
165 | static void __del_gref(struct gntalloc_gref *gref) | |
166 | { | |
167 | if (gref->gref_id > 0) { | |
168 | if (gnttab_query_foreign_access(gref->gref_id)) | |
169 | return; | |
170 | ||
171 | if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) | |
172 | return; | |
173 | } | |
174 | ||
175 | gref_size--; | |
176 | list_del(&gref->next_gref); | |
177 | ||
178 | if (gref->page) | |
179 | __free_page(gref->page); | |
180 | ||
181 | kfree(gref); | |
182 | } | |
183 | ||
184 | /* finds contiguous grant references in a file, returns the first */ | |
185 | static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, | |
186 | uint64_t index, uint32_t count) | |
187 | { | |
188 | struct gntalloc_gref *rv = NULL, *gref; | |
189 | list_for_each_entry(gref, &priv->list, next_file) { | |
190 | if (gref->file_index == index && !rv) | |
191 | rv = gref; | |
192 | if (rv) { | |
193 | if (gref->file_index != index) | |
194 | return NULL; | |
195 | index += PAGE_SIZE; | |
196 | count--; | |
197 | if (count == 0) | |
198 | return rv; | |
199 | } | |
200 | } | |
201 | return NULL; | |
202 | } | |
203 | ||
204 | /* | |
205 | * ------------------------------------- | |
206 | * File operations. | |
207 | * ------------------------------------- | |
208 | */ | |
209 | static int gntalloc_open(struct inode *inode, struct file *filp) | |
210 | { | |
211 | struct gntalloc_file_private_data *priv; | |
212 | ||
213 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | |
214 | if (!priv) | |
215 | goto out_nomem; | |
216 | INIT_LIST_HEAD(&priv->list); | |
217 | ||
218 | filp->private_data = priv; | |
219 | ||
220 | pr_debug("%s: priv %p\n", __func__, priv); | |
221 | ||
222 | return 0; | |
223 | ||
224 | out_nomem: | |
225 | return -ENOMEM; | |
226 | } | |
227 | ||
228 | static int gntalloc_release(struct inode *inode, struct file *filp) | |
229 | { | |
230 | struct gntalloc_file_private_data *priv = filp->private_data; | |
231 | struct gntalloc_gref *gref; | |
232 | ||
233 | pr_debug("%s: priv %p\n", __func__, priv); | |
234 | ||
235 | spin_lock(&gref_lock); | |
236 | while (!list_empty(&priv->list)) { | |
237 | gref = list_entry(priv->list.next, | |
238 | struct gntalloc_gref, next_file); | |
239 | list_del(&gref->next_file); | |
240 | gref->users--; | |
241 | if (gref->users == 0) | |
242 | __del_gref(gref); | |
243 | } | |
244 | kfree(priv); | |
245 | spin_unlock(&gref_lock); | |
246 | ||
247 | return 0; | |
248 | } | |
249 | ||
250 | static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, | |
251 | struct ioctl_gntalloc_alloc_gref __user *arg) | |
252 | { | |
253 | int rc = 0; | |
254 | struct ioctl_gntalloc_alloc_gref op; | |
255 | uint32_t *gref_ids; | |
256 | ||
257 | pr_debug("%s: priv %p\n", __func__, priv); | |
258 | ||
259 | if (copy_from_user(&op, arg, sizeof(op))) { | |
260 | rc = -EFAULT; | |
261 | goto out; | |
262 | } | |
263 | ||
264 | gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); | |
265 | if (!gref_ids) { | |
266 | rc = -ENOMEM; | |
267 | goto out; | |
268 | } | |
269 | ||
270 | spin_lock(&gref_lock); | |
271 | /* Clean up pages that were at zero (local) users but were still mapped | |
272 | * by remote domains. Since those pages count towards the limit that we | |
273 | * are about to enforce, removing them here is a good idea. | |
274 | */ | |
275 | do_cleanup(); | |
276 | if (gref_size + op.count > limit) { | |
277 | spin_unlock(&gref_lock); | |
278 | rc = -ENOSPC; | |
279 | goto out_free; | |
280 | } | |
281 | gref_size += op.count; | |
282 | op.index = priv->index; | |
283 | priv->index += op.count * PAGE_SIZE; | |
284 | spin_unlock(&gref_lock); | |
285 | ||
286 | rc = add_grefs(&op, gref_ids, priv); | |
287 | if (rc < 0) | |
288 | goto out_free; | |
289 | ||
290 | /* Once we finish add_grefs, it is unsafe to touch the new reference, | |
291 | * since it is possible for a concurrent ioctl to remove it (by guessing | |
292 | * its index). If the userspace application doesn't provide valid memory | |
293 | * to write the IDs to, then it will need to close the file in order to | |
294 | * release - which it will do by segfaulting when it tries to access the | |
295 | * IDs to close them. | |
296 | */ | |
297 | if (copy_to_user(arg, &op, sizeof(op))) { | |
298 | rc = -EFAULT; | |
299 | goto out_free; | |
300 | } | |
301 | if (copy_to_user(arg->gref_ids, gref_ids, | |
302 | sizeof(gref_ids[0]) * op.count)) { | |
303 | rc = -EFAULT; | |
304 | goto out_free; | |
305 | } | |
306 | ||
307 | out_free: | |
308 | kfree(gref_ids); | |
309 | out: | |
310 | return rc; | |
311 | } | |
312 | ||
313 | static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, | |
314 | void __user *arg) | |
315 | { | |
316 | int i, rc = 0; | |
317 | struct ioctl_gntalloc_dealloc_gref op; | |
318 | struct gntalloc_gref *gref, *n; | |
319 | ||
320 | pr_debug("%s: priv %p\n", __func__, priv); | |
321 | ||
322 | if (copy_from_user(&op, arg, sizeof(op))) { | |
323 | rc = -EFAULT; | |
324 | goto dealloc_grant_out; | |
325 | } | |
326 | ||
327 | spin_lock(&gref_lock); | |
328 | gref = find_grefs(priv, op.index, op.count); | |
329 | if (gref) { | |
330 | /* Remove from the file list only, and decrease reference count. | |
331 | * The later call to do_cleanup() will remove from gref_list and | |
332 | * free the memory if the pages aren't mapped anywhere. | |
333 | */ | |
334 | for (i = 0; i < op.count; i++) { | |
335 | n = list_entry(gref->next_file.next, | |
336 | struct gntalloc_gref, next_file); | |
337 | list_del(&gref->next_file); | |
338 | gref->users--; | |
339 | gref = n; | |
340 | } | |
341 | } else { | |
342 | rc = -EINVAL; | |
343 | } | |
344 | ||
345 | do_cleanup(); | |
346 | ||
347 | spin_unlock(&gref_lock); | |
348 | dealloc_grant_out: | |
349 | return rc; | |
350 | } | |
351 | ||
352 | static long gntalloc_ioctl(struct file *filp, unsigned int cmd, | |
353 | unsigned long arg) | |
354 | { | |
355 | struct gntalloc_file_private_data *priv = filp->private_data; | |
356 | ||
357 | switch (cmd) { | |
358 | case IOCTL_GNTALLOC_ALLOC_GREF: | |
359 | return gntalloc_ioctl_alloc(priv, (void __user *)arg); | |
360 | ||
361 | case IOCTL_GNTALLOC_DEALLOC_GREF: | |
362 | return gntalloc_ioctl_dealloc(priv, (void __user *)arg); | |
363 | ||
364 | default: | |
365 | return -ENOIOCTLCMD; | |
366 | } | |
367 | ||
368 | return 0; | |
369 | } | |
370 | ||
371 | static void gntalloc_vma_close(struct vm_area_struct *vma) | |
372 | { | |
373 | struct gntalloc_gref *gref = vma->vm_private_data; | |
374 | if (!gref) | |
375 | return; | |
376 | ||
377 | spin_lock(&gref_lock); | |
378 | gref->users--; | |
379 | if (gref->users == 0) | |
380 | __del_gref(gref); | |
381 | spin_unlock(&gref_lock); | |
382 | } | |
383 | ||
384 | static struct vm_operations_struct gntalloc_vmops = { | |
385 | .close = gntalloc_vma_close, | |
386 | }; | |
387 | ||
388 | static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) | |
389 | { | |
390 | struct gntalloc_file_private_data *priv = filp->private_data; | |
391 | struct gntalloc_gref *gref; | |
392 | int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | |
393 | int rv, i; | |
394 | ||
395 | pr_debug("%s: priv %p, page %lu+%d\n", __func__, | |
396 | priv, vma->vm_pgoff, count); | |
397 | ||
398 | if (!(vma->vm_flags & VM_SHARED)) { | |
399 | printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); | |
400 | return -EINVAL; | |
401 | } | |
402 | ||
403 | spin_lock(&gref_lock); | |
404 | gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); | |
405 | if (gref == NULL) { | |
406 | rv = -ENOENT; | |
407 | pr_debug("%s: Could not find grant reference", | |
408 | __func__); | |
409 | goto out_unlock; | |
410 | } | |
411 | ||
412 | vma->vm_private_data = gref; | |
413 | ||
414 | vma->vm_flags |= VM_RESERVED; | |
415 | vma->vm_flags |= VM_DONTCOPY; | |
416 | vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; | |
417 | ||
418 | vma->vm_ops = &gntalloc_vmops; | |
419 | ||
420 | for (i = 0; i < count; i++) { | |
421 | gref->users++; | |
422 | rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, | |
423 | gref->page); | |
424 | if (rv) | |
425 | goto out_unlock; | |
426 | ||
427 | gref = list_entry(gref->next_file.next, | |
428 | struct gntalloc_gref, next_file); | |
429 | } | |
430 | rv = 0; | |
431 | ||
432 | out_unlock: | |
433 | spin_unlock(&gref_lock); | |
434 | return rv; | |
435 | } | |
436 | ||
437 | static const struct file_operations gntalloc_fops = { | |
438 | .owner = THIS_MODULE, | |
439 | .open = gntalloc_open, | |
440 | .release = gntalloc_release, | |
441 | .unlocked_ioctl = gntalloc_ioctl, | |
442 | .mmap = gntalloc_mmap | |
443 | }; | |
444 | ||
445 | /* | |
446 | * ------------------------------------- | |
447 | * Module creation/destruction. | |
448 | * ------------------------------------- | |
449 | */ | |
450 | static struct miscdevice gntalloc_miscdev = { | |
451 | .minor = MISC_DYNAMIC_MINOR, | |
452 | .name = "xen/gntalloc", | |
453 | .fops = &gntalloc_fops, | |
454 | }; | |
455 | ||
456 | static int __init gntalloc_init(void) | |
457 | { | |
458 | int err; | |
459 | ||
460 | if (!xen_domain()) | |
461 | return -ENODEV; | |
462 | ||
463 | err = misc_register(&gntalloc_miscdev); | |
464 | if (err != 0) { | |
465 | printk(KERN_ERR "Could not register misc gntalloc device\n"); | |
466 | return err; | |
467 | } | |
468 | ||
469 | pr_debug("Created grant allocation device at %d,%d\n", | |
470 | MISC_MAJOR, gntalloc_miscdev.minor); | |
471 | ||
472 | return 0; | |
473 | } | |
474 | ||
475 | static void __exit gntalloc_exit(void) | |
476 | { | |
477 | misc_deregister(&gntalloc_miscdev); | |
478 | } | |
479 | ||
480 | module_init(gntalloc_init); | |
481 | module_exit(gntalloc_exit); | |
482 | ||
483 | MODULE_LICENSE("GPL"); | |
484 | MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " | |
485 | "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); | |
486 | MODULE_DESCRIPTION("User-space grant reference allocator driver"); |