Commit | Line | Data |
---|---|---|
1c5de193 JF |
1 | /****************************************************************************** |
2 | * privcmd.c | |
3 | * | |
4 | * Interface to privileged domain-0 commands. | |
5 | * | |
6 | * Copyright (c) 2002-2004, K A Fraser, B Dragovic | |
7 | */ | |
8 | ||
283c0972 JP |
9 | #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt |
10 | ||
1c5de193 | 11 | #include <linux/kernel.h> |
d8414d3c | 12 | #include <linux/module.h> |
1c5de193 JF |
13 | #include <linux/sched.h> |
14 | #include <linux/slab.h> | |
15 | #include <linux/string.h> | |
16 | #include <linux/errno.h> | |
17 | #include <linux/mm.h> | |
18 | #include <linux/mman.h> | |
19 | #include <linux/uaccess.h> | |
20 | #include <linux/swap.h> | |
1c5de193 JF |
21 | #include <linux/highmem.h> |
22 | #include <linux/pagemap.h> | |
23 | #include <linux/seq_file.h> | |
d8414d3c | 24 | #include <linux/miscdevice.h> |
1c5de193 JF |
25 | |
26 | #include <asm/pgalloc.h> | |
27 | #include <asm/pgtable.h> | |
28 | #include <asm/tlb.h> | |
29 | #include <asm/xen/hypervisor.h> | |
30 | #include <asm/xen/hypercall.h> | |
31 | ||
32 | #include <xen/xen.h> | |
33 | #include <xen/privcmd.h> | |
34 | #include <xen/interface/xen.h> | |
35 | #include <xen/features.h> | |
36 | #include <xen/page.h> | |
de1ef206 | 37 | #include <xen/xen-ops.h> |
d71f5139 | 38 | #include <xen/balloon.h> |
f020e290 | 39 | |
d8414d3c BB |
40 | #include "privcmd.h" |
41 | ||
42 | MODULE_LICENSE("GPL"); | |
43 | ||
d71f5139 MR |
44 | #define PRIV_VMA_LOCKED ((void *)1) |
45 | ||
a5deabe0 ALC |
46 | static int privcmd_vma_range_is_mapped( |
47 | struct vm_area_struct *vma, | |
48 | unsigned long addr, | |
49 | unsigned long nr_pages); | |
1c5de193 | 50 | |
1c5de193 JF |
51 | static long privcmd_ioctl_hypercall(void __user *udata) |
52 | { | |
53 | struct privcmd_hypercall hypercall; | |
54 | long ret; | |
55 | ||
56 | if (copy_from_user(&hypercall, udata, sizeof(hypercall))) | |
57 | return -EFAULT; | |
58 | ||
fdfd811d | 59 | xen_preemptible_hcall_begin(); |
1c5de193 JF |
60 | ret = privcmd_call(hypercall.op, |
61 | hypercall.arg[0], hypercall.arg[1], | |
62 | hypercall.arg[2], hypercall.arg[3], | |
63 | hypercall.arg[4]); | |
fdfd811d | 64 | xen_preemptible_hcall_end(); |
1c5de193 JF |
65 | |
66 | return ret; | |
67 | } | |
68 | ||
69 | static void free_page_list(struct list_head *pages) | |
70 | { | |
71 | struct page *p, *n; | |
72 | ||
73 | list_for_each_entry_safe(p, n, pages, lru) | |
74 | __free_page(p); | |
75 | ||
76 | INIT_LIST_HEAD(pages); | |
77 | } | |
78 | ||
79 | /* | |
80 | * Given an array of items in userspace, return a list of pages | |
81 | * containing the data. If copying fails, either because of memory | |
82 | * allocation failure or a problem reading user memory, return an | |
83 | * error code; its up to the caller to dispose of any partial list. | |
84 | */ | |
85 | static int gather_array(struct list_head *pagelist, | |
86 | unsigned nelem, size_t size, | |
ceb90fa0 | 87 | const void __user *data) |
1c5de193 JF |
88 | { |
89 | unsigned pageidx; | |
90 | void *pagedata; | |
91 | int ret; | |
92 | ||
93 | if (size > PAGE_SIZE) | |
94 | return 0; | |
95 | ||
96 | pageidx = PAGE_SIZE; | |
97 | pagedata = NULL; /* quiet, gcc */ | |
98 | while (nelem--) { | |
99 | if (pageidx > PAGE_SIZE-size) { | |
100 | struct page *page = alloc_page(GFP_KERNEL); | |
101 | ||
102 | ret = -ENOMEM; | |
103 | if (page == NULL) | |
104 | goto fail; | |
105 | ||
106 | pagedata = page_address(page); | |
107 | ||
108 | list_add_tail(&page->lru, pagelist); | |
109 | pageidx = 0; | |
110 | } | |
111 | ||
112 | ret = -EFAULT; | |
113 | if (copy_from_user(pagedata + pageidx, data, size)) | |
114 | goto fail; | |
115 | ||
116 | data += size; | |
117 | pageidx += size; | |
118 | } | |
119 | ||
120 | ret = 0; | |
121 | ||
122 | fail: | |
123 | return ret; | |
124 | } | |
125 | ||
126 | /* | |
127 | * Call function "fn" on each element of the array fragmented | |
128 | * over a list of pages. | |
129 | */ | |
130 | static int traverse_pages(unsigned nelem, size_t size, | |
131 | struct list_head *pos, | |
132 | int (*fn)(void *data, void *state), | |
133 | void *state) | |
134 | { | |
135 | void *pagedata; | |
136 | unsigned pageidx; | |
f020e290 | 137 | int ret = 0; |
1c5de193 JF |
138 | |
139 | BUG_ON(size > PAGE_SIZE); | |
140 | ||
141 | pageidx = PAGE_SIZE; | |
142 | pagedata = NULL; /* hush, gcc */ | |
143 | ||
144 | while (nelem--) { | |
145 | if (pageidx > PAGE_SIZE-size) { | |
146 | struct page *page; | |
147 | pos = pos->next; | |
148 | page = list_entry(pos, struct page, lru); | |
149 | pagedata = page_address(page); | |
150 | pageidx = 0; | |
151 | } | |
152 | ||
153 | ret = (*fn)(pagedata + pageidx, state); | |
154 | if (ret) | |
155 | break; | |
156 | pageidx += size; | |
157 | } | |
158 | ||
159 | return ret; | |
160 | } | |
161 | ||
4e8c0c8c DV |
162 | /* |
163 | * Similar to traverse_pages, but use each page as a "block" of | |
164 | * data to be processed as one unit. | |
165 | */ | |
166 | static int traverse_pages_block(unsigned nelem, size_t size, | |
167 | struct list_head *pos, | |
168 | int (*fn)(void *data, int nr, void *state), | |
169 | void *state) | |
170 | { | |
171 | void *pagedata; | |
172 | unsigned pageidx; | |
173 | int ret = 0; | |
174 | ||
175 | BUG_ON(size > PAGE_SIZE); | |
176 | ||
177 | pageidx = PAGE_SIZE; | |
178 | ||
179 | while (nelem) { | |
180 | int nr = (PAGE_SIZE/size); | |
181 | struct page *page; | |
182 | if (nr > nelem) | |
183 | nr = nelem; | |
184 | pos = pos->next; | |
185 | page = list_entry(pos, struct page, lru); | |
186 | pagedata = page_address(page); | |
187 | ret = (*fn)(pagedata, nr, state); | |
188 | if (ret) | |
189 | break; | |
190 | nelem -= nr; | |
191 | } | |
192 | ||
193 | return ret; | |
194 | } | |
195 | ||
1c5de193 JF |
196 | struct mmap_mfn_state { |
197 | unsigned long va; | |
198 | struct vm_area_struct *vma; | |
199 | domid_t domain; | |
200 | }; | |
201 | ||
202 | static int mmap_mfn_range(void *data, void *state) | |
203 | { | |
204 | struct privcmd_mmap_entry *msg = data; | |
205 | struct mmap_mfn_state *st = state; | |
206 | struct vm_area_struct *vma = st->vma; | |
207 | int rc; | |
208 | ||
209 | /* Do not allow range to wrap the address space. */ | |
210 | if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || | |
211 | ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) | |
212 | return -EINVAL; | |
213 | ||
214 | /* Range chunks must be contiguous in va space. */ | |
215 | if ((msg->va != st->va) || | |
216 | ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) | |
217 | return -EINVAL; | |
218 | ||
de1ef206 IC |
219 | rc = xen_remap_domain_mfn_range(vma, |
220 | msg->va & PAGE_MASK, | |
221 | msg->mfn, msg->npages, | |
222 | vma->vm_page_prot, | |
9a032e39 | 223 | st->domain, NULL); |
1c5de193 JF |
224 | if (rc < 0) |
225 | return rc; | |
226 | ||
227 | st->va += msg->npages << PAGE_SHIFT; | |
228 | ||
229 | return 0; | |
230 | } | |
231 | ||
232 | static long privcmd_ioctl_mmap(void __user *udata) | |
233 | { | |
234 | struct privcmd_mmap mmapcmd; | |
235 | struct mm_struct *mm = current->mm; | |
236 | struct vm_area_struct *vma; | |
237 | int rc; | |
238 | LIST_HEAD(pagelist); | |
239 | struct mmap_mfn_state state; | |
240 | ||
d71f5139 MR |
241 | /* We only support privcmd_ioctl_mmap_batch for auto translated. */ |
242 | if (xen_feature(XENFEAT_auto_translated_physmap)) | |
243 | return -ENOSYS; | |
244 | ||
1c5de193 JF |
245 | if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) |
246 | return -EFAULT; | |
247 | ||
248 | rc = gather_array(&pagelist, | |
249 | mmapcmd.num, sizeof(struct privcmd_mmap_entry), | |
250 | mmapcmd.entry); | |
251 | ||
252 | if (rc || list_empty(&pagelist)) | |
253 | goto out; | |
254 | ||
255 | down_write(&mm->mmap_sem); | |
256 | ||
257 | { | |
258 | struct page *page = list_first_entry(&pagelist, | |
259 | struct page, lru); | |
260 | struct privcmd_mmap_entry *msg = page_address(page); | |
261 | ||
262 | vma = find_vma(mm, msg->va); | |
263 | rc = -EINVAL; | |
264 | ||
a5deabe0 | 265 | if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data) |
1c5de193 | 266 | goto out_up; |
a5deabe0 | 267 | vma->vm_private_data = PRIV_VMA_LOCKED; |
1c5de193 JF |
268 | } |
269 | ||
270 | state.va = vma->vm_start; | |
271 | state.vma = vma; | |
272 | state.domain = mmapcmd.dom; | |
273 | ||
274 | rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), | |
275 | &pagelist, | |
276 | mmap_mfn_range, &state); | |
277 | ||
278 | ||
279 | out_up: | |
280 | up_write(&mm->mmap_sem); | |
281 | ||
282 | out: | |
283 | free_page_list(&pagelist); | |
284 | ||
285 | return rc; | |
286 | } | |
287 | ||
288 | struct mmap_batch_state { | |
289 | domid_t domain; | |
290 | unsigned long va; | |
291 | struct vm_area_struct *vma; | |
d71f5139 | 292 | int index; |
ceb90fa0 ALC |
293 | /* A tristate: |
294 | * 0 for no errors | |
295 | * 1 if at least one error has happened (and no | |
296 | * -ENOENT errors have happened) | |
297 | * -ENOENT if at least 1 -ENOENT has happened. | |
298 | */ | |
299 | int global_error; | |
99beae6c | 300 | int version; |
ceb90fa0 ALC |
301 | |
302 | /* User-space mfn array to store errors in the second pass for V1. */ | |
303 | xen_pfn_t __user *user_mfn; | |
99beae6c ALC |
304 | /* User-space int array to store errors in the second pass for V2. */ |
305 | int __user *user_err; | |
1c5de193 JF |
306 | }; |
307 | ||
d71f5139 MR |
308 | /* auto translated dom0 note: if domU being created is PV, then mfn is |
309 | * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP). | |
310 | */ | |
4e8c0c8c | 311 | static int mmap_batch_fn(void *data, int nr, void *state) |
1c5de193 JF |
312 | { |
313 | xen_pfn_t *mfnp = data; | |
314 | struct mmap_batch_state *st = state; | |
d71f5139 MR |
315 | struct vm_area_struct *vma = st->vma; |
316 | struct page **pages = vma->vm_private_data; | |
4e8c0c8c | 317 | struct page **cur_pages = NULL; |
ceb90fa0 ALC |
318 | int ret; |
319 | ||
d71f5139 | 320 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
4e8c0c8c | 321 | cur_pages = &pages[st->index]; |
d71f5139 | 322 | |
4e8c0c8c DV |
323 | BUG_ON(nr < 0); |
324 | ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr, | |
325 | (int *)mfnp, st->vma->vm_page_prot, | |
326 | st->domain, cur_pages); | |
1c5de193 | 327 | |
4e8c0c8c DV |
328 | /* Adjust the global_error? */ |
329 | if (ret != nr) { | |
ceb90fa0 ALC |
330 | if (ret == -ENOENT) |
331 | st->global_error = -ENOENT; | |
332 | else { | |
333 | /* Record that at least one error has happened. */ | |
334 | if (st->global_error == 0) | |
335 | st->global_error = 1; | |
336 | } | |
1c5de193 | 337 | } |
4e8c0c8c DV |
338 | st->va += PAGE_SIZE * nr; |
339 | st->index += nr; | |
1c5de193 JF |
340 | |
341 | return 0; | |
342 | } | |
343 | ||
4e8c0c8c | 344 | static int mmap_return_error(int err, struct mmap_batch_state *st) |
1c5de193 | 345 | { |
4e8c0c8c | 346 | int ret; |
ceb90fa0 | 347 | |
99beae6c | 348 | if (st->version == 1) { |
4e8c0c8c DV |
349 | if (err) { |
350 | xen_pfn_t mfn; | |
351 | ||
352 | ret = get_user(mfn, st->user_mfn); | |
353 | if (ret < 0) | |
354 | return ret; | |
355 | /* | |
356 | * V1 encodes the error codes in the 32bit top | |
357 | * nibble of the mfn (with its known | |
358 | * limitations vis-a-vis 64 bit callers). | |
359 | */ | |
360 | mfn |= (err == -ENOENT) ? | |
361 | PRIVCMD_MMAPBATCH_PAGED_ERROR : | |
362 | PRIVCMD_MMAPBATCH_MFN_ERROR; | |
363 | return __put_user(mfn, st->user_mfn++); | |
364 | } else | |
99beae6c ALC |
365 | st->user_mfn++; |
366 | } else { /* st->version == 2 */ | |
99beae6c ALC |
367 | if (err) |
368 | return __put_user(err, st->user_err++); | |
369 | else | |
370 | st->user_err++; | |
371 | } | |
372 | ||
373 | return 0; | |
1c5de193 JF |
374 | } |
375 | ||
4e8c0c8c DV |
/*
 * traverse_pages_block() callback for the second pass of MMAPBATCH*:
 * @data holds @nr per-frame int status codes, each of which is reported
 * to user space through mmap_return_error().
 *
 * Returns 0, or the first negative value from mmap_return_error().
 */
static int mmap_return_errors(void *data, int nr, void *state)
{
	struct mmap_batch_state *st = state;
	int *cur = data;
	int *end = cur + nr;

	while (cur < end) {
		int rc = mmap_return_error(*cur, st);

		if (rc < 0)
			return rc;
		cur++;
	}

	return 0;
}
390 | ||
d71f5139 MR |
391 | /* Allocate pfns that are then mapped with gmfns from foreign domid. Update |
392 | * the vma with the page info to use later. | |
393 | * Returns: 0 if success, otherwise -errno | |
394 | */ | |
395 | static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) | |
396 | { | |
397 | int rc; | |
398 | struct page **pages; | |
399 | ||
400 | pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL); | |
401 | if (pages == NULL) | |
402 | return -ENOMEM; | |
403 | ||
404 | rc = alloc_xenballooned_pages(numpgs, pages, 0); | |
405 | if (rc != 0) { | |
406 | pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, | |
407 | numpgs, rc); | |
408 | kfree(pages); | |
409 | return -ENOMEM; | |
410 | } | |
a5deabe0 | 411 | BUG_ON(vma->vm_private_data != NULL); |
d71f5139 MR |
412 | vma->vm_private_data = pages; |
413 | ||
414 | return 0; | |
415 | } | |
416 | ||
f31fdf51 JF |
417 | static struct vm_operations_struct privcmd_vm_ops; |
418 | ||
ceb90fa0 | 419 | static long privcmd_ioctl_mmap_batch(void __user *udata, int version) |
1c5de193 JF |
420 | { |
421 | int ret; | |
ceb90fa0 | 422 | struct privcmd_mmapbatch_v2 m; |
1c5de193 JF |
423 | struct mm_struct *mm = current->mm; |
424 | struct vm_area_struct *vma; | |
425 | unsigned long nr_pages; | |
426 | LIST_HEAD(pagelist); | |
427 | struct mmap_batch_state state; | |
428 | ||
ceb90fa0 ALC |
429 | switch (version) { |
430 | case 1: | |
431 | if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) | |
432 | return -EFAULT; | |
433 | /* Returns per-frame error in m.arr. */ | |
434 | m.err = NULL; | |
435 | if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) | |
436 | return -EFAULT; | |
437 | break; | |
438 | case 2: | |
439 | if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2))) | |
440 | return -EFAULT; | |
441 | /* Returns per-frame error code in m.err. */ | |
442 | if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) | |
443 | return -EFAULT; | |
444 | break; | |
445 | default: | |
446 | return -EINVAL; | |
447 | } | |
1c5de193 JF |
448 | |
449 | nr_pages = m.num; | |
450 | if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) | |
451 | return -EINVAL; | |
452 | ||
ceb90fa0 | 453 | ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); |
1c5de193 | 454 | |
ceb90fa0 | 455 | if (ret) |
1c5de193 | 456 | goto out; |
ceb90fa0 ALC |
457 | if (list_empty(&pagelist)) { |
458 | ret = -EINVAL; | |
459 | goto out; | |
460 | } | |
461 | ||
99beae6c ALC |
462 | if (version == 2) { |
463 | /* Zero error array now to only copy back actual errors. */ | |
464 | if (clear_user(m.err, sizeof(int) * m.num)) { | |
465 | ret = -EFAULT; | |
466 | goto out; | |
467 | } | |
ceb90fa0 | 468 | } |
1c5de193 JF |
469 | |
470 | down_write(&mm->mmap_sem); | |
471 | ||
472 | vma = find_vma(mm, m.addr); | |
1c5de193 | 473 | if (!vma || |
a5deabe0 | 474 | vma->vm_ops != &privcmd_vm_ops) { |
68fa965d | 475 | ret = -EINVAL; |
a5deabe0 | 476 | goto out_unlock; |
1c5de193 | 477 | } |
a5deabe0 ALC |
478 | |
479 | /* | |
480 | * Caller must either: | |
481 | * | |
482 | * Map the whole VMA range, which will also allocate all the | |
483 | * pages required for the auto_translated_physmap case. | |
484 | * | |
485 | * Or | |
486 | * | |
487 | * Map unmapped holes left from a previous map attempt (e.g., | |
488 | * because those foreign frames were previously paged out). | |
489 | */ | |
490 | if (vma->vm_private_data == NULL) { | |
491 | if (m.addr != vma->vm_start || | |
492 | m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) { | |
493 | ret = -EINVAL; | |
494 | goto out_unlock; | |
495 | } | |
496 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | |
497 | ret = alloc_empty_pages(vma, m.num); | |
498 | if (ret < 0) | |
499 | goto out_unlock; | |
500 | } else | |
501 | vma->vm_private_data = PRIV_VMA_LOCKED; | |
502 | } else { | |
503 | if (m.addr < vma->vm_start || | |
504 | m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) { | |
505 | ret = -EINVAL; | |
506 | goto out_unlock; | |
507 | } | |
508 | if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) { | |
509 | ret = -EINVAL; | |
510 | goto out_unlock; | |
d71f5139 MR |
511 | } |
512 | } | |
1c5de193 | 513 | |
ceb90fa0 ALC |
514 | state.domain = m.dom; |
515 | state.vma = vma; | |
516 | state.va = m.addr; | |
d71f5139 | 517 | state.index = 0; |
ceb90fa0 | 518 | state.global_error = 0; |
99beae6c | 519 | state.version = version; |
1c5de193 | 520 | |
ceb90fa0 | 521 | /* mmap_batch_fn guarantees ret == 0 */ |
4e8c0c8c DV |
522 | BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), |
523 | &pagelist, mmap_batch_fn, &state)); | |
1c5de193 JF |
524 | |
525 | up_write(&mm->mmap_sem); | |
526 | ||
99beae6c ALC |
527 | if (state.global_error) { |
528 | /* Write back errors in second pass. */ | |
529 | state.user_mfn = (xen_pfn_t *)m.arr; | |
530 | state.user_err = m.err; | |
4e8c0c8c DV |
531 | ret = traverse_pages_block(m.num, sizeof(xen_pfn_t), |
532 | &pagelist, mmap_return_errors, &state); | |
99beae6c ALC |
533 | } else |
534 | ret = 0; | |
ceb90fa0 ALC |
535 | |
536 | /* If we have not had any EFAULT-like global errors then set the global | |
537 | * error to -ENOENT if necessary. */ | |
538 | if ((ret == 0) && (state.global_error == -ENOENT)) | |
539 | ret = -ENOENT; | |
1c5de193 JF |
540 | |
541 | out: | |
542 | free_page_list(&pagelist); | |
1c5de193 | 543 | return ret; |
a5deabe0 ALC |
544 | |
545 | out_unlock: | |
546 | up_write(&mm->mmap_sem); | |
547 | goto out; | |
1c5de193 JF |
548 | } |
549 | ||
550 | static long privcmd_ioctl(struct file *file, | |
551 | unsigned int cmd, unsigned long data) | |
552 | { | |
553 | int ret = -ENOSYS; | |
554 | void __user *udata = (void __user *) data; | |
555 | ||
556 | switch (cmd) { | |
557 | case IOCTL_PRIVCMD_HYPERCALL: | |
558 | ret = privcmd_ioctl_hypercall(udata); | |
559 | break; | |
560 | ||
561 | case IOCTL_PRIVCMD_MMAP: | |
562 | ret = privcmd_ioctl_mmap(udata); | |
563 | break; | |
564 | ||
565 | case IOCTL_PRIVCMD_MMAPBATCH: | |
ceb90fa0 ALC |
566 | ret = privcmd_ioctl_mmap_batch(udata, 1); |
567 | break; | |
568 | ||
569 | case IOCTL_PRIVCMD_MMAPBATCH_V2: | |
570 | ret = privcmd_ioctl_mmap_batch(udata, 2); | |
1c5de193 JF |
571 | break; |
572 | ||
573 | default: | |
574 | ret = -EINVAL; | |
575 | break; | |
576 | } | |
577 | ||
578 | return ret; | |
579 | } | |
580 | ||
d71f5139 MR |
581 | static void privcmd_close(struct vm_area_struct *vma) |
582 | { | |
583 | struct page **pages = vma->vm_private_data; | |
584 | int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | |
b6497b38 | 585 | int rc; |
d71f5139 | 586 | |
9eff37a8 | 587 | if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages) |
d71f5139 MR |
588 | return; |
589 | ||
b6497b38 IC |
590 | rc = xen_unmap_domain_mfn_range(vma, numpgs, pages); |
591 | if (rc == 0) | |
592 | free_xenballooned_pages(numpgs, pages); | |
593 | else | |
594 | pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", | |
595 | numpgs, rc); | |
d71f5139 MR |
596 | kfree(pages); |
597 | } | |
598 | ||
1c5de193 JF |
599 | static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
600 | { | |
441c7416 JF |
601 | printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", |
602 | vma, vma->vm_start, vma->vm_end, | |
603 | vmf->pgoff, vmf->virtual_address); | |
604 | ||
1c5de193 JF |
605 | return VM_FAULT_SIGBUS; |
606 | } | |
607 | ||
608 | static struct vm_operations_struct privcmd_vm_ops = { | |
d71f5139 | 609 | .close = privcmd_close, |
1c5de193 JF |
610 | .fault = privcmd_fault |
611 | }; | |
612 | ||
613 | static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) | |
614 | { | |
e060e7af SS |
615 | /* DONTCOPY is essential for Xen because copy_page_range doesn't know |
616 | * how to recreate these mappings */ | |
314e51b9 KK |
617 | vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY | |
618 | VM_DONTEXPAND | VM_DONTDUMP; | |
1c5de193 JF |
619 | vma->vm_ops = &privcmd_vm_ops; |
620 | vma->vm_private_data = NULL; | |
621 | ||
622 | return 0; | |
623 | } | |
624 | ||
a5deabe0 ALC |
625 | /* |
626 | * For MMAPBATCH*. This allows asserting the singleshot mapping | |
627 | * on a per pfn/pte basis. Mapping calls that fail with ENOENT | |
628 | * can be then retried until success. | |
629 | */ | |
630 | static int is_mapped_fn(pte_t *pte, struct page *pmd_page, | |
631 | unsigned long addr, void *data) | |
632 | { | |
633 | return pte_none(*pte) ? 0 : -EBUSY; | |
634 | } | |
635 | ||
636 | static int privcmd_vma_range_is_mapped( | |
637 | struct vm_area_struct *vma, | |
638 | unsigned long addr, | |
639 | unsigned long nr_pages) | |
1c5de193 | 640 | { |
a5deabe0 ALC |
641 | return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT, |
642 | is_mapped_fn, NULL) != 0; | |
1c5de193 | 643 | } |
1c5de193 | 644 | |
d8414d3c BB |
645 | const struct file_operations xen_privcmd_fops = { |
646 | .owner = THIS_MODULE, | |
1c5de193 JF |
647 | .unlocked_ioctl = privcmd_ioctl, |
648 | .mmap = privcmd_mmap, | |
649 | }; | |
d8414d3c BB |
650 | EXPORT_SYMBOL_GPL(xen_privcmd_fops); |
651 | ||
652 | static struct miscdevice privcmd_dev = { | |
653 | .minor = MISC_DYNAMIC_MINOR, | |
654 | .name = "xen/privcmd", | |
655 | .fops = &xen_privcmd_fops, | |
656 | }; | |
657 | ||
658 | static int __init privcmd_init(void) | |
659 | { | |
660 | int err; | |
661 | ||
662 | if (!xen_domain()) | |
663 | return -ENODEV; | |
664 | ||
665 | err = misc_register(&privcmd_dev); | |
666 | if (err != 0) { | |
283c0972 | 667 | pr_err("Could not register Xen privcmd device\n"); |
d8414d3c BB |
668 | return err; |
669 | } | |
670 | return 0; | |
671 | } | |
672 | ||
673 | static void __exit privcmd_exit(void) | |
674 | { | |
675 | misc_deregister(&privcmd_dev); | |
676 | } | |
677 | ||
678 | module_init(privcmd_init); | |
679 | module_exit(privcmd_exit); |