/******************************************************************************
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/xen-ops.h>

MODULE_LICENSE("GPL");

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

static long privcmd_ioctl_hypercall(void __user *udata)
{
        struct privcmd_hypercall hypercall;
        long ret;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);

        return ret;
}

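/*
 * Illustrative user-space sketch (an assumption, not part of this
 * driver): issuing a hypercall through the privcmd device node
 * /dev/xen/privcmd. __HYPERVISOR_xen_version with command 0
 * (XENVER_version) returns the hypervisor version word.
 *
 *      struct privcmd_hypercall call = {
 *              .op  = __HYPERVISOR_xen_version,
 *              .arg = { 0, 0 },        // XENVER_version, NULL argument
 *      };
 *      int fd = open("/dev/xen/privcmd", O_RDWR);
 *      long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 */
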
static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        const void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}

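/*
 * A minimal (hypothetical) caller sketch for gather_array(), along the
 * lines of what privcmd_ioctl_mmap_batch() below does for real: copy
 * 'num' xen_pfn_t entries from the user pointer 'uarr', then free the
 * list whether or not the copy succeeded.
 *
 *      LIST_HEAD(pagelist);
 *      int rc = gather_array(&pagelist, num, sizeof(xen_pfn_t), uarr);
 *      ...
 *      free_page_list(&pagelist);      // disposes of a partial list too
 */
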
/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;

                pageidx += size;
        }

        return ret;
}

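/*
 * gather_array() and traverse_pages() pair up: one copies a user array
 * into kernel pages, the other walks it element by element. A minimal
 * (hypothetical) callback sketch, assuming a list filled with
 * xen_pfn_t entries as above:
 *
 *      static int count_present(void *data, void *state)
 *      {
 *              xen_pfn_t *mfnp = data;
 *              unsigned long *count = state;
 *
 *              if (*mfnp)
 *                      (*count)++;
 *              return 0;       // returning non-zero stops the walk
 *      }
 *
 *      unsigned long count = 0;
 *      traverse_pages(num, sizeof(xen_pfn_t), &pagelist,
 *                     count_present, &count);
 */
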
struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_mfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = xen_remap_domain_mfn_range(vma,
                                        msg->va & PAGE_MASK,
                                        msg->mfn, msg->npages,
                                        vma->vm_page_prot,
                                        st->domain);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}

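/*
 * Note on the wrap check in mmap_mfn_range(): on unsigned long,
 * -st->va equals ULONG_MAX - st->va + 1, so the range length being
 * >= -st->va is true exactly when st->va plus the length would wrap
 * past the top of the address space; the LONG_MAX >> PAGE_SHIFT test
 * keeps the shift itself from overflowing first.
 */
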
static long privcmd_ioctl_mmap(void __user *udata)
{
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_mfn_state state;

        if (!xen_initial_domain())
                return -EPERM;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) ||
                    !privcmd_enforce_singleshot_mapping(vma))
                        goto out_up;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_mfn_range, &state);

out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}

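/*
 * Illustrative user-space flow for IOCTL_PRIVCMD_MMAP (an assumed
 * caller in dom0 holding a valid domid and starting mfn): mmap() a
 * window on the privcmd fd first, then ask for it to be backed by
 * foreign frames. The window can be populated only once; see
 * privcmd_enforce_singleshot_mapping() below.
 *
 *      void *addr = mmap(NULL, npages << PAGE_SHIFT,
 *                        PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *      struct privcmd_mmap_entry ent = {
 *              .va = (unsigned long)addr, .mfn = mfn, .npages = npages,
 *      };
 *      struct privcmd_mmap cmd = { .num = 1, .dom = domid, .entry = &ent };
 *      int rc = ioctl(fd, IOCTL_PRIVCMD_MMAP, &cmd);
 */
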
struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        /* A tristate:
         *      0 for no errors
         *      1 if at least one error has happened (and no
         *          -ENOENT errors have happened)
         *      -ENOENT if at least 1 -ENOENT has happened.
         */
        int global_error;
        /* An array for individual errors */
        int *err;

        /* User-space mfn array to store errors in the second pass for V1. */
        xen_pfn_t __user *user_mfn;
};

static int mmap_batch_fn(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;
        int ret;

        ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
                                         st->vma->vm_page_prot, st->domain);

        /* Store error code for second pass. */
        *(st->err++) = ret;

        /* And see if it affects the global_error. */
        if (ret < 0) {
                if (ret == -ENOENT)
                        st->global_error = -ENOENT;
                else {
                        /* Record that at least one error has happened. */
                        if (st->global_error == 0)
                                st->global_error = 1;
                }
        }

        st->va += PAGE_SIZE;

        return 0;
}

static int mmap_return_errors_v1(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;
        int err = *(st->err++);

        /*
         * V1 encodes the error codes in the 32bit top nibble of the
         * mfn (with its known limitations vis-a-vis 64 bit callers).
         * Only failed frames are tagged; successful entries are
         * written back unmodified.
         */
        if (err == -ENOENT)
                *mfnp |= PRIVCMD_MMAPBATCH_PAGED_ERROR;
        else if (err)
                *mfnp |= PRIVCMD_MMAPBATCH_MFN_ERROR;

        return __put_user(*mfnp, st->user_mfn++);
}

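/*
 * What the V1 encoding means to a (hypothetical) user-space caller:
 * after IOCTL_PRIVCMD_MMAPBATCH fails, each entry of the mfn array it
 * passed in can be tested against the error nibble, assuming the real
 * mfn leaves the top nibble clear:
 *
 *      if ((arr[i] & PRIVCMD_MMAPBATCH_MFN_ERROR) ==
 *                              PRIVCMD_MMAPBATCH_PAGED_ERROR)
 *              ;       // frame was paged out; the caller may retry
 *      else if (arr[i] & PRIVCMD_MMAPBATCH_MFN_ERROR)
 *              ;       // mapping this frame failed outright
 */
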
static struct vm_operations_struct privcmd_vm_ops;

static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
        int ret;
        struct privcmd_mmapbatch_v2 m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        int *err_array = NULL;
        struct mmap_batch_state state;

        if (!xen_initial_domain())
                return -EPERM;

        switch (version) {
        case 1:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
                        return -EFAULT;
                /* Returns per-frame error in m.arr. */
                m.err = NULL;
                if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
                        return -EFAULT;
                break;
        case 2:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
                        return -EFAULT;
                /* Returns per-frame error code in m.err. */
                if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
                        return -EFAULT;
                break;
        default:
                return -EINVAL;
        }

        nr_pages = m.num;
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

        if (ret)
                goto out;
        if (list_empty(&pagelist)) {
                ret = -EINVAL;
                goto out;
        }

        err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
        if (err_array == NULL) {
                ret = -ENOMEM;
                goto out;
        }

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        if (!vma ||
            vma->vm_ops != &privcmd_vm_ops ||
            (m.addr != vma->vm_start) ||
            ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
            !privcmd_enforce_singleshot_mapping(vma)) {
                up_write(&mm->mmap_sem);
                ret = -EINVAL;
                goto out;
        }

        state.domain = m.dom;
        state.vma = vma;
        state.va = m.addr;
        state.global_error = 0;
        state.err = err_array;

        /* mmap_batch_fn guarantees ret == 0 */
        BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
                              &pagelist, mmap_batch_fn, &state));

        up_write(&mm->mmap_sem);

        if (state.global_error && (version == 1)) {
                /* Write back errors in second pass. */
                state.user_mfn = (xen_pfn_t *)m.arr;
                state.err = err_array;
                ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                                     &pagelist, mmap_return_errors_v1, &state);
        } else if (version == 2) {
                ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
                if (ret)
                        ret = -EFAULT;
        }

        /* If we have not had any EFAULT-like global errors then set the global
         * error to -ENOENT if necessary. */
        if ((ret == 0) && (state.global_error == -ENOENT))
                ret = -ENOENT;

out:
        kfree(err_array);
        free_page_list(&pagelist);

        return ret;
}

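/*
 * Illustrative user-space flow for IOCTL_PRIVCMD_MMAPBATCH_V2 (an
 * assumed privileged caller with 'fd' open on the privcmd device and
 * 'pfns' holding 'num' frame numbers of domain 'domid'). Unlike V1,
 * V2 leaves the frame array intact and reports per-frame status in a
 * separate int array:
 *
 *      void *addr = mmap(NULL, num << PAGE_SHIFT,
 *                        PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *      int err[num];
 *      struct privcmd_mmapbatch_v2 m = {
 *              .num = num, .dom = domid,
 *              .addr = (__u64)(unsigned long)addr,
 *              .arr = pfns, .err = err,
 *      };
 *      int rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &m);
 */
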
static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOSYS;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(udata, 1);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH_V2:
                ret = privcmd_ioctl_mmap_batch(udata, 2);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vma, vma->vm_start, vma->vm_end,
               vmf->pgoff, vmf->virtual_address);

        return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
        .fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* DONTCOPY is essential for Xen because copy_page_range doesn't know
         * how to recreate these mappings */
        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
                         VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
        return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}
#endif

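/*
 * The xchg() above is what makes the mapping single-shot: the first
 * caller atomically swaps vm_private_data from NULL to 1 and wins; any
 * later ioctl against the same VMA sees a non-NULL value and is
 * rejected, so a privcmd VMA can be populated at most once.
 */
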
const struct file_operations xen_privcmd_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = privcmd_ioctl,
        .mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "xen/privcmd",
        .fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
        int err;

        if (!xen_domain())
                return -ENODEV;

        err = misc_register(&privcmd_dev);
        if (err != 0) {
                printk(KERN_ERR "Could not register Xen privcmd device\n");
                return err;
        }
        return 0;
}

static void __exit privcmd_exit(void)
{
        misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);