2 * mpx.c - Memory Protection eXtensions
4 * Copyright (c) 2014, Intel Corporation.
5 * Qiaowei Ren <qiaowei.ren@intel.com>
6 * Dave Hansen <dave.hansen@intel.com>
8 #include <linux/kernel.h>
9 #include <linux/slab.h>
10 #include <linux/syscalls.h>
11 #include <linux/sched/sysctl.h>
/*
 * Takes a VMA and returns a constant string.  This function is
 * installed as the .name hook of mpx_vma_ops.
 */
16 static const char *mpx_mapping_name(struct vm_area_struct
*vma
)
/*
 * VMA operations table whose .name hook is mpx_mapping_name().
 * mpx_mmap() stores a pointer to this table in vma->vm_ops of the
 * mapping it creates.
 */
21 static struct vm_operations_struct mpx_vma_ops
= {
22 .name
= mpx_mapping_name
,
26 * This is really a simplified "vm_mmap". It only handles MPX
27 * bounds tables (the bounds directory is user-allocated).
29 * Later on, we use the vma->vm_ops to uniquely identify these
/*
 * mpx_mmap() - create an anonymous, private mapping of @len bytes in
 * current->mm and tag it with mpx_vma_ops.
 *
 * @len: size of the mapping; it is compared against MPX_BD_SIZE_BYTES
 *       and MPX_BT_SIZE_BYTES before anything else is done.
 *
 * Sequence, all under mm->mmap_sem held for write:
 *   - compare mm->map_count with sysctl_max_map_count;
 *   - pick an address with get_unmapped_area() and test it for
 *     page alignment (addr & ~PAGE_MASK);
 *   - build vm_flags from VM_READ | VM_WRITE | VM_MPX, mm->def_flags
 *     and the VM_MAY{READ,WRITE,EXEC} bits;
 *   - call mmap_region() with pgoff derived from the address, and
 *     test the result with IS_ERR_VALUE();
 *   - look the new VMA up with find_vma() and point its vm_ops at
 *     mpx_vma_ops.
 *
 * If VM_LOCKED ended up in vm_flags, mmap_sem is released and
 * mm_populate() is called on the new range.  A further up_write()
 * follows that branch.
 *
 * NOTE(review): the statements executed when each of the checks
 * above fails are not spelled out here -- confirm the error values
 * against the callers.
 */
32 static unsigned long mpx_mmap(unsigned long len
)
35 unsigned long addr
, pgoff
;
36 struct mm_struct
*mm
= current
->mm
;
38 struct vm_area_struct
*vma
;
40 /* Only bounds table and bounds directory can be allocated here */
41 if (len
!= MPX_BD_SIZE_BYTES
&& len
!= MPX_BT_SIZE_BYTES
)
44 down_write(&mm
->mmap_sem
);
46 /* Too many mappings? */
47 if (mm
->map_count
> sysctl_max_map_count
) {
52 /* Obtain the address to map to. we verify (or select) it and ensure
53 * that it represents a valid section of the address space.
55 addr
= get_unmapped_area(NULL
, 0, len
, 0, MAP_ANONYMOUS
| MAP_PRIVATE
);
56 if (addr
& ~PAGE_MASK
) {
61 vm_flags
= VM_READ
| VM_WRITE
| VM_MPX
|
62 mm
->def_flags
| VM_MAYREAD
| VM_MAYWRITE
| VM_MAYEXEC
;
64 /* Set pgoff according to addr for anon_vma */
65 pgoff
= addr
>> PAGE_SHIFT
;
67 ret
= mmap_region(NULL
, addr
, len
, vm_flags
, pgoff
);
68 if (IS_ERR_VALUE(ret
))
71 vma
= find_vma(mm
, ret
);
76 vma
->vm_ops
= &mpx_vma_ops
;
78 if (vm_flags
& VM_LOCKED
) {
79 up_write(&mm
->mmap_sem
);
80 mm_populate(ret
, len
);
85 up_write(&mm
->mmap_sem
);
95 static unsigned long get_reg_offset(struct insn
*insn
, struct pt_regs
*regs
,
100 static const int regoff
[] = {
101 offsetof(struct pt_regs
, ax
),
102 offsetof(struct pt_regs
, cx
),
103 offsetof(struct pt_regs
, dx
),
104 offsetof(struct pt_regs
, bx
),
105 offsetof(struct pt_regs
, sp
),
106 offsetof(struct pt_regs
, bp
),
107 offsetof(struct pt_regs
, si
),
108 offsetof(struct pt_regs
, di
),
110 offsetof(struct pt_regs
, r8
),
111 offsetof(struct pt_regs
, r9
),
112 offsetof(struct pt_regs
, r10
),
113 offsetof(struct pt_regs
, r11
),
114 offsetof(struct pt_regs
, r12
),
115 offsetof(struct pt_regs
, r13
),
116 offsetof(struct pt_regs
, r14
),
117 offsetof(struct pt_regs
, r15
),
120 int nr_registers
= ARRAY_SIZE(regoff
);
122 * Don't possibly decode a 32-bit instructions as
123 * reading a 64-bit-only register.
125 if (IS_ENABLED(CONFIG_X86_64
) && !insn
->x86_64
)
130 regno
= X86_MODRM_RM(insn
->modrm
.value
);
131 if (X86_REX_B(insn
->rex_prefix
.value
) == 1)
136 regno
= X86_SIB_INDEX(insn
->sib
.value
);
137 if (X86_REX_X(insn
->rex_prefix
.value
) == 1)
142 regno
= X86_SIB_BASE(insn
->sib
.value
);
143 if (X86_REX_B(insn
->rex_prefix
.value
) == 1)
148 pr_err("invalid register type");
153 if (regno
> nr_registers
) {
154 WARN_ONCE(1, "decoded an instruction with an invalid register");
157 return regoff
[regno
];
161 * Return the address being referenced by the instruction:
162 * for mod=3, return the content of the rm register;
163 * for mod!=3, calculate the address using SIB and displacement.
/*
 * mpx_get_addr_ref() - compute the user address operand of @insn.
 *
 * @insn: instruction being decoded; insn_get_modrm() is called on it
 *        here before the ModRM/SIB fields are read.
 * @regs: register state the operand registers are read from.
 *
 * When the ModRM mod field is 3, the result is the value of the
 * register selected by REG_TYPE_RM.  Otherwise:
 *   - if a SIB byte is present (insn->sib.nbytes != 0), the result is
 *     base + index * (1 << scale);
 *   - if not, it is the value of the REG_TYPE_RM register;
 * and in both of these cases insn->displacement.value is then added.
 *
 * Register values are fetched with regs_get_register() using the
 * offsets returned by get_reg_offset().
 *
 * Returns the computed address, or (void __user *)-1 on the other
 * return path.  mpx_generate_siginfo() compares the result against
 * (void *)-1 to detect that no address could be extracted.
 */
165 static void __user
*mpx_get_addr_ref(struct insn
*insn
, struct pt_regs
*regs
)
167 unsigned long addr
, addr_offset
;
168 unsigned long base
, base_offset
;
169 unsigned long indx
, indx_offset
;
172 insn_get_modrm(insn
);
174 sib
= insn
->sib
.value
;
176 if (X86_MODRM_MOD(insn
->modrm
.value
) == 3) {
177 addr_offset
= get_reg_offset(insn
, regs
, REG_TYPE_RM
);
180 addr
= regs_get_register(regs
, addr_offset
);
182 if (insn
->sib
.nbytes
) {
183 base_offset
= get_reg_offset(insn
, regs
, REG_TYPE_BASE
);
187 indx_offset
= get_reg_offset(insn
, regs
, REG_TYPE_INDEX
);
191 base
= regs_get_register(regs
, base_offset
);
192 indx
= regs_get_register(regs
, indx_offset
);
193 addr
= base
+ indx
* (1 << X86_SIB_SCALE(sib
));
195 addr_offset
= get_reg_offset(insn
, regs
, REG_TYPE_RM
);
198 addr
= regs_get_register(regs
, addr_offset
);
200 addr
+= insn
->displacement
.value
;
202 return (void __user
*)addr
;
204 return (void __user
*)-1;
/*
 * mpx_insn_decode() - fetch and decode the user instruction at regs->ip.
 *
 * @insn: decoder state to initialise and fill in.
 * @regs: register state; only regs->ip is read here.
 *
 * Steps:
 *   - copy up to MAX_INSN_SIZE bytes from user memory at regs->ip and
 *     work out how many bytes actually arrived (nr_copied);
 *   - initialise the decoder on that buffer, in 64-bit mode unless the
 *     thread has TIF_IA32 set, and decode the instruction length;
 *   - compare nr_copied with insn->length to detect a partially copied
 *     instruction;
 *   - decode the opcode and compare it against the two-byte forms
 *     0x0f 0x1a and 0x0f 0x1b.
 *
 * The caller stores the int result in a variable named err.
 * NOTE(review): the exact return values are not spelled out here --
 * confirm against mpx_generate_siginfo().
 */
207 static int mpx_insn_decode(struct insn
*insn
,
208 struct pt_regs
*regs
)
210 unsigned char buf
[MAX_INSN_SIZE
];
211 int x86_64
= !test_thread_flag(TIF_IA32
);
215 not_copied
= copy_from_user(buf
, (void __user
*)regs
->ip
, sizeof(buf
));
216 nr_copied
= sizeof(buf
) - not_copied
;
218 * The decoder _should_ fail nicely if we pass it a short buffer.
219 * But, let's not depend on that implementation detail. If we
220 * did not get anything, just error out now.
224 insn_init(insn
, buf
, nr_copied
, x86_64
);
225 insn_get_length(insn
);
227 * copy_from_user() tries to get as many bytes as we could see in
228 * the largest possible instruction. If the instruction we are
229 * after is shorter than that _and_ we attempt to copy from
230 * something unreadable, we might get a short read. This is OK
231 * as long as the read did not stop in the middle of the
232 * instruction. Check to see if we got a partial instruction.
234 if (nr_copied
< insn
->length
)
237 insn_get_opcode(insn
);
239 * We only _really_ need to decode bndcl/bndcn/bndcu
240 * Error out on anything else.
242 if (insn
->opcode
.bytes
[0] != 0x0f)
244 if ((insn
->opcode
.bytes
[1] != 0x1a) &&
245 (insn
->opcode
.bytes
[1] != 0x1b))
254 * If a bounds overflow occurs then a #BR is generated. This
255 * function decodes MPX instructions to get violation address
256 * and set this address into extended struct siginfo.
258 * Note that this is not a super precise way of doing this.
259 * Userspace could have, by the time we get here, written
260 * anything it wants in to the instructions. We can not
261 * trust anything about it. They might not be valid
262 * instructions or might encode invalid registers, etc...
264 * The caller is expected to kfree() the returned siginfo_t.
266 siginfo_t
*mpx_generate_siginfo(struct pt_regs
*regs
,
267 struct xsave_struct
*xsave_buf
)
274 err
= mpx_insn_decode(&insn
, regs
);
279 * We know at this point that we are only dealing with
282 insn_get_modrm(&insn
);
283 bndregno
= X86_MODRM_REG(insn
.modrm
.value
);
288 info
= kzalloc(sizeof(*info
), GFP_KERNEL
);
294 * The registers are always 64-bit, but the upper 32
295 * bits are ignored in 32-bit mode. Also, note that the
296 * upper bounds are architecturally represented in 1's
299 * The 'unsigned long' cast is because the compiler
300 * complains when casting from integers to different-size
303 info
->si_lower
= (void __user
*)(unsigned long)
304 (xsave_buf
->bndreg
[bndregno
].lower_bound
);
305 info
->si_upper
= (void __user
*)(unsigned long)
306 (~xsave_buf
->bndreg
[bndregno
].upper_bound
);
307 info
->si_addr_lsb
= 0;
308 info
->si_signo
= SIGSEGV
;
310 info
->si_code
= SEGV_BNDERR
;
311 info
->si_addr
= mpx_get_addr_ref(&insn
, regs
);
313 * We were not able to extract an address from the instruction,
314 * probably because there was something invalid in it.
316 if (info
->si_addr
== (void *)-1) {