/*
 * Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>

#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>

#include "kprobes-common.h"

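/*
 * An optimized kprobe overwrites the first RELATIVEJUMP_SIZE (5) bytes at
 * kp->addr with a relative jump, so anyone decoding that region sees the
 * jump opcode and displacement instead of the original instructions.
 * This helper rebuilds the original bytes into @buf from kp->opcode and
 * op->optinsn.copied_insn so that the caller can decode them safely.
 */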
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is being optimized */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * When a kprobe is optimized, the original bytes at its address are
	 * overwritten by the jump opcode and destination address. In that
	 * case the original bytes must be recovered from the
	 * op->optinsn.copied_insn buffer.
	 */
	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs,
		       RELATIVE_ADDR_SIZE - offs);
	}

	return (unsigned long)buf;
}

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	*addr++ = 0x48;		/* REX.W prefix */
	*addr++ = 0xbf;		/* mov $imm64, %rdi */
#else
	*addr++ = 0xb8;		/* mov $imm32, %eax */
#endif
	*(unsigned long *)addr = val;
}

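/*
 * Template for the out-of-line detour buffer. arch_prepare_optimized_kprobe()
 * lays the buffer out as:
 *
 *   [template: save regs, set arg1 = op, call optimized_callback, restore]
 *   [copy of the instructions replaced at kp->addr]
 *   [relative jump back to kp->addr + optinsn.size]
 *
 * The ASM_NOP5 placeholders below are patched with the mov/call above.
 */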
static void __used __kprobes kprobes_optinsn_template_holder(void)
{
	asm volatile (
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			"	pushq %rsp\n"
			"	pushfq\n"
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags to rsp */
			"	movq 144(%rsp), %rdx\n"
			"	movq %rdx, 152(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addq $8, %rsp\n"
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushf\n"
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			RESTORE_REGS_STRING
			"	addl $4, %esp\n"	/* skip cs */
			"	popf\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n");
}

#define TMPL_MOVE_IDX \
	((long)&optprobe_template_val - (long)&optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)&optprobe_template_call - (long)&optprobe_template_entry)
#define TMPL_END_IDX \
	((long)&optprobe_template_end - (long)&optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

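/*
 * SAVE_REGS_STRING skips the cs, ip and orig_ax slots (and gs on 32-bit),
 * so optimized_callback() fills them in by hand before handing pt_regs to
 * the pre-handler, making the frame look like an int3 trap taken at
 * kp->addr + INT3_SIZE.
 */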
/* Optimized kprobe callback function: called from optinsn */
static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	unsigned long flags;

	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	local_irq_save(flags);
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		/* Save skipped registers */
#ifdef CONFIG_X86_64
		regs->cs = __KERNEL_CS;
#else
		regs->cs = __KERNEL_CS | get_kernel_rpl();
		regs->gs = 0;
#endif
		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	local_irq_restore(flags);
}

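/*
 * Copy whole instructions from @src until at least RELATIVEJUMP_SIZE (5)
 * bytes are covered, so that the relative jump never splits an instruction.
 * Every copied instruction must be boostable (safely executable out of
 * line), and the region must not be owned by ftrace, alternatives or
 * jump labels.
 */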
static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
{
	int len = 0, ret;

	while (len < RELATIVEJUMP_SIZE) {
		ret = __copy_instruction(dest + len, src + len);
		if (!ret || !can_boost(dest + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

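/*
 * Opcode 0xff with a ModRM reg field of 4 or 5 is an indirect near/far
 * jump (the "& 6" mask matches both), and 0xea is a direct far jump;
 * their targets can't be checked statically, so they block optimization.
 */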
/* Check whether insn is indirect jump */
static int __kprobes insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
}

/* Check whether insn jumps into specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

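/*
 * No branch in the function may land inside the RELATIVE_ADDR_SIZE (4)
 * displacement bytes that follow the jump opcode at the probed address:
 * execution there would decode the displacement as instructions. The scan
 * below checks every instruction in the function for this.
 */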
/* Decode the whole function to ensure no instruction jumps into the target */
static int __kprobes can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling.
	 */
	if ((paddr >= (unsigned long)__entry_text_start) &&
	    (paddr <  (unsigned long)__entry_text_end))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < RELATIVEJUMP_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		if (search_exception_tables(addr))
			/*
			 * Since some fixup code will jump into this function,
			 * we can't optimize a kprobe in this function.
			 */
			return 0;
		kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
		insn_get_length(&insn);
		/* Another subsystem puts a breakpoint */
		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
			return 0;
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check this instruction doesn't jump into the target range */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
					 RELATIVE_ADDR_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/* Check whether the optimized_kprobe can actually be optimized. */
int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}

/* Check whether addr is within the optimized instructions. */
int __kprobes
arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static __kprobes
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the target instructions that the relative jump will replace.
 * Target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
{
	u8 *buf;
	int ret;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	op->optinsn.insn = get_optinsn_slot();
	if (!op->optinsn.insn)
		return -ENOMEM;

	/*
	 * Verify if the address gap is in 2GB range, because this uses
	 * a relative jump.
	 */
	rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
	if (abs(rel) > 0x7fffffff)
		return -ERANGE;

	buf = (u8 *)op->optinsn.insn;

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
	if (ret < 0) {
		__arch_remove_optimized_kprobe(op, 0);
		return ret;
	}
	op->optinsn.size = ret;

	/* Copy arch-dep-instance from template */
	memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);

	/* Set returning jmp instruction at the tail of out-of-line buffer */
	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
			   (u8 *)op->kp.addr + op->optinsn.size);

	flush_icache_range((unsigned long) buf,
			   (unsigned long) buf + TMPL_END_IDX +
			   op->optinsn.size + RELATIVEJUMP_SIZE);
	return 0;
}

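/*
 * Jump patches are queued in jump_poke_params and flushed in batches of
 * up to MAX_OPTIMIZE_PROBES via text_poke_smp_batch(), so many probes can
 * be (un)optimized with a single cross-CPU code-modification pass.
 */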
#define MAX_OPTIMIZE_PROBES 256
static struct text_poke_param *jump_poke_params;
static struct jump_poke_buffer {
	u8 buf[RELATIVEJUMP_SIZE];
} *jump_poke_bufs;

static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
					    u8 *insn_buf,
					    struct optimized_kprobe *op)
{
	s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + RELATIVEJUMP_SIZE));

	/* Backup instructions which will be replaced by jump address */
	memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
	       RELATIVE_ADDR_SIZE);

	insn_buf[0] = RELATIVEJUMP_OPCODE;
	*(s32 *)(&insn_buf[1]) = rel;

	tprm->addr = op->kp.addr;
	tprm->opcode = insn_buf;
	tprm->len = RELATIVEJUMP_SIZE;
}

/*
 * Replace breakpoints (int3) with relative jumps.
 * Caller must hold kprobe_mutex and text_mutex.
 */
void __kprobes arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	int c = 0;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		WARN_ON(kprobe_disabled(&op->kp));
		/* Setup param */
		setup_optimize_kprobe(&jump_poke_params[c],
				      jump_poke_bufs[c].buf, op);
		list_del_init(&op->list);
		if (++c >= MAX_OPTIMIZE_PROBES)
			break;
	}

	/*
	 * text_poke_smp doesn't support NMI/MCE code modifying.
	 * However, since kprobes itself also doesn't support NMI/MCE
	 * code probing, it's not a problem.
	 */
	text_poke_smp_batch(jump_poke_params, c);
}

static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
					      u8 *insn_buf,
					      struct optimized_kprobe *op)
{
	/* Set int3 to first byte for kprobes */
	insn_buf[0] = BREAKPOINT_INSTRUCTION;
	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);

	tprm->addr = op->kp.addr;
	tprm->opcode = insn_buf;
	tprm->len = RELATIVEJUMP_SIZE;
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must hold kprobe_mutex.
 */
void arch_unoptimize_kprobes(struct list_head *oplist,
			     struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;
	int c = 0;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		/* Setup param */
		setup_unoptimize_kprobe(&jump_poke_params[c],
					jump_poke_bufs[c].buf, op);
		list_move(&op->list, done_list);
		if (++c >= MAX_OPTIMIZE_PROBES)
			break;
	}

	/*
	 * text_poke_smp doesn't support NMI/MCE code modifying.
	 * However, since kprobes itself also doesn't support NMI/MCE
	 * code probing, it's not a problem.
	 */
	text_poke_smp_batch(jump_poke_params, c);
}

/* Replace a relative jump with a breakpoint (int3). */
void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 buf[RELATIVEJUMP_SIZE];

	/* Set int3 to first byte for kprobes */
	buf[0] = BREAKPOINT_INSTRUCTION;
	memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
}

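/*
 * Called from the int3 handler when the hit kprobe is flagged optimized
 * (e.g. while the jump is being installed or removed): resume execution in
 * the out-of-line buffer just past the template, where the copied original
 * instructions and the return jump live. The pre-handler has already run,
 * so the template's callback must be skipped.
 */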
int __kprobes
setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe is really able to run the optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		preempt_enable_no_resched();
		return 1;
	}
	return 0;
}

int __kprobes arch_init_optprobes(void)
{
	/* Allocate code buffer and parameter array */
	jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
				 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
	if (!jump_poke_bufs)
		return -ENOMEM;

	jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
				   MAX_OPTIMIZE_PROBES, GFP_KERNEL);
	if (!jump_poke_params) {
		kfree(jump_poke_bufs);
		jump_poke_bufs = NULL;
		return -ENOMEM;
	}

	return 0;
}