Merge branch 'x86-debug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[deliverable/linux.git] / arch / x86 / kernel / uprobes.c
CommitLineData
2b144498 1/*
7b2d81d4 2 * User-space Probes (UProbes) for x86
2b144498
SD
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2008-2011
19 * Authors:
20 * Srikar Dronamraju
21 * Jim Keniston
22 */
2b144498
SD
23#include <linux/kernel.h>
24#include <linux/sched.h>
25#include <linux/ptrace.h>
26#include <linux/uprobes.h>
0326f5a9 27#include <linux/uaccess.h>
2b144498
SD
28
29#include <linux/kdebug.h>
0326f5a9 30#include <asm/processor.h>
2b144498
SD
31#include <asm/insn.h>
32
/* Post-execution fixups: bit flags kept in arch_uprobe->fixups. */

/* No fixup needed */
#define UPROBE_FIX_NONE		0x0

/* Adjust IP back to vicinity of actual insn */
#define UPROBE_FIX_IP		0x1

/* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL		0x2

/*
 * The probed insn was rip-relative and has been rewritten to address its
 * memory operand through a scratch register: %rax (RIP_AX) or %rcx (RIP_CX).
 * The scratch register must be saved before and restored after the
 * out-of-line single-step.
 */
#define UPROBE_FIX_RIP_AX	0x8000
#define UPROBE_FIX_RIP_CX	0x4000

/*
 * Marker written to thread.trap_nr before executing out of line; a different
 * value afterwards is taken to mean that the XOL instruction trapped.
 */
#define UPROBE_TRAP_NR		UINT_MAX
48
/*
 * Adaptations for mhiramat x86 decoder v14.
 *
 * Accessors for an already-decoded "struct insn *": the first three opcode
 * bytes and the reg field of the ModRM byte.  The argument is parenthesised
 * in every macro so that pointer expressions such as "p + 1" expand safely.
 */
#define OPCODE1(insn)		((insn)->opcode.bytes[0])
#define OPCODE2(insn)		((insn)->opcode.bytes[1])
#define OPCODE3(insn)		((insn)->opcode.bytes[2])
#define MODRM_REG(insn)		X86_MODRM_REG((insn)->modrm.value)
2b144498
SD
54
/*
 * Build one 16-entry row of an opcode bitmap.  b0..bf must be bare 0/1
 * literals (each is token-pasted onto "UL"); bit N of the row is bN.  The row
 * is then shifted by (row % 32), i.e. by 0 or 16 for the 0x00, 0x10, ... row
 * bases used in this file, so that two consecutive rows can be OR-ed into one
 * 32-bit table word.  "row" is parenthesised so any expression is safe.
 */
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << ((row) % 32))
61
04a3d984
SD
62/*
63 * Good-instruction tables for 32-bit apps. This is non-const and volatile
64 * to keep gcc from statically optimizing it out, as variable_test_bit makes
65 * some versions of gcc to think only *(unsigned long*) is used.
66 */
67static volatile u32 good_insns_32[256 / 32] = {
2b144498
SD
68 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
69 /* ---------------------------------------------- */
70 W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
71 W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
72 W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
73 W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
74 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
75 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
76 W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
77 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
78 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
79 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
80 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
81 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
82 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
83 W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
84 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
85 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
86 /* ---------------------------------------------- */
87 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
88};
89
90/* Using this for both 64-bit and 32-bit apps */
04a3d984 91static volatile u32 good_2byte_insns[256 / 32] = {
2b144498
SD
92 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
93 /* ---------------------------------------------- */
94 W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
95 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
96 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
97 W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
98 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
99 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
100 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
101 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
102 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
103 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
104 W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
105 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
106 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
107 W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
108 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
109 W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
110 /* ---------------------------------------------- */
111 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
112};
113
04a3d984
SD
114#ifdef CONFIG_X86_64
115/* Good-instruction tables for 64-bit apps */
116static volatile u32 good_insns_64[256 / 32] = {
117 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
118 /* ---------------------------------------------- */
119 W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
120 W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
121 W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
122 W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
123 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
124 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
125 W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
126 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
127 W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
128 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
129 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
130 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
131 W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
132 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
133 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
134 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
135 /* ---------------------------------------------- */
136 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
137};
138#endif
2b144498
SD
139#undef W
140
/*
 * opcodes we'll probably never support:
 *
 *  6c-6d, e4-e5, ec-ed - in
 *  6e-6f, e6-e7, ee-ef - out
 *  cc, cd - int3, int
 *  cf - iret
 *  d6 - illegal instruction
 *  f1 - int1/icebp
 *  f4 - hlt
 *  fa, fb - cli, sti
 *  0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
 *
 * invalid opcodes in 64-bit mode:
 *
 *  06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
 *  63 - we support this opcode in x86_64 but not in i386.
 *
 * opcodes we may need to refine support for:
 *
 *  0f - 2-byte instructions: For many of these instructions, the validity
 *  depends on the prefix and/or the reg field.  On such instructions, we
 *  just consider the opcode combination valid if it corresponds to any
 *  valid instruction.
 *
 *  8f - Group 1A - only reg = 0 is OK
 *  c6-c7 - Group 11 - only reg = 0 is OK
 *  d9-df - fpu insns with some illegal encodings
 *  f2, f3 - repnz, repz prefixes.  These are also the first byte for
 *  certain floating-point instructions, such as addsd.
 *
 *  fe - Group 4 - only reg = 0 or 1 is OK
 *  ff - Group 5 - only reg = 0-6 is OK
 *
 * others -- Do we need to support these?
 *
 *  0f - (floating-point?) prefetch instructions
 *  07, 17, 1f - pop es, pop ss, pop ds
 *  26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
 *	but 64 and 65 (fs: and gs:) seem to be used, so we support them
 *  67 - addr16 prefix
 *  ce - into
 *  f0 - lock prefix
 */
185
/*
 * TODO:
 * - Where necessary, examine the modrm byte and allow only valid instructions
 *   in the different Groups and fpu instructions.
 */
191
192static bool is_prefix_bad(struct insn *insn)
193{
194 int i;
195
196 for (i = 0; i < insn->prefixes.nbytes; i++) {
197 switch (insn->prefixes.bytes[i]) {
7b2d81d4
IM
198 case 0x26: /* INAT_PFX_ES */
199 case 0x2E: /* INAT_PFX_CS */
200 case 0x36: /* INAT_PFX_DS */
201 case 0x3E: /* INAT_PFX_SS */
202 case 0xF0: /* INAT_PFX_LOCK */
2b144498
SD
203 return true;
204 }
205 }
206 return false;
207}
208
3ff54efd 209static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
2b144498 210{
3ff54efd 211 insn_init(insn, auprobe->insn, false);
2b144498
SD
212
213 /* Skip good instruction prefixes; reject "bad" ones. */
214 insn_get_opcode(insn);
215 if (is_prefix_bad(insn))
216 return -ENOTSUPP;
7b2d81d4 217
2b144498
SD
218 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
219 return 0;
7b2d81d4 220
2b144498
SD
221 if (insn->opcode.nbytes == 2) {
222 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
223 return 0;
224 }
7b2d81d4 225
2b144498
SD
226 return -ENOTSUPP;
227}
228
229/*
0326f5a9
SD
230 * Figure out which fixups arch_uprobe_post_xol() will need to perform, and
231 * annotate arch_uprobe->fixups accordingly. To start with,
232 * arch_uprobe->fixups is either zero or it reflects rip-related fixups.
2b144498 233 */
3ff54efd 234static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
2b144498
SD
235{
236 bool fix_ip = true, fix_call = false; /* defaults */
237 int reg;
238
239 insn_get_opcode(insn); /* should be a nop */
240
241 switch (OPCODE1(insn)) {
242 case 0xc3: /* ret/lret */
243 case 0xcb:
244 case 0xc2:
245 case 0xca:
246 /* ip is correct */
247 fix_ip = false;
248 break;
249 case 0xe8: /* call relative - Fix return addr */
250 fix_call = true;
251 break;
252 case 0x9a: /* call absolute - Fix return addr, not ip */
253 fix_call = true;
254 fix_ip = false;
255 break;
256 case 0xff:
257 insn_get_modrm(insn);
258 reg = MODRM_REG(insn);
259 if (reg == 2 || reg == 3) {
260 /* call or lcall, indirect */
261 /* Fix return addr; ip is correct. */
262 fix_call = true;
263 fix_ip = false;
264 } else if (reg == 4 || reg == 5) {
265 /* jmp or ljmp, indirect */
266 /* ip is correct. */
267 fix_ip = false;
268 }
269 break;
270 case 0xea: /* jmp absolute -- ip is correct */
271 fix_ip = false;
272 break;
273 default:
274 break;
275 }
276 if (fix_ip)
900771a4 277 auprobe->fixups |= UPROBE_FIX_IP;
2b144498 278 if (fix_call)
900771a4 279 auprobe->fixups |= UPROBE_FIX_CALL;
2b144498
SD
280}
281
282#ifdef CONFIG_X86_64
283/*
3ff54efd 284 * If arch_uprobe->insn doesn't use rip-relative addressing, return
2b144498
SD
285 * immediately. Otherwise, rewrite the instruction so that it accesses
286 * its memory operand indirectly through a scratch register. Set
3ff54efd 287 * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
2b144498
SD
288 * accordingly. (The contents of the scratch register will be saved
289 * before we single-step the modified instruction, and restored
290 * afterward.)
291 *
292 * We do this because a rip-relative instruction can access only a
293 * relatively small area (+/- 2 GB from the instruction), and the XOL
294 * area typically lies beyond that area. At least for instructions
295 * that store to memory, we can't execute the original instruction
296 * and "fix things up" later, because the misdirected store could be
297 * disastrous.
298 *
299 * Some useful facts about rip-relative instructions:
7b2d81d4
IM
300 *
301 * - There's always a modrm byte.
302 * - There's never a SIB byte.
303 * - The displacement is always 4 bytes.
2b144498 304 */
e3343e6a
SD
305static void
306handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
2b144498
SD
307{
308 u8 *cursor;
309 u8 reg;
310
311 if (mm->context.ia32_compat)
312 return;
313
3ff54efd 314 auprobe->rip_rela_target_address = 0x0;
2b144498
SD
315 if (!insn_rip_relative(insn))
316 return;
317
318 /*
319 * insn_rip_relative() would have decoded rex_prefix, modrm.
320 * Clear REX.b bit (extension of MODRM.rm field):
321 * we want to encode rax/rcx, not r8/r9.
322 */
323 if (insn->rex_prefix.nbytes) {
3ff54efd 324 cursor = auprobe->insn + insn_offset_rex_prefix(insn);
2b144498
SD
325 *cursor &= 0xfe; /* Clearing REX.B bit */
326 }
327
328 /*
329 * Point cursor at the modrm byte. The next 4 bytes are the
330 * displacement. Beyond the displacement, for some instructions,
331 * is the immediate operand.
332 */
3ff54efd 333 cursor = auprobe->insn + insn_offset_modrm(insn);
2b144498
SD
334 insn_get_length(insn);
335
336 /*
337 * Convert from rip-relative addressing to indirect addressing
338 * via a scratch register. Change the r/m field from 0x5 (%rip)
339 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
340 */
341 reg = MODRM_REG(insn);
342 if (reg == 0) {
343 /*
344 * The register operand (if any) is either the A register
345 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
346 * REX prefix) %r8. In any case, we know the C register
347 * is NOT the register operand, so we use %rcx (register
348 * #1) for the scratch register.
349 */
900771a4 350 auprobe->fixups = UPROBE_FIX_RIP_CX;
2b144498
SD
351 /* Change modrm from 00 000 101 to 00 000 001. */
352 *cursor = 0x1;
353 } else {
354 /* Use %rax (register #0) for the scratch register. */
900771a4 355 auprobe->fixups = UPROBE_FIX_RIP_AX;
2b144498
SD
356 /* Change modrm from 00 xxx 101 to 00 xxx 000 */
357 *cursor = (reg << 3);
358 }
359
360 /* Target address = address of next instruction + (signed) offset */
3ff54efd 361 auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
7b2d81d4 362
2b144498
SD
363 /* Displacement field is gone; slide immediate field (if any) over. */
364 if (insn->immediate.nbytes) {
365 cursor++;
7b2d81d4 366 memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
2b144498
SD
367 }
368 return;
369}
370
3ff54efd 371static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
2b144498 372{
3ff54efd 373 insn_init(insn, auprobe->insn, true);
2b144498
SD
374
375 /* Skip good instruction prefixes; reject "bad" ones. */
376 insn_get_opcode(insn);
377 if (is_prefix_bad(insn))
378 return -ENOTSUPP;
7b2d81d4 379
2b144498
SD
380 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
381 return 0;
7b2d81d4 382
2b144498
SD
383 if (insn->opcode.nbytes == 2) {
384 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
385 return 0;
386 }
387 return -ENOTSUPP;
388}
389
e3343e6a 390static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
2b144498
SD
391{
392 if (mm->context.ia32_compat)
3ff54efd
SD
393 return validate_insn_32bits(auprobe, insn);
394 return validate_insn_64bits(auprobe, insn);
2b144498 395}
7b2d81d4 396#else /* 32-bit: */
/* 32-bit build: there is no RIP-relative addressing, so nothing to rewrite. */
static void
handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
}
401
/* 32-bit build: only the 32-bit validator applies; @mm is not consulted. */
static int
validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	return validate_insn_32bits(auprobe, insn);
}
406#endif /* CONFIG_X86_64 */
407
408/**
0326f5a9 409 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
2b144498 410 * @mm: the probed address space.
3ff54efd 411 * @arch_uprobe: the probepoint information.
7eb9ba5e 412 * @addr: virtual address at which to install the probepoint
2b144498
SD
413 * Return 0 on success or a -ve number on error.
414 */
7eb9ba5e 415int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
2b144498
SD
416{
417 int ret;
418 struct insn insn;
419
3ff54efd 420 auprobe->fixups = 0;
e3343e6a 421 ret = validate_insn_bits(auprobe, mm, &insn);
2b144498
SD
422 if (ret != 0)
423 return ret;
7b2d81d4 424
e3343e6a 425 handle_riprel_insn(auprobe, mm, &insn);
3ff54efd 426 prepare_fixups(auprobe, &insn);
7b2d81d4 427
2b144498
SD
428 return 0;
429}
0326f5a9
SD
430
431#ifdef CONFIG_X86_64
432/*
433 * If we're emulating a rip-relative instruction, save the contents
434 * of the scratch register and store the target address in that register.
435 */
436static void
437pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
438 struct arch_uprobe_task *autask)
439{
440 if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
441 autask->saved_scratch_register = regs->ax;
442 regs->ax = current->utask->vaddr;
443 regs->ax += auprobe->rip_rela_target_address;
444 } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
445 autask->saved_scratch_register = regs->cx;
446 regs->cx = current->utask->vaddr;
447 regs->cx += auprobe->rip_rela_target_address;
448 }
449}
450#else
/* 32-bit build: no RIP-relative addressing, so no scratch register setup. */
static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
			     struct arch_uprobe_task *autask)
{
}
457#endif
458
459/*
460 * arch_uprobe_pre_xol - prepare to execute out of line.
461 * @auprobe: the probepoint information.
462 * @regs: reflects the saved user state of current task.
463 */
464int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
465{
466 struct arch_uprobe_task *autask;
467
468 autask = &current->utask->autask;
469 autask->saved_trap_nr = current->thread.trap_nr;
470 current->thread.trap_nr = UPROBE_TRAP_NR;
471 regs->ip = current->utask->xol_vaddr;
472 pre_xol_rip_insn(auprobe, regs, autask);
473
474 return 0;
475}
476
477/*
478 * This function is called by arch_uprobe_post_xol() to adjust the return
479 * address pushed by a call instruction executed out of line.
480 */
481static int adjust_ret_addr(unsigned long sp, long correction)
482{
483 int rasize, ncopied;
484 long ra = 0;
485
486 if (is_ia32_task())
487 rasize = 4;
488 else
489 rasize = 8;
490
491 ncopied = copy_from_user(&ra, (void __user *)sp, rasize);
492 if (unlikely(ncopied))
493 return -EFAULT;
494
495 ra += correction;
496 ncopied = copy_to_user((void __user *)sp, &ra, rasize);
497 if (unlikely(ncopied))
498 return -EFAULT;
499
500 return 0;
501}
502
503#ifdef CONFIG_X86_64
504static bool is_riprel_insn(struct arch_uprobe *auprobe)
505{
506 return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0);
507}
508
509static void
510handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
511{
512 if (is_riprel_insn(auprobe)) {
513 struct arch_uprobe_task *autask;
514
515 autask = &current->utask->autask;
516 if (auprobe->fixups & UPROBE_FIX_RIP_AX)
517 regs->ax = autask->saved_scratch_register;
518 else
519 regs->cx = autask->saved_scratch_register;
520
521 /*
522 * The original instruction includes a displacement, and so
523 * is 4 bytes longer than what we've just single-stepped.
524 * Fall through to handle stuff like "jmpq *...(%rip)" and
525 * "callq *...(%rip)".
526 */
527 if (correction)
528 *correction += 4;
529 }
530}
531#else
/* 32-bit build: no RIP-relative addressing, so nothing to restore. */
static void handle_riprel_post_xol(struct arch_uprobe *auprobe,
				   struct pt_regs *regs, long *correction)
{
}
537#endif
538
539/*
540 * If xol insn itself traps and generates a signal(Say,
541 * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped
542 * instruction jumps back to its own address. It is assumed that anything
543 * like do_page_fault/do_trap/etc sets thread.trap_nr != -1.
544 *
545 * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr,
546 * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to
547 * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol().
548 */
549bool arch_uprobe_xol_was_trapped(struct task_struct *t)
550{
551 if (t->thread.trap_nr != UPROBE_TRAP_NR)
552 return true;
553
554 return false;
555}
556
557/*
558 * Called after single-stepping. To avoid the SMP problems that can
559 * occur when we temporarily put back the original opcode to
560 * single-step, we single-stepped a copy of the instruction.
561 *
562 * This function prepares to resume execution after the single-step.
563 * We have to fix things up as follows:
564 *
565 * Typically, the new ip is relative to the copied instruction. We need
566 * to make it relative to the original instruction (FIX_IP). Exceptions
567 * are return instructions and absolute or indirect jump or call instructions.
568 *
569 * If the single-stepped instruction was a call, the return address that
570 * is atop the stack is the address following the copied instruction. We
571 * need to make it the address following the original instruction (FIX_CALL).
572 *
573 * If the original instruction was a rip-relative instruction such as
574 * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
575 * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
576 * We need to restore the contents of the scratch register and adjust
577 * the ip, keeping in mind that the instruction we executed is 4 bytes
578 * shorter than the original instruction (since we squeezed out the offset
579 * field). (FIX_RIP_AX or FIX_RIP_CX)
580 */
581int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
582{
583 struct uprobe_task *utask;
584 long correction;
585 int result = 0;
586
587 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
588
589 utask = current->utask;
590 current->thread.trap_nr = utask->autask.saved_trap_nr;
591 correction = (long)(utask->vaddr - utask->xol_vaddr);
592 handle_riprel_post_xol(auprobe, regs, &correction);
593 if (auprobe->fixups & UPROBE_FIX_IP)
594 regs->ip += correction;
595
596 if (auprobe->fixups & UPROBE_FIX_CALL)
597 result = adjust_ret_addr(regs->sp, correction);
598
599 return result;
600}
601
602/* callback routine for handling exceptions. */
603int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data)
604{
605 struct die_args *args = data;
606 struct pt_regs *regs = args->regs;
607 int ret = NOTIFY_DONE;
608
609 /* We are only interested in userspace traps */
610 if (regs && !user_mode_vm(regs))
611 return NOTIFY_DONE;
612
613 switch (val) {
614 case DIE_INT3:
615 if (uprobe_pre_sstep_notifier(regs))
616 ret = NOTIFY_STOP;
617
618 break;
619
620 case DIE_DEBUG:
621 if (uprobe_post_sstep_notifier(regs))
622 ret = NOTIFY_STOP;
623
624 default:
625 break;
626 }
627
628 return ret;
629}
630
631/*
632 * This function gets called when XOL instruction either gets trapped or
633 * the thread has a fatal signal, so reset the instruction pointer to its
634 * probed address.
635 */
636void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
637{
638 struct uprobe_task *utask = current->utask;
639
640 current->thread.trap_nr = utask->autask.saved_trap_nr;
641 handle_riprel_post_xol(auprobe, regs, NULL);
642 instruction_pointer_set(regs, utask->vaddr);
643}
644
645/*
646 * Skip these instructions as per the currently known x86 ISA.
647 * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
648 */
649bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
650{
651 int i;
652
653 for (i = 0; i < MAX_UINSN_BYTES; i++) {
654 if ((auprobe->insn[i] == 0x66))
655 continue;
656
657 if (auprobe->insn[i] == 0x90)
658 return true;
659
660 if (i == (MAX_UINSN_BYTES - 1))
661 break;
662
663 if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f))
664 return true;
665
666 if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19))
667 return true;
668
669 if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0))
670 return true;
671
672 break;
673 }
674 return false;
675}
This page took 0.086893 seconds and 5 git commands to generate.