Commit | Line | Data |
---|---|---|
eb13296c MH |
1 | /* |
2 | * x86 instruction analysis | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | * | |
18 | * Copyright (C) IBM Corporation, 2002, 2004, 2009 | |
19 | */ | |
20 | ||
21 | #include <linux/string.h> | |
22 | #include <asm/inat.h> | |
23 | #include <asm/insn.h> | |
24 | ||
53a019a9 MH |
25 | /* Verify that sizeof(t) bytes starting n bytes past next_byte still lie within MAX_INSN_SIZE bytes of kaddr */ |
26 | #define validate_next(t, insn, n) \ | |
27 | ((insn)->next_byte + sizeof(t) + (n) - (insn)->kaddr <= MAX_INSN_SIZE) | |
28 | /* Read a t at next_byte and advance next_byte past it; no bounds check */ | |
29 | #define __get_next(t, insn) \ | |
30 | ({ t r = *(t*)(insn)->next_byte; (insn)->next_byte += sizeof(t); r; }) | |
31 | /* Read a t located n bytes past next_byte without advancing; no bounds check */ | |
32 | #define __peek_nbyte_next(t, insn, n) \ | |
33 | ({ t r = *(t*)((insn)->next_byte + (n)); r; }) | |
eb13296c | 34 | /* Bounds-checked __get_next(); jumps to the enclosing function's err_out label when the check fails */ |
53a019a9 MH |
35 | #define get_next(t, insn) \ |
36 | ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) | |
eb13296c | 37 | /* Bounds-checked __peek_nbyte_next(); jumps to the enclosing function's err_out label when the check fails */ |
e0e492e9 | 38 | #define peek_nbyte_next(t, insn, n) \ |
53a019a9 MH |
39 | ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) |
40 | /* Bounds-checked peek at the value located exactly at next_byte */ | |
41 | #define peek_next(t, insn) peek_nbyte_next(t, insn, 0) | |
e0e492e9 | 42 | |
eb13296c MH |
43 | /** |
44 | * insn_init() - initialize struct insn for decoding | |
45 | * @insn: &struct insn to be initialized | |
46 | * @kaddr: address (in kernel memory) of instruction (or copy thereof) | |
47 | * @x86_64: !0 for 64-bit kernel or 64-bit app (default address size becomes 8 bytes instead of 4) | |
48 | */ | |
49 | void insn_init(struct insn *insn, const void *kaddr, int x86_64) | |
50 | { | |
51 | memset(insn, 0, sizeof(*insn)); /* clears every field, so all .got flags start at 0 */ | |
52 | insn->kaddr = kaddr; | |
53 | insn->next_byte = kaddr; /* decoding starts at the first byte */ | |
54 | insn->x86_64 = x86_64 ? 1 : 0; /* normalize any non-zero value to 1 */ | |
55 | insn->opnd_bytes = 4; /* default operand size; insn_get_prefixes() may change it */ | |
56 | if (x86_64) | |
57 | insn->addr_bytes = 8; | |
58 | else | |
59 | insn->addr_bytes = 4; | |
60 | } | |
61 | ||
62 | /** | |
63 | * insn_get_prefixes - scan x86 instruction prefix bytes | |
64 | * @insn: &struct insn containing instruction | |
65 | * | |
66 | * Populates @insn->prefixes (plus @insn->rex_prefix and @insn->vex_prefix), and updates @insn->next_byte | |
67 | * to point to the (first) opcode. No effect if @insn->prefixes.got | |
68 | * is already set. If a read would exceed the size limit, returns early with @insn->prefixes.got still unset. | |
69 | */ | |
70 | void insn_get_prefixes(struct insn *insn) | |
71 | { | |
72 | struct insn_field *prefixes = &insn->prefixes; | |
73 | insn_attr_t attr; | |
74 | insn_byte_t b, lb; | |
75 | int i, nb; | |
76 | ||
77 | if (prefixes->got) | |
78 | return; | |
79 | ||
80 | nb = 0; /* number of distinct prefix bytes stored in prefixes->bytes[] */ | |
81 | lb = 0; /* most recent prefix byte consumed, 0 if none */ | |
82 | b = peek_next(insn_byte_t, insn); | |
83 | attr = inat_get_opcode_attribute(b); | |
04d46c1b | 84 | while (inat_is_legacy_prefix(attr)) { |
eb13296c MH |
85 | /* Do not store a prefix byte that is already recorded */ |
86 | for (i = 0; i < nb; i++) | |
87 | if (prefixes->bytes[i] == b) | |
88 | goto found; | |
89 | if (nb == 4) | |
90 | /* Invalid instruction: a fifth distinct prefix does not fit in bytes[] */ | |
91 | break; | |
92 | prefixes->bytes[nb++] = b; | |
93 | if (inat_is_address_size_prefix(attr)) { | |
94 | /* address size switches 2/4 or 4/8 */ | |
95 | if (insn->x86_64) | |
96 | insn->addr_bytes ^= 12; /* 8 ^ 12 == 4, 4 ^ 12 == 8 */ | |
97 | else | |
98 | insn->addr_bytes ^= 6; /* 4 ^ 6 == 2, 2 ^ 6 == 4 */ | |
99 | } else if (inat_is_operand_size_prefix(attr)) { | |
100 | /* operand size switches 2/4 */ | |
101 | insn->opnd_bytes ^= 6; /* 4 ^ 6 == 2, 2 ^ 6 == 4 */ | |
102 | } | |
103 | found: | |
104 | prefixes->nbytes++; /* counts every prefix byte consumed, repeats included */ | |
105 | insn->next_byte++; | |
106 | lb = b; | |
107 | b = peek_next(insn_byte_t, insn); | |
108 | attr = inat_get_opcode_attribute(b); | |
109 | } | |
110 | /* Keep the last prefix consumed in bytes[3] */ | |
111 | if (lb && lb != insn->prefixes.bytes[3]) { | |
112 | if (unlikely(insn->prefixes.bytes[3])) { | |
113 | /* bytes[3] is occupied: move its old value into the slot that held lb */ | |
114 | b = insn->prefixes.bytes[3]; | |
115 | for (i = 0; i < nb; i++) | |
116 | if (prefixes->bytes[i] == lb) | |
117 | prefixes->bytes[i] = b; | |
118 | } | |
119 | insn->prefixes.bytes[3] = lb; | |
120 | } | |
121 | ||
e0e492e9 | 122 | /* Decode REX prefix (only looked for when insn->x86_64 is set) */ |
eb13296c MH |
123 | if (insn->x86_64) { |
124 | b = peek_next(insn_byte_t, insn); | |
125 | attr = inat_get_opcode_attribute(b); | |
126 | if (inat_is_rex_prefix(attr)) { | |
127 | insn->rex_prefix.value = b; | |
128 | insn->rex_prefix.nbytes = 1; | |
129 | insn->next_byte++; | |
130 | if (X86_REX_W(b)) | |
131 | /* REX.W overrides opnd_size */ | |
132 | insn->opnd_bytes = 8; | |
133 | } | |
134 | } | |
135 | insn->rex_prefix.got = 1; | |
e0e492e9 MH |
136 | |
137 | /* Decode VEX prefix */ | |
138 | b = peek_next(insn_byte_t, insn); | |
139 | attr = inat_get_opcode_attribute(b); | |
140 | if (inat_is_vex_prefix(attr)) { | |
141 | insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); | |
142 | if (!insn->x86_64) { | |
143 | /* | |
144 | * In 32-bits mode, if the [7:6] bits (mod bits of | |
145 | * ModRM) on the second byte are not 11b, it is | |
146 | * LDS or LES. | |
147 | */ | |
148 | if (X86_MODRM_MOD(b2) != 3) | |
149 | goto vex_end; | |
150 | } | |
151 | insn->vex_prefix.bytes[0] = b; | |
152 | insn->vex_prefix.bytes[1] = b2; | |
153 | if (inat_is_vex3_prefix(attr)) { | |
154 | b2 = peek_nbyte_next(insn_byte_t, insn, 2); | |
155 | insn->vex_prefix.bytes[2] = b2; | |
156 | insn->vex_prefix.nbytes = 3; | |
157 | insn->next_byte += 3; | |
158 | if (insn->x86_64 && X86_VEX_W(b2)) | |
159 | /* VEX.W overrides opnd_size */ | |
160 | insn->opnd_bytes = 8; | |
161 | } else { | |
162 | insn->vex_prefix.nbytes = 2; | |
163 | insn->next_byte += 2; | |
164 | } | |
165 | } | |
166 | vex_end: | |
167 | insn->vex_prefix.got = 1; | |
168 | ||
eb13296c | 169 | prefixes->got = 1; |
53a019a9 MH |
170 | |
171 | err_out: /* target of peek_next()/peek_nbyte_next() when the bounds check fails; prefixes->got is not set on that path */ | |
eb13296c MH |
172 | return; |
173 | } | |
174 | ||
175 | /** | |
176 | * insn_get_opcode - collect opcode(s) | |
177 | * @insn: &struct insn containing instruction | |
178 | * | |
179 | * Populates @insn->opcode, updates @insn->next_byte to point past the | |
180 | * opcode byte(s), and sets @insn->attr (except for groups). | |
181 | * If necessary, first collects any preceding (prefix) bytes. | |
182 | * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got | |
183 | * is already 1. @insn->attr is set to 0 when the opcode is rejected as bad. | |
184 | */ | |
185 | void insn_get_opcode(struct insn *insn) | |
186 | { | |
187 | struct insn_field *opcode = &insn->opcode; | |
188 | insn_byte_t op, pfx; | |
189 | if (opcode->got) | |
190 | return; | |
191 | if (!insn->prefixes.got) | |
192 | insn_get_prefixes(insn); | |
193 | ||
194 | /* Get first opcode */ | |
195 | op = get_next(insn_byte_t, insn); | |
196 | opcode->bytes[0] = op; | |
197 | opcode->nbytes = 1; | |
e0e492e9 MH |
198 | |
199 | /* Check if there is VEX prefix or not */ | |
200 | if (insn_is_avx(insn)) { | |
201 | insn_byte_t m, p; | |
202 | m = insn_vex_m_bits(insn); | |
203 | p = insn_vex_p_bits(insn); | |
204 | insn->attr = inat_get_avx_attribute(op, m, p); /* attribute lookup keyed by opcode and the VEX m and p bits */ | |
205 | if (!inat_accept_vex(insn->attr)) | |
206 | insn->attr = 0; /* This instruction is bad */ | |
207 | goto end; /* VEX has only 1 byte for opcode */ | |
208 | } | |
209 | ||
eb13296c MH |
210 | insn->attr = inat_get_opcode_attribute(op); |
211 | while (inat_is_escape(insn->attr)) { | |
212 | /* Get escaped opcode */ | |
213 | op = get_next(insn_byte_t, insn); | |
214 | opcode->bytes[opcode->nbytes++] = op; | |
215 | pfx = insn_last_prefix(insn); /* the last prefix is passed on to the escape attribute lookup */ | |
216 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | |
217 | } | |
e0e492e9 MH |
218 | if (inat_must_vex(insn->attr)) |
219 | insn->attr = 0; /* This instruction is bad */ | |
220 | end: | |
eb13296c | 221 | opcode->got = 1; |
53a019a9 MH |
222 | |
223 | err_out: /* target of get_next() when the bounds check fails; opcode->got is not set on that path */ | |
224 | return; | |
eb13296c MH |
225 | } |
226 | ||
227 | /** | |
228 | * insn_get_modrm - collect ModRM byte, if any | |
229 | * @insn: &struct insn containing instruction | |
230 | * | |
231 | * Populates @insn->modrm and updates @insn->next_byte to point past the | |
232 | * ModRM byte, if any. If necessary, first collects the preceding bytes | |
233 | * (prefixes and opcode(s)). For group opcodes, @insn->attr is refined from the ModRM byte. No effect if @insn->modrm.got is already 1. | |
234 | */ | |
235 | void insn_get_modrm(struct insn *insn) | |
236 | { | |
237 | struct insn_field *modrm = &insn->modrm; | |
238 | insn_byte_t pfx, mod; | |
239 | if (modrm->got) | |
240 | return; | |
241 | if (!insn->opcode.got) | |
242 | insn_get_opcode(insn); | |
243 | ||
244 | if (inat_has_modrm(insn->attr)) { | |
245 | mod = get_next(insn_byte_t, insn); /* mod holds the whole ModRM byte, not only its mod field */ | |
246 | modrm->value = mod; | |
247 | modrm->nbytes = 1; | |
248 | if (inat_is_group(insn->attr)) { | |
249 | pfx = insn_last_prefix(insn); | |
250 | insn->attr = inat_get_group_attribute(mod, pfx, | |
251 | insn->attr); /* replace the group attribute with the one looked up from the ModRM byte and last prefix */ | |
252 | } | |
253 | } | |
254 | ||
255 | if (insn->x86_64 && inat_is_force64(insn->attr)) | |
256 | insn->opnd_bytes = 8; /* attribute forces an 8-byte operand size in 64-bit mode */ | |
257 | modrm->got = 1; | |
53a019a9 MH |
258 | |
259 | err_out: /* target of get_next() when the bounds check fails; modrm->got is not set on that path */ | |
260 | return; | |
eb13296c MH |
261 | } |
262 | ||
263 | ||
264 | /** | |
265 | * insn_rip_relative() - Does instruction use RIP-relative addressing mode? | |
266 | * @insn: &struct insn containing instruction | |
267 | * | |
268 | * If necessary, first collects the instruction up to and including the | |
269 | * ModRM byte. Returns 1 if a ModRM byte is present and selects RIP-relative addressing, otherwise 0; returns 0 without decoding anything if @insn->x86_64 is 0. | |
270 | */ | |
271 | int insn_rip_relative(struct insn *insn) | |
272 | { | |
273 | struct insn_field *modrm = &insn->modrm; | |
274 | ||
275 | if (!insn->x86_64) | |
276 | return 0; | |
277 | if (!modrm->got) | |
278 | insn_get_modrm(insn); | |
279 | /* | |
280 | * For rip-relative instructions, the mod field (top 2 bits) | |
281 | * is zero and the r/m field (bottom 3 bits) is 0x5. | |
282 | */ | |
283 | return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); /* 0xc7 keeps only the mod and r/m bits */ | |
284 | } | |
285 | ||
286 | /** | |
287 | * insn_get_sib() - Get the SIB byte of instruction | |
288 | * @insn: &struct insn containing instruction | |
289 | * | |
290 | * If necessary, first collects the instruction up to and including the | |
291 | * ModRM byte. No effect if @insn->sib.got is already set. | |
292 | */ | |
293 | void insn_get_sib(struct insn *insn) | |
294 | { | |
295 | insn_byte_t modrm; | |
296 | ||
297 | if (insn->sib.got) | |
298 | return; | |
299 | if (!insn->modrm.got) | |
300 | insn_get_modrm(insn); | |
301 | if (insn->modrm.nbytes) { | |
302 | modrm = (insn_byte_t)insn->modrm.value; | |
303 | if (insn->addr_bytes != 2 && | |
304 | X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { /* SIB byte is read only when address size is not 2, mod != 3 and r/m == 4 */ | |
305 | insn->sib.value = get_next(insn_byte_t, insn); | |
306 | insn->sib.nbytes = 1; | |
307 | } | |
308 | } | |
309 | insn->sib.got = 1; /* set even when the instruction has no SIB byte (nbytes stays 0) */ | |
53a019a9 MH |
310 | |
311 | err_out: /* target of get_next() when the bounds check fails; sib.got is not set on that path */ | |
312 | return; | |
eb13296c MH |
313 | } |
314 | ||
315 | ||
316 | /** | |
317 | * insn_get_displacement() - Get the displacement of instruction | |
318 | * @insn: &struct insn containing instruction | |
319 | * | |
320 | * If necessary, first collects the instruction up to and including the | |
321 | * SIB byte. No effect if @insn->displacement.got is already set. | |
322 | * Displacement value is sign-extended. | |
323 | */ | |
324 | void insn_get_displacement(struct insn *insn) | |
325 | { | |
326 | insn_byte_t mod, rm, base; | |
327 | ||
328 | if (insn->displacement.got) | |
329 | return; | |
330 | if (!insn->sib.got) | |
331 | insn_get_sib(insn); | |
332 | if (insn->modrm.nbytes) { | |
333 | /* | |
334 | * Interpreting the modrm byte: | |
335 | * mod = 00 - no displacement fields (exceptions below) | |
336 | * mod = 01 - 1-byte displacement field | |
337 | * mod = 10 - displacement field is 4 bytes, or 2 bytes if | |
338 | * address size = 2 (0x67 prefix in 32-bit mode) | |
339 | * mod = 11 - no memory operand | |
340 | * | |
341 | * If address size = 2... | |
342 | * mod = 00, r/m = 110 - displacement field is 2 bytes | |
343 | * | |
344 | * If address size != 2... | |
345 | * mod != 11, r/m = 100 - SIB byte exists | |
346 | * mod = 00, SIB base = 101 - displacement field is 4 bytes | |
347 | * mod = 00, r/m = 101 - rip-relative addressing, displacement | |
348 | * field is 4 bytes | |
349 | */ | |
350 | mod = X86_MODRM_MOD(insn->modrm.value); | |
351 | rm = X86_MODRM_RM(insn->modrm.value); | |
352 | base = X86_SIB_BASE(insn->sib.value); /* sib.value is still 0 (from insn_init) when no SIB byte was read */ | |
353 | if (mod == 3) | |
354 | goto out; | |
355 | if (mod == 1) { | |
356 | insn->displacement.value = get_next(char, insn); /* read as char; sign extension assumes char is signed here */ | |
357 | insn->displacement.nbytes = 1; | |
358 | } else if (insn->addr_bytes == 2) { | |
359 | if ((mod == 0 && rm == 6) || mod == 2) { | |
360 | insn->displacement.value = | |
361 | get_next(short, insn); | |
362 | insn->displacement.nbytes = 2; | |
363 | } | |
364 | } else { | |
365 | if ((mod == 0 && rm == 5) || mod == 2 || | |
366 | (mod == 0 && base == 5)) { | |
367 | insn->displacement.value = get_next(int, insn); | |
368 | insn->displacement.nbytes = 4; | |
369 | } | |
370 | } | |
371 | } | |
372 | out: | |
373 | insn->displacement.got = 1; /* set even when there is no displacement (nbytes stays 0) */ | |
53a019a9 MH |
374 | |
375 | err_out: /* target of get_next() when the bounds check fails; displacement.got is not set on that path */ | |
376 | return; | |
eb13296c MH |
377 | } |
378 | ||
379 | /* Decode moffset16/32/64. Returns 1 on success, 0 if a read would exceed the instruction size limit. */ | |
380 | static int __get_moffset(struct insn *insn) | |
381 | { | |
382 | switch (insn->addr_bytes) { | |
383 | case 2: | |
384 | insn->moffset1.value = get_next(short, insn); | |
385 | insn->moffset1.nbytes = 2; | |
386 | break; | |
387 | case 4: | |
388 | insn->moffset1.value = get_next(int, insn); | |
389 | insn->moffset1.nbytes = 4; | |
390 | break; | |
391 | case 8: /* read as two consecutive 4-byte values, moffset1 first */ | |
392 | insn->moffset1.value = get_next(int, insn); | |
393 | insn->moffset1.nbytes = 4; | |
394 | insn->moffset2.value = get_next(int, insn); | |
395 | insn->moffset2.nbytes = 4; | |
396 | break; | |
397 | } | |
398 | insn->moffset1.got = insn->moffset2.got = 1; | |
53a019a9 MH |
399 | return 1; |
400 | err_out: /* target of get_next() when the bounds check fails */ | |
401 | return 0; | |
eb13296c MH |
402 | } |
403 | ||
404 | /* Decode imm v32(Iz). Returns 1 on success, 0 if a read would exceed the instruction size limit. */ | |
405 | static int __get_immv32(struct insn *insn) | |
406 | { | |
407 | switch (insn->opnd_bytes) { | |
408 | case 2: | |
409 | insn->immediate.value = get_next(short, insn); | |
410 | insn->immediate.nbytes = 2; | |
411 | break; | |
412 | case 4: | |
413 | case 8: /* the immediate is 4 bytes even for an 8-byte operand size */ | |
414 | insn->immediate.value = get_next(int, insn); | |
415 | insn->immediate.nbytes = 4; | |
416 | break; | |
417 | } | |
53a019a9 MH |
418 | return 1; |
419 | err_out: /* target of get_next() when the bounds check fails */ | |
420 | return 0; | |
eb13296c MH |
421 | } |
422 | ||
423 | /* Decode imm v64(Iv/Ov). Returns 1 on success, 0 if a read would exceed the instruction size limit. */ | |
424 | static int __get_immv(struct insn *insn) | |
425 | { | |
426 | switch (insn->opnd_bytes) { | |
427 | case 2: | |
428 | insn->immediate1.value = get_next(short, insn); | |
429 | insn->immediate1.nbytes = 2; | |
430 | break; | |
431 | case 4: | |
432 | insn->immediate1.value = get_next(int, insn); | |
433 | insn->immediate1.nbytes = 4; | |
434 | break; | |
435 | case 8: /* read as two consecutive 4-byte values, immediate1 first */ | |
436 | insn->immediate1.value = get_next(int, insn); | |
437 | insn->immediate1.nbytes = 4; | |
438 | insn->immediate2.value = get_next(int, insn); | |
439 | insn->immediate2.nbytes = 4; | |
440 | break; | |
441 | } | |
442 | insn->immediate1.got = insn->immediate2.got = 1; | |
53a019a9 MH |
443 | return 1; |
444 | err_out: /* target of get_next() when the bounds check fails */ | |
445 | return 0; | |
eb13296c MH |
446 | } |
447 | ||
448 | /* Decode ptr16:16/32(Ap). Returns 1 on success, 0 if the operand cannot be decoded. */ | |
449 | static int __get_immptr(struct insn *insn) | |
450 | { | |
451 | switch (insn->opnd_bytes) { | |
452 | case 2: | |
453 | insn->immediate1.value = get_next(short, insn); | |
454 | insn->immediate1.nbytes = 2; | |
455 | break; | |
456 | case 4: | |
457 | insn->immediate1.value = get_next(int, insn); | |
458 | insn->immediate1.nbytes = 4; | |
459 | break; | |
460 | case 8: | |
461 | /* ptr16:64 does not exist (no segment) */ | |
462 | return 0; | |
463 | } | |
464 | insn->immediate2.value = get_next(unsigned short, insn); /* 2-byte value that follows the offset, read unsigned */ | |
465 | insn->immediate2.nbytes = 2; | |
466 | insn->immediate1.got = insn->immediate2.got = 1; | |
53a019a9 MH |
467 | return 1; |
468 | err_out: /* target of get_next() when the bounds check fails */ | |
469 | return 0; | |
eb13296c MH |
470 | } |
471 | ||
472 | /** | |
473 | * insn_get_immediate() - Get the immediates of instruction | |
474 | * @insn: &struct insn containing instruction | |
475 | * | |
476 | * If necessary, first collects the instruction up to and including the | |
477 | * displacement bytes. Sets @insn->immediate.got only when every required byte could be decoded. | |
478 | * Most immediates are sign-extended. The unsigned value can be | |
479 | * obtained by bit masking with ((1 << (nbytes * 8)) - 1) | |
480 | */ | |
481 | void insn_get_immediate(struct insn *insn) | |
482 | { | |
483 | if (insn->immediate.got) | |
484 | return; | |
485 | if (!insn->displacement.got) | |
486 | insn_get_displacement(insn); | |
487 | ||
488 | if (inat_has_moffset(insn->attr)) { | |
489 | if (!__get_moffset(insn)) goto err_out; /* do not report success after a failed helper */ | |
490 | goto done; | |
491 | } | |
492 | ||
493 | if (!inat_has_immediate(insn->attr)) | |
494 | /* no immediates */ | |
495 | goto done; | |
496 | ||
497 | switch (inat_immediate_size(insn->attr)) { | |
498 | case INAT_IMM_BYTE: | |
499 | insn->immediate.value = get_next(char, insn); | |
500 | insn->immediate.nbytes = 1; | |
501 | break; | |
502 | case INAT_IMM_WORD: | |
503 | insn->immediate.value = get_next(short, insn); | |
504 | insn->immediate.nbytes = 2; | |
505 | break; | |
506 | case INAT_IMM_DWORD: | |
507 | insn->immediate.value = get_next(int, insn); | |
508 | insn->immediate.nbytes = 4; | |
509 | break; | |
510 | case INAT_IMM_QWORD: /* read as two consecutive 4-byte values, immediate1 first */ | |
511 | insn->immediate1.value = get_next(int, insn); | |
512 | insn->immediate1.nbytes = 4; | |
513 | insn->immediate2.value = get_next(int, insn); | |
514 | insn->immediate2.nbytes = 4; | |
515 | break; | |
516 | case INAT_IMM_PTR: | |
517 | if (!__get_immptr(insn)) goto err_out; | |
518 | break; | |
519 | case INAT_IMM_VWORD32: | |
520 | if (!__get_immv32(insn)) goto err_out; | |
521 | break; | |
522 | case INAT_IMM_VWORD: | |
523 | if (!__get_immv(insn)) goto err_out; | |
524 | break; | |
525 | default: | |
526 | break; | |
527 | } | |
528 | if (inat_has_second_immediate(insn->attr)) { /* an extra 1-byte immediate follows the first */ | |
529 | insn->immediate2.value = get_next(char, insn); | |
530 | insn->immediate2.nbytes = 1; | |
531 | } | |
532 | done: | |
533 | insn->immediate.got = 1; | |
53a019a9 MH |
534 | |
535 | err_out: /* reached on any failed read or failed helper; immediate.got is not set on that path */ | |
536 | return; | |
eb13296c MH |
537 | } |
538 | ||
539 | /** | |
540 | * insn_get_length() - Get the length of instruction | |
541 | * @insn: &struct insn containing instruction | |
542 | * | |
543 | * If necessary, first collects the instruction up to and including the | |
544 | * immediate bytes. Stores the number of bytes consumed in @insn->length; no effect if it is already non-zero. | |
545 | */ | |
546 | void insn_get_length(struct insn *insn) | |
547 | { | |
548 | if (insn->length) /* already computed */ | |
549 | return; | |
550 | if (!insn->immediate.got) | |
551 | insn_get_immediate(insn); /* NOTE(review): its outcome is not checked here, so after an early bail-out the length covers only the bytes consumed so far */ | |
552 | insn->length = (unsigned char)((unsigned long)insn->next_byte | |
553 | - (unsigned long)insn->kaddr); /* distance from the first byte to next_byte, truncated to unsigned char */ | |
554 | } |