Commit | Line | Data |
---|---|---|
09ca5ab2 | 1 | /* bpf_jit_comp.c: BPF JIT compiler |
0ca87f05 ME |
2 | * |
3 | * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation | |
4 | * | |
5 | * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com) | |
09ca5ab2 | 6 | * Ported to ppc32 by Denis Kirjanov <kda@linux-powerpc.org> |
0ca87f05 ME |
7 | * |
8 | * This program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public License | |
10 | * as published by the Free Software Foundation; version 2 | |
11 | * of the License. | |
12 | */ | |
13 | #include <linux/moduleloader.h> | |
14 | #include <asm/cacheflush.h> | |
15 | #include <linux/netdevice.h> | |
16 | #include <linux/filter.h> | |
5082dfb7 DB |
17 | #include <linux/if_vlan.h> |
18 | ||
0ca87f05 ME |
19 | #include "bpf_jit.h" |
20 | ||
0ca87f05 ME |
21 | int bpf_jit_enable __read_mostly; |
22 | ||
0ca87f05 ME |
23 | static inline void bpf_flush_icache(void *start, void *end) |
24 | { | |
25 | smp_wmb(); | |
26 | flush_icache_range((unsigned long)start, (unsigned long)end); | |
27 | } | |
28 | ||
7ae457c1 | 29 | static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, |
0ca87f05 ME |
30 | struct codegen_context *ctx) |
31 | { | |
32 | int i; | |
33 | const struct sock_filter *filter = fp->insns; | |
34 | ||
35 | if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { | |
36 | /* Make stackframe */ | |
37 | if (ctx->seen & SEEN_DATAREF) { | |
38 | /* If we call any helpers (for loads), save LR */ | |
c75df6f9 | 39 | EMIT(PPC_INST_MFLR | __PPC_RT(R0)); |
09ca5ab2 | 40 | PPC_BPF_STL(0, 1, PPC_LR_STKOFF); |
0ca87f05 ME |
41 | |
42 | /* Back up non-volatile regs. */ | |
09ca5ab2 DK |
43 | PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D))); |
44 | PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL))); | |
0ca87f05 ME |
45 | } |
46 | if (ctx->seen & SEEN_MEM) { | |
47 | /* | |
48 | * Conditionally save regs r15-r31 as some will be used | |
49 | * for M[] data. | |
50 | */ | |
51 | for (i = r_M; i < (r_M+16); i++) { | |
52 | if (ctx->seen & (1 << (i-r_M))) | |
09ca5ab2 | 53 | PPC_BPF_STL(i, 1, -(REG_SZ*(32-i))); |
0ca87f05 ME |
54 | } |
55 | } | |
09ca5ab2 | 56 | PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME); |
0ca87f05 ME |
57 | } |
58 | ||
59 | if (ctx->seen & SEEN_DATAREF) { | |
60 | /* | |
61 | * If this filter needs to access skb data, | |
62 | * prepare r_D and r_HL: | |
63 | * r_HL = skb->len - skb->data_len | |
64 | * r_D = skb->data | |
65 | */ | |
66 | PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, | |
67 | data_len)); | |
68 | PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len)); | |
69 | PPC_SUB(r_HL, r_HL, r_scratch1); | |
09ca5ab2 | 70 | PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data)); |
0ca87f05 ME |
71 | } |
72 | ||
73 | if (ctx->seen & SEEN_XREG) { | |
74 | /* | |
75 | * TODO: Could also detect whether first instr. sets X and | |
76 | * avoid this (as below, with A). | |
77 | */ | |
78 | PPC_LI(r_X, 0); | |
79 | } | |
80 | ||
55795ef5 RV |
81 | /* make sure we dont leak kernel information to user */ |
82 | if (bpf_needs_clear_a(&filter[0])) | |
0ca87f05 | 83 | PPC_LI(r_A, 0); |
0ca87f05 ME |
84 | } |
85 | ||
86 | static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) | |
87 | { | |
88 | int i; | |
89 | ||
90 | if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { | |
91 | PPC_ADDI(1, 1, BPF_PPC_STACKFRAME); | |
92 | if (ctx->seen & SEEN_DATAREF) { | |
09ca5ab2 | 93 | PPC_BPF_LL(0, 1, PPC_LR_STKOFF); |
0ca87f05 | 94 | PPC_MTLR(0); |
09ca5ab2 DK |
95 | PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D))); |
96 | PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL))); | |
0ca87f05 ME |
97 | } |
98 | if (ctx->seen & SEEN_MEM) { | |
99 | /* Restore any saved non-vol registers */ | |
100 | for (i = r_M; i < (r_M+16); i++) { | |
101 | if (ctx->seen & (1 << (i-r_M))) | |
09ca5ab2 | 102 | PPC_BPF_LL(i, 1, -(REG_SZ*(32-i))); |
0ca87f05 ME |
103 | } |
104 | } | |
105 | } | |
106 | /* The RETs have left a return value in R3. */ | |
107 | ||
108 | PPC_BLR(); | |
109 | } | |
110 | ||
05be1824 JS |
111 | #define CHOOSE_LOAD_FUNC(K, func) \ |
112 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) | |
113 | ||
0ca87f05 | 114 | /* Assemble the body code between the prologue & epilogue. */ |
7ae457c1 | 115 | static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, |
0ca87f05 ME |
116 | struct codegen_context *ctx, |
117 | unsigned int *addrs) | |
118 | { | |
119 | const struct sock_filter *filter = fp->insns; | |
120 | int flen = fp->len; | |
121 | u8 *func; | |
122 | unsigned int true_cond; | |
123 | int i; | |
124 | ||
125 | /* Start of epilogue code */ | |
126 | unsigned int exit_addr = addrs[flen]; | |
127 | ||
128 | for (i = 0; i < flen; i++) { | |
129 | unsigned int K = filter[i].k; | |
34805931 | 130 | u16 code = bpf_anc_helper(&filter[i]); |
0ca87f05 ME |
131 | |
132 | /* | |
133 | * addrs[] maps a BPF bytecode address into a real offset from | |
134 | * the start of the body code. | |
135 | */ | |
136 | addrs[i] = ctx->idx * 4; | |
137 | ||
34805931 | 138 | switch (code) { |
0ca87f05 | 139 | /*** ALU ops ***/ |
34805931 | 140 | case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ |
0ca87f05 ME |
141 | ctx->seen |= SEEN_XREG; |
142 | PPC_ADD(r_A, r_A, r_X); | |
143 | break; | |
34805931 | 144 | case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ |
0ca87f05 ME |
145 | if (!K) |
146 | break; | |
147 | PPC_ADDI(r_A, r_A, IMM_L(K)); | |
148 | if (K >= 32768) | |
149 | PPC_ADDIS(r_A, r_A, IMM_HA(K)); | |
150 | break; | |
34805931 | 151 | case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ |
0ca87f05 ME |
152 | ctx->seen |= SEEN_XREG; |
153 | PPC_SUB(r_A, r_A, r_X); | |
154 | break; | |
34805931 | 155 | case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ |
0ca87f05 ME |
156 | if (!K) |
157 | break; | |
158 | PPC_ADDI(r_A, r_A, IMM_L(-K)); | |
159 | if (K >= 32768) | |
160 | PPC_ADDIS(r_A, r_A, IMM_HA(-K)); | |
161 | break; | |
34805931 | 162 | case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ |
0ca87f05 ME |
163 | ctx->seen |= SEEN_XREG; |
164 | PPC_MUL(r_A, r_A, r_X); | |
165 | break; | |
34805931 | 166 | case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ |
0ca87f05 ME |
167 | if (K < 32768) |
168 | PPC_MULI(r_A, r_A, K); | |
169 | else { | |
170 | PPC_LI32(r_scratch1, K); | |
171 | PPC_MUL(r_A, r_A, r_scratch1); | |
172 | } | |
173 | break; | |
34805931 | 174 | case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ |
cadaecd2 | 175 | case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ |
b0c06d33 VM |
176 | ctx->seen |= SEEN_XREG; |
177 | PPC_CMPWI(r_X, 0); | |
178 | if (ctx->pc_ret0 != -1) { | |
179 | PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); | |
180 | } else { | |
181 | PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); | |
182 | PPC_LI(r_ret, 0); | |
183 | PPC_JMP(exit_addr); | |
184 | } | |
cadaecd2 DK |
185 | if (code == (BPF_ALU | BPF_MOD | BPF_X)) { |
186 | PPC_DIVWU(r_scratch1, r_A, r_X); | |
187 | PPC_MUL(r_scratch1, r_X, r_scratch1); | |
188 | PPC_SUB(r_A, r_A, r_scratch1); | |
189 | } else { | |
190 | PPC_DIVWU(r_A, r_A, r_X); | |
191 | } | |
b0c06d33 | 192 | break; |
34805931 | 193 | case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ |
b0c06d33 VM |
194 | PPC_LI32(r_scratch2, K); |
195 | PPC_DIVWU(r_scratch1, r_A, r_scratch2); | |
196 | PPC_MUL(r_scratch1, r_scratch2, r_scratch1); | |
197 | PPC_SUB(r_A, r_A, r_scratch1); | |
198 | break; | |
34805931 | 199 | case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ |
aee636c4 ED |
200 | if (K == 1) |
201 | break; | |
0ca87f05 | 202 | PPC_LI32(r_scratch1, K); |
aee636c4 | 203 | PPC_DIVWU(r_A, r_A, r_scratch1); |
0ca87f05 | 204 | break; |
34805931 | 205 | case BPF_ALU | BPF_AND | BPF_X: |
0ca87f05 ME |
206 | ctx->seen |= SEEN_XREG; |
207 | PPC_AND(r_A, r_A, r_X); | |
208 | break; | |
34805931 | 209 | case BPF_ALU | BPF_AND | BPF_K: |
0ca87f05 ME |
210 | if (!IMM_H(K)) |
211 | PPC_ANDI(r_A, r_A, K); | |
212 | else { | |
213 | PPC_LI32(r_scratch1, K); | |
214 | PPC_AND(r_A, r_A, r_scratch1); | |
215 | } | |
216 | break; | |
34805931 | 217 | case BPF_ALU | BPF_OR | BPF_X: |
0ca87f05 ME |
218 | ctx->seen |= SEEN_XREG; |
219 | PPC_OR(r_A, r_A, r_X); | |
220 | break; | |
34805931 | 221 | case BPF_ALU | BPF_OR | BPF_K: |
0ca87f05 ME |
222 | if (IMM_L(K)) |
223 | PPC_ORI(r_A, r_A, IMM_L(K)); | |
224 | if (K >= 65536) | |
225 | PPC_ORIS(r_A, r_A, IMM_H(K)); | |
226 | break; | |
34805931 DB |
227 | case BPF_ANC | SKF_AD_ALU_XOR_X: |
228 | case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ | |
02871903 DB |
229 | ctx->seen |= SEEN_XREG; |
230 | PPC_XOR(r_A, r_A, r_X); | |
231 | break; | |
34805931 | 232 | case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ |
02871903 DB |
233 | if (IMM_L(K)) |
234 | PPC_XORI(r_A, r_A, IMM_L(K)); | |
235 | if (K >= 65536) | |
236 | PPC_XORIS(r_A, r_A, IMM_H(K)); | |
237 | break; | |
34805931 | 238 | case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ |
0ca87f05 ME |
239 | ctx->seen |= SEEN_XREG; |
240 | PPC_SLW(r_A, r_A, r_X); | |
241 | break; | |
34805931 | 242 | case BPF_ALU | BPF_LSH | BPF_K: |
0ca87f05 ME |
243 | if (K == 0) |
244 | break; | |
245 | else | |
246 | PPC_SLWI(r_A, r_A, K); | |
247 | break; | |
34805931 | 248 | case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ |
0ca87f05 ME |
249 | ctx->seen |= SEEN_XREG; |
250 | PPC_SRW(r_A, r_A, r_X); | |
251 | break; | |
34805931 | 252 | case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ |
0ca87f05 ME |
253 | if (K == 0) |
254 | break; | |
255 | else | |
256 | PPC_SRWI(r_A, r_A, K); | |
257 | break; | |
34805931 | 258 | case BPF_ALU | BPF_NEG: |
0ca87f05 ME |
259 | PPC_NEG(r_A, r_A); |
260 | break; | |
34805931 | 261 | case BPF_RET | BPF_K: |
0ca87f05 ME |
262 | PPC_LI32(r_ret, K); |
263 | if (!K) { | |
264 | if (ctx->pc_ret0 == -1) | |
265 | ctx->pc_ret0 = i; | |
266 | } | |
267 | /* | |
268 | * If this isn't the very last instruction, branch to | |
269 | * the epilogue if we've stuff to clean up. Otherwise, | |
270 | * if there's nothing to tidy, just return. If we /are/ | |
271 | * the last instruction, we're about to fall through to | |
272 | * the epilogue to return. | |
273 | */ | |
274 | if (i != flen - 1) { | |
275 | /* | |
276 | * Note: 'seen' is properly valid only on pass | |
277 | * #2. Both parts of this conditional are the | |
278 | * same instruction size though, meaning the | |
279 | * first pass will still correctly determine the | |
280 | * code size/addresses. | |
281 | */ | |
282 | if (ctx->seen) | |
283 | PPC_JMP(exit_addr); | |
284 | else | |
285 | PPC_BLR(); | |
286 | } | |
287 | break; | |
34805931 | 288 | case BPF_RET | BPF_A: |
0ca87f05 ME |
289 | PPC_MR(r_ret, r_A); |
290 | if (i != flen - 1) { | |
291 | if (ctx->seen) | |
292 | PPC_JMP(exit_addr); | |
293 | else | |
294 | PPC_BLR(); | |
295 | } | |
296 | break; | |
34805931 | 297 | case BPF_MISC | BPF_TAX: /* X = A */ |
0ca87f05 ME |
298 | PPC_MR(r_X, r_A); |
299 | break; | |
34805931 | 300 | case BPF_MISC | BPF_TXA: /* A = X */ |
0ca87f05 ME |
301 | ctx->seen |= SEEN_XREG; |
302 | PPC_MR(r_A, r_X); | |
303 | break; | |
304 | ||
305 | /*** Constant loads/M[] access ***/ | |
34805931 | 306 | case BPF_LD | BPF_IMM: /* A = K */ |
0ca87f05 ME |
307 | PPC_LI32(r_A, K); |
308 | break; | |
34805931 | 309 | case BPF_LDX | BPF_IMM: /* X = K */ |
0ca87f05 ME |
310 | PPC_LI32(r_X, K); |
311 | break; | |
34805931 | 312 | case BPF_LD | BPF_MEM: /* A = mem[K] */ |
0ca87f05 ME |
313 | PPC_MR(r_A, r_M + (K & 0xf)); |
314 | ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); | |
315 | break; | |
34805931 | 316 | case BPF_LDX | BPF_MEM: /* X = mem[K] */ |
0ca87f05 ME |
317 | PPC_MR(r_X, r_M + (K & 0xf)); |
318 | ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); | |
319 | break; | |
34805931 | 320 | case BPF_ST: /* mem[K] = A */ |
0ca87f05 ME |
321 | PPC_MR(r_M + (K & 0xf), r_A); |
322 | ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); | |
323 | break; | |
34805931 | 324 | case BPF_STX: /* mem[K] = X */ |
0ca87f05 ME |
325 | PPC_MR(r_M + (K & 0xf), r_X); |
326 | ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); | |
327 | break; | |
34805931 | 328 | case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ |
0ca87f05 ME |
329 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); |
330 | PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); | |
331 | break; | |
34805931 | 332 | case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ |
0ca87f05 ME |
333 | PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); |
334 | break; | |
335 | ||
336 | /*** Ancillary info loads ***/ | |
34805931 | 337 | case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ |
0ca87f05 ME |
338 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, |
339 | protocol) != 2); | |
9c662cad PB |
340 | PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, |
341 | protocol)); | |
0ca87f05 | 342 | break; |
34805931 | 343 | case BPF_ANC | SKF_AD_IFINDEX: |
5b61c4db DK |
344 | case BPF_ANC | SKF_AD_HATYPE: |
345 | BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, | |
346 | ifindex) != 4); | |
347 | BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, | |
348 | type) != 2); | |
09ca5ab2 | 349 | PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, |
0ca87f05 ME |
350 | dev)); |
351 | PPC_CMPDI(r_scratch1, 0); | |
352 | if (ctx->pc_ret0 != -1) { | |
353 | PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); | |
354 | } else { | |
355 | /* Exit, returning 0; first pass hits here. */ | |
5b61c4db | 356 | PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); |
0ca87f05 ME |
357 | PPC_LI(r_ret, 0); |
358 | PPC_JMP(exit_addr); | |
359 | } | |
5b61c4db DK |
360 | if (code == (BPF_ANC | SKF_AD_IFINDEX)) { |
361 | PPC_LWZ_OFFS(r_A, r_scratch1, | |
0ca87f05 | 362 | offsetof(struct net_device, ifindex)); |
5b61c4db DK |
363 | } else { |
364 | PPC_LHZ_OFFS(r_A, r_scratch1, | |
365 | offsetof(struct net_device, type)); | |
366 | } | |
367 | ||
0ca87f05 | 368 | break; |
34805931 | 369 | case BPF_ANC | SKF_AD_MARK: |
0ca87f05 ME |
370 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); |
371 | PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, | |
372 | mark)); | |
373 | break; | |
34805931 | 374 | case BPF_ANC | SKF_AD_RXHASH: |
61b905da | 375 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); |
0ca87f05 | 376 | PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, |
61b905da | 377 | hash)); |
0ca87f05 | 378 | break; |
34805931 DB |
379 | case BPF_ANC | SKF_AD_VLAN_TAG: |
380 | case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: | |
5082dfb7 | 381 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); |
3fc60aa0 DK |
382 | BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); |
383 | ||
5082dfb7 DB |
384 | PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, |
385 | vlan_tci)); | |
dba63115 | 386 | if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { |
3fc60aa0 | 387 | PPC_ANDI(r_A, r_A, ~VLAN_TAG_PRESENT); |
dba63115 | 388 | } else { |
5082dfb7 | 389 | PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT); |
dba63115 DK |
390 | PPC_SRWI(r_A, r_A, 12); |
391 | } | |
5082dfb7 | 392 | break; |
34805931 | 393 | case BPF_ANC | SKF_AD_QUEUE: |
0ca87f05 ME |
394 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, |
395 | queue_mapping) != 2); | |
396 | PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, | |
397 | queue_mapping)); | |
398 | break; | |
4e235761 DK |
399 | case BPF_ANC | SKF_AD_PKTTYPE: |
400 | PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET()); | |
401 | PPC_ANDI(r_A, r_A, PKT_TYPE_MAX); | |
402 | PPC_SRWI(r_A, r_A, 5); | |
403 | break; | |
34805931 | 404 | case BPF_ANC | SKF_AD_CPU: |
02290948 | 405 | PPC_BPF_LOAD_CPU(r_A); |
0ca87f05 | 406 | break; |
0ca87f05 | 407 | /*** Absolute loads from packet header/data ***/ |
34805931 | 408 | case BPF_LD | BPF_W | BPF_ABS: |
05be1824 | 409 | func = CHOOSE_LOAD_FUNC(K, sk_load_word); |
0ca87f05 | 410 | goto common_load; |
34805931 | 411 | case BPF_LD | BPF_H | BPF_ABS: |
05be1824 | 412 | func = CHOOSE_LOAD_FUNC(K, sk_load_half); |
0ca87f05 | 413 | goto common_load; |
34805931 | 414 | case BPF_LD | BPF_B | BPF_ABS: |
05be1824 | 415 | func = CHOOSE_LOAD_FUNC(K, sk_load_byte); |
0ca87f05 | 416 | common_load: |
05be1824 | 417 | /* Load from [K]. */ |
0ca87f05 | 418 | ctx->seen |= SEEN_DATAREF; |
09ca5ab2 | 419 | PPC_FUNC_ADDR(r_scratch1, func); |
0ca87f05 ME |
420 | PPC_MTLR(r_scratch1); |
421 | PPC_LI32(r_addr, K); | |
422 | PPC_BLRL(); | |
423 | /* | |
424 | * Helper returns 'lt' condition on error, and an | |
425 | * appropriate return value in r3 | |
426 | */ | |
427 | PPC_BCC(COND_LT, exit_addr); | |
428 | break; | |
429 | ||
430 | /*** Indirect loads from packet header/data ***/ | |
34805931 | 431 | case BPF_LD | BPF_W | BPF_IND: |
0ca87f05 ME |
432 | func = sk_load_word; |
433 | goto common_load_ind; | |
34805931 | 434 | case BPF_LD | BPF_H | BPF_IND: |
0ca87f05 ME |
435 | func = sk_load_half; |
436 | goto common_load_ind; | |
34805931 | 437 | case BPF_LD | BPF_B | BPF_IND: |
0ca87f05 ME |
438 | func = sk_load_byte; |
439 | common_load_ind: | |
440 | /* | |
441 | * Load from [X + K]. Negative offsets are tested for | |
05be1824 | 442 | * in the helper functions. |
0ca87f05 ME |
443 | */ |
444 | ctx->seen |= SEEN_DATAREF | SEEN_XREG; | |
09ca5ab2 | 445 | PPC_FUNC_ADDR(r_scratch1, func); |
0ca87f05 ME |
446 | PPC_MTLR(r_scratch1); |
447 | PPC_ADDI(r_addr, r_X, IMM_L(K)); | |
448 | if (K >= 32768) | |
449 | PPC_ADDIS(r_addr, r_addr, IMM_HA(K)); | |
450 | PPC_BLRL(); | |
451 | /* If error, cr0.LT set */ | |
452 | PPC_BCC(COND_LT, exit_addr); | |
453 | break; | |
454 | ||
34805931 | 455 | case BPF_LDX | BPF_B | BPF_MSH: |
05be1824 | 456 | func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); |
0ca87f05 ME |
457 | goto common_load; |
458 | break; | |
459 | ||
460 | /*** Jump and branches ***/ | |
34805931 | 461 | case BPF_JMP | BPF_JA: |
0ca87f05 ME |
462 | if (K != 0) |
463 | PPC_JMP(addrs[i + 1 + K]); | |
464 | break; | |
465 | ||
34805931 DB |
466 | case BPF_JMP | BPF_JGT | BPF_K: |
467 | case BPF_JMP | BPF_JGT | BPF_X: | |
0ca87f05 ME |
468 | true_cond = COND_GT; |
469 | goto cond_branch; | |
34805931 DB |
470 | case BPF_JMP | BPF_JGE | BPF_K: |
471 | case BPF_JMP | BPF_JGE | BPF_X: | |
0ca87f05 ME |
472 | true_cond = COND_GE; |
473 | goto cond_branch; | |
34805931 DB |
474 | case BPF_JMP | BPF_JEQ | BPF_K: |
475 | case BPF_JMP | BPF_JEQ | BPF_X: | |
0ca87f05 ME |
476 | true_cond = COND_EQ; |
477 | goto cond_branch; | |
34805931 DB |
478 | case BPF_JMP | BPF_JSET | BPF_K: |
479 | case BPF_JMP | BPF_JSET | BPF_X: | |
0ca87f05 ME |
480 | true_cond = COND_NE; |
481 | /* Fall through */ | |
482 | cond_branch: | |
483 | /* same targets, can avoid doing the test :) */ | |
484 | if (filter[i].jt == filter[i].jf) { | |
485 | if (filter[i].jt > 0) | |
486 | PPC_JMP(addrs[i + 1 + filter[i].jt]); | |
487 | break; | |
488 | } | |
489 | ||
34805931 DB |
490 | switch (code) { |
491 | case BPF_JMP | BPF_JGT | BPF_X: | |
492 | case BPF_JMP | BPF_JGE | BPF_X: | |
493 | case BPF_JMP | BPF_JEQ | BPF_X: | |
0ca87f05 ME |
494 | ctx->seen |= SEEN_XREG; |
495 | PPC_CMPLW(r_A, r_X); | |
496 | break; | |
34805931 | 497 | case BPF_JMP | BPF_JSET | BPF_X: |
0ca87f05 ME |
498 | ctx->seen |= SEEN_XREG; |
499 | PPC_AND_DOT(r_scratch1, r_A, r_X); | |
500 | break; | |
34805931 DB |
501 | case BPF_JMP | BPF_JEQ | BPF_K: |
502 | case BPF_JMP | BPF_JGT | BPF_K: | |
503 | case BPF_JMP | BPF_JGE | BPF_K: | |
0ca87f05 ME |
504 | if (K < 32768) |
505 | PPC_CMPLWI(r_A, K); | |
506 | else { | |
507 | PPC_LI32(r_scratch1, K); | |
508 | PPC_CMPLW(r_A, r_scratch1); | |
509 | } | |
510 | break; | |
34805931 | 511 | case BPF_JMP | BPF_JSET | BPF_K: |
0ca87f05 ME |
512 | if (K < 32768) |
513 | /* PPC_ANDI is /only/ dot-form */ | |
514 | PPC_ANDI(r_scratch1, r_A, K); | |
515 | else { | |
516 | PPC_LI32(r_scratch1, K); | |
517 | PPC_AND_DOT(r_scratch1, r_A, | |
518 | r_scratch1); | |
519 | } | |
520 | break; | |
521 | } | |
522 | /* Sometimes branches are constructed "backward", with | |
523 | * the false path being the branch and true path being | |
524 | * a fallthrough to the next instruction. | |
525 | */ | |
526 | if (filter[i].jt == 0) | |
527 | /* Swap the sense of the branch */ | |
528 | PPC_BCC(true_cond ^ COND_CMP_TRUE, | |
529 | addrs[i + 1 + filter[i].jf]); | |
530 | else { | |
531 | PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]); | |
532 | if (filter[i].jf != 0) | |
533 | PPC_JMP(addrs[i + 1 + filter[i].jf]); | |
534 | } | |
535 | break; | |
536 | default: | |
537 | /* The filter contains something cruel & unusual. | |
538 | * We don't handle it, but also there shouldn't be | |
539 | * anything missing from our list. | |
540 | */ | |
541 | if (printk_ratelimit()) | |
542 | pr_err("BPF filter opcode %04x (@%d) unsupported\n", | |
543 | filter[i].code, i); | |
544 | return -ENOTSUPP; | |
545 | } | |
546 | ||
547 | } | |
548 | /* Set end-of-body-code address for exit. */ | |
549 | addrs[i] = ctx->idx * 4; | |
550 | ||
551 | return 0; | |
552 | } | |
553 | ||
7ae457c1 | 554 | void bpf_jit_compile(struct bpf_prog *fp) |
0ca87f05 ME |
555 | { |
556 | unsigned int proglen; | |
557 | unsigned int alloclen; | |
558 | u32 *image = NULL; | |
559 | u32 *code_base; | |
560 | unsigned int *addrs; | |
561 | struct codegen_context cgctx; | |
562 | int pass; | |
563 | int flen = fp->len; | |
564 | ||
565 | if (!bpf_jit_enable) | |
566 | return; | |
567 | ||
568 | addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL); | |
569 | if (addrs == NULL) | |
570 | return; | |
571 | ||
572 | /* | |
573 | * There are multiple assembly passes as the generated code will change | |
574 | * size as it settles down, figuring out the max branch offsets/exit | |
575 | * paths required. | |
576 | * | |
577 | * The range of standard conditional branches is +/- 32Kbytes. Since | |
578 | * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to | |
579 | * finish with 8 bytes/instruction. Not feasible, so long jumps are | |
580 | * used, distinct from short branches. | |
581 | * | |
582 | * Current: | |
583 | * | |
584 | * For now, both branch types assemble to 2 words (short branches padded | |
585 | * with a NOP); this is less efficient, but assembly will always complete | |
586 | * after exactly 3 passes: | |
587 | * | |
588 | * First pass: No code buffer; Program is "faux-generated" -- no code | |
589 | * emitted but maximum size of output determined (and addrs[] filled | |
590 | * in). Also, we note whether we use M[], whether we use skb data, etc. | |
591 | * All generation choices assumed to be 'worst-case', e.g. branches all | |
592 | * far (2 instructions), return path code reduction not available, etc. | |
593 | * | |
594 | * Second pass: Code buffer allocated with size determined previously. | |
595 | * Prologue generated to support features we have seen used. Exit paths | |
596 | * determined and addrs[] is filled in again, as code may be slightly | |
597 | * smaller as a result. | |
598 | * | |
599 | * Third pass: Code generated 'for real', and branch destinations | |
600 | * determined from now-accurate addrs[] map. | |
601 | * | |
602 | * Ideal: | |
603 | * | |
604 | * If we optimise this, near branches will be shorter. On the | |
605 | * first assembly pass, we should err on the side of caution and | |
606 | * generate the biggest code. On subsequent passes, branches will be | |
607 | * generated short or long and code size will reduce. With smaller | |
608 | * code, more branches may fall into the short category, and code will | |
609 | * reduce more. | |
610 | * | |
611 | * Finally, if we see one pass generate code the same size as the | |
612 | * previous pass we have converged and should now generate code for | |
613 | * real. Allocating at the end will also save the memory that would | |
614 | * otherwise be wasted by the (small) current code shrinkage. | |
615 | * Preferably, we should do a small number of passes (e.g. 5) and if we | |
616 | * haven't converged by then, get impatient and force code to generate | |
617 | * as-is, even if the odd branch would be left long. The chances of a | |
618 | * long jump are tiny with all but the most enormous of BPF filter | |
619 | * inputs, so we should usually converge on the third pass. | |
620 | */ | |
621 | ||
622 | cgctx.idx = 0; | |
623 | cgctx.seen = 0; | |
624 | cgctx.pc_ret0 = -1; | |
625 | /* Scouting faux-generate pass 0 */ | |
626 | if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) | |
627 | /* We hit something illegal or unsupported. */ | |
628 | goto out; | |
629 | ||
630 | /* | |
631 | * Pretend to build prologue, given the features we've seen. This will | |
632 | * update ctgtx.idx as it pretends to output instructions, then we can | |
633 | * calculate total size from idx. | |
634 | */ | |
635 | bpf_jit_build_prologue(fp, 0, &cgctx); | |
636 | bpf_jit_build_epilogue(0, &cgctx); | |
637 | ||
638 | proglen = cgctx.idx * 4; | |
639 | alloclen = proglen + FUNCTION_DESCR_SIZE; | |
ed900ffb | 640 | image = module_alloc(alloclen); |
0ca87f05 ME |
641 | if (!image) |
642 | goto out; | |
643 | ||
644 | code_base = image + (FUNCTION_DESCR_SIZE/4); | |
645 | ||
646 | /* Code generation passes 1-2 */ | |
647 | for (pass = 1; pass < 3; pass++) { | |
648 | /* Now build the prologue, body code & epilogue for real. */ | |
649 | cgctx.idx = 0; | |
650 | bpf_jit_build_prologue(fp, code_base, &cgctx); | |
651 | bpf_jit_build_body(fp, code_base, &cgctx, addrs); | |
652 | bpf_jit_build_epilogue(code_base, &cgctx); | |
653 | ||
654 | if (bpf_jit_enable > 1) | |
655 | pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, | |
656 | proglen - (cgctx.idx * 4), cgctx.seen); | |
657 | } | |
658 | ||
659 | if (bpf_jit_enable > 1) | |
79617801 DB |
660 | /* Note that we output the base address of the code_base |
661 | * rather than image, since opcodes are in code_base. | |
662 | */ | |
663 | bpf_jit_dump(flen, proglen, pass, code_base); | |
0ca87f05 ME |
664 | |
665 | if (image) { | |
0ca87f05 | 666 | bpf_flush_icache(code_base, code_base + (proglen/4)); |
09ca5ab2 | 667 | #ifdef CONFIG_PPC64 |
0ca87f05 ME |
668 | /* Function descriptor nastiness: Address + TOC */ |
669 | ((u64 *)image)[0] = (u64)code_base; | |
670 | ((u64 *)image)[1] = local_paca->kernel_toc; | |
09ca5ab2 | 671 | #endif |
0ca87f05 | 672 | fp->bpf_func = (void *)image; |
a91263d5 | 673 | fp->jited = 1; |
0ca87f05 ME |
674 | } |
675 | out: | |
676 | kfree(addrs); | |
677 | return; | |
678 | } | |
679 | ||
7ae457c1 | 680 | void bpf_jit_free(struct bpf_prog *fp) |
0ca87f05 | 681 | { |
f8bbbfc3 | 682 | if (fp->jited) |
be1f221c | 683 | module_memfree(fp->bpf_func); |
60a3b225 DB |
684 | |
685 | bpf_prog_unlock_free(fp); | |
0ca87f05 | 686 | } |