[deliverable/linux.git] / arch / powerpc / net / bpf_jit_64.S

/* bpf_jit.S: Packet/header access helper functions
 * for PPC64 BPF compiler.
 *
 * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <asm/ppc_asm.h>
#include "bpf_jit.h"

/*
 * All of these routines are called directly from generated code,
 * whose register usage is:
 *
 * r3		skb
 * r4,r5	A,X
 * r6		*** address parameter to helper ***
 * r7-r10	scratch
 * r14		skb->data
 * r15		skb headlen
 * r16-31	M[]
 */

/*
 * To consider: These helpers are so small it could be better to just
 * generate them inline.  Inline code can do the simple headlen check
 * then branch directly to slow_path_XXX if required.  (In fact, could
 * load a spare GPR with the address of slow_path_generic and pass size
 * as an argument, making the call site a mtlr, li and bllr.)
 */
	/*
	 * sk_load_word: load a 32-bit word at packet offset r_addr into r_A.
	 *
	 * A negative r_addr is handed to bpf_slow_path_word_neg.
	 * sk_load_word_positive_offset is exported as a second entry point
	 * that skips the sign test (presumably for offsets the generated code
	 * already knows to be non-negative).
	 *
	 * The fast path returns with cr0 != LT.  The slow paths reached from
	 * here branch to bpf_error (cr0 = LT, r3 = 0) when they fail.
	 * Clobbers r_scratch1 on the positive-offset path.
	 */
	.globl	sk_load_word
sk_load_word:
	cmpdi	r_addr, 0
	blt	bpf_slow_path_word_neg
	.globl	sk_load_word_positive_offset
sk_load_word_positive_offset:
	/*
	 * Are we accessing past headlen?  The compare is signed, so
	 * (r_HL - 4) < r_addr also catches r_HL < 4.
	 */
	subi	r_scratch1, r_HL, 4
	cmpd	r_scratch1, r_addr
	blt	bpf_slow_path_word
	/* Nope, just hitting the header.  cr0 here is eq or gt! */
	lwzx	r_A, r_D, r_addr
	/* When big endian we don't need to byteswap. */
	blr	/* Return success, cr0 != LT */

	/*
	 * sk_load_half: load a 16-bit halfword at packet offset r_addr into
	 * r_A (lhzx, i.e. zero-extended).
	 *
	 * A negative r_addr goes to bpf_slow_path_half_neg.
	 * sk_load_half_positive_offset is a second exported entry point that
	 * skips the sign test.  The fast path returns with cr0 != LT (the blt
	 * below was not taken) and clobbers r_scratch1.
	 */
	.globl	sk_load_half
sk_load_half:
	cmpdi	r_addr, 0
	blt	bpf_slow_path_half_neg
	.globl	sk_load_half_positive_offset
sk_load_half_positive_offset:
	/* Signed check: (r_HL - 2) < r_addr means the 2 bytes are not all below r_HL. */
	subi	r_scratch1, r_HL, 2
	cmpd	r_scratch1, r_addr
	blt	bpf_slow_path_half
	/* Entirely below r_HL: read straight from r_D + r_addr. */
	lhzx	r_A, r_D, r_addr
	blr

	/*
	 * sk_load_byte: load one byte at packet offset r_addr into r_A
	 * (lbzx, i.e. zero-extended).
	 *
	 * A negative r_addr goes to bpf_slow_path_byte_neg.
	 * sk_load_byte_positive_offset is a second exported entry point that
	 * skips the sign test.  The fast path returns with cr0 = GT, because
	 * r_HL > r_addr is the condition for not taking the ble below.
	 */
	.globl	sk_load_byte
sk_load_byte:
	cmpdi	r_addr, 0
	blt	bpf_slow_path_byte_neg
	.globl	sk_load_byte_positive_offset
sk_load_byte_positive_offset:
	/* Signed check: r_HL <= r_addr means the byte is not below r_HL. */
	cmpd	r_HL, r_addr
	ble	bpf_slow_path_byte
	lbzx	r_A, r_D, r_addr
	blr

/*
 * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
 * r_addr is the offset value
 *
 * Unlike the other loaders the result goes to r_X, not r_A: the byte at
 * r_D + r_addr is loaded, masked to its low nibble and multiplied by 4.
 * A negative r_addr goes to bpf_slow_path_byte_msh_neg;
 * sk_load_byte_msh_positive_offset is a second exported entry point that
 * skips the sign test.  The fast path returns with cr0 = GT (the ble below
 * was not taken, and rlwinm without a dot does not update cr0).
 */
	.globl sk_load_byte_msh
sk_load_byte_msh:
	cmpdi	r_addr, 0
	blt	bpf_slow_path_byte_msh_neg
	.globl sk_load_byte_msh_positive_offset
sk_load_byte_msh_positive_offset:
	/* Signed check: r_HL <= r_addr means the byte is not below r_HL. */
	cmpd	r_HL, r_addr
	ble	bpf_slow_path_byte_msh
	lbzx	r_X, r_D, r_addr
	/* Rotate left by 2 and keep bits 26..29: r_X = (r_X & 0xf) << 2. */
	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
	blr

/* Call out to skb_copy_bits:
 * We'll need to back up our volatile regs first; we have
 * local variable space at r1+(BPF_PPC_STACK_BASIC).
 * Allocate a new stack frame here to remain ABI-compliant in
 * stashing LR.
 *
 * In:   r_skb (r3), r_addr = packet offset, SIZE = byte count (constant).
 * Out:  on success the macro falls through with cr0 != LT, LR, r_skb, r_A
 *       and r_X restored, and the SIZE copied bytes stored at
 *       BPF_PPC_STACK_BASIC+(2*8)(r1) for the user of the macro to load.
 *       If skb_copy_bits returns a negative value the macro branches to
 *       bpf_error with cr0 = LT (r_skb is not reloaded on that path;
 *       bpf_error overwrites r3 anyway).
 *
 * Per the register map at the top of this file, A/X live in r4/r5 and the
 * address parameter in r6, which are also the argument registers used for
 * the call.  That is why r_A and r_X are saved first, and why r_addr must
 * be copied to r4 before r6 is overwritten with SIZE.
 *
 * The nop after the bl is the slot the 64-bit ELF ABI reserves for the
 * linker: if the call is routed through a stub, the linker rewrites the nop
 * into a TOC restore.  Without it such a link fails ("call lacks nop").
 * NOTE(review): assumes BPF_PPC_SLOWPATH_FRAME includes the standard ABI
 * frame header (with its TOC save slot) - confirm in bpf_jit.h.
 */
#define bpf_slow_path_common(SIZE)				\
	mflr	r0;						\
	std	r0, 16(r1);					\
	/* R3 goes in parameter space of caller's frame */	\
	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
	/* R5 = destination buffer, taken before r1 moves */	\
	addi	r5, r1, BPF_PPC_STACK_BASIC+(2*8);		\
	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
	/* R3 = r_skb, as passed */				\
	mr	r4, r_addr;					\
	li	r6, SIZE;					\
	bl	skb_copy_bits;					\
	nop;	/* linker may patch in a TOC restore */		\
	/* R3 = 0 on success */					\
	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
	ld	r0, 16(r1);					\
	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
	mtlr	r0;						\
	cmpdi	r3, 0;						\
	blt	bpf_error;	/* cr0 = LT */			\
	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
	/* Great success! */

/*
 * Slow path for sk_load_word: reached when r_HL - 4 < r_addr.  Copies 4
 * bytes via bpf_slow_path_common, then loads them from the stack buffer
 * into r_A.  A failed copy leaves through bpf_error inside the macro.
 */
bpf_slow_path_word:
	bpf_slow_path_common(4)
	/* Data value is on stack, and cr0 != LT */
	lwz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
	blr

/*
 * Slow path for sk_load_half: reached when r_HL - 2 < r_addr.  Copies 2
 * bytes via bpf_slow_path_common, then loads them zero-extended from the
 * stack buffer into r_A.  A failed copy leaves through bpf_error inside
 * the macro.
 */
bpf_slow_path_half:
	bpf_slow_path_common(2)
	/* Data value is on stack, and cr0 != LT */
	lhz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
	blr

/*
 * Slow path for sk_load_byte: reached when r_HL <= r_addr.  Copies 1 byte
 * via bpf_slow_path_common, then loads it zero-extended from the stack
 * buffer into r_A.  A failed copy leaves through bpf_error inside the
 * macro.
 */
bpf_slow_path_byte:
	bpf_slow_path_common(1)
	/* Data value is on stack, and cr0 != LT */
	lbz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
	blr

/*
 * Slow path for sk_load_byte_msh: reached when r_HL <= r_addr.  Copies 1
 * byte via bpf_slow_path_common, loads it from the stack buffer into r_X
 * (overwriting the r_X value the macro just restored) and applies the same
 * (byte & 0xf) << 2 transform as the fast path.
 */
bpf_slow_path_byte_msh:
	bpf_slow_path_common(1)
	/* Data value is on stack, and cr0 != LT */
	lbz	r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
	/* Rotate left by 2 and keep bits 26..29: r_X = (r_X & 0xf) << 2. */
	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
	blr

/* Call out to bpf_internal_load_pointer_neg_helper:
 * We'll need to back up our volatile regs first; we have
 * local variable space at r1+(BPF_PPC_STACK_BASIC).
 * Allocate a new stack frame here to remain ABI-compliant in
 * stashing LR.
 *
 * In:   r_skb (r3), r_addr = negative offset, SIZE = byte count (constant).
 * Out:  on success the macro falls through with cr0 != LT (the unsigned
 *       compare of a non-zero r3 against 0 yields GT), LR, r_skb, r_A and
 *       r_X restored, and r_addr replaced by the pointer the helper
 *       returned; the user of the macro then loads from 0(r_addr).
 *       If the helper returns 0 the macro branches to bpf_error_slow, which
 *       fabricates cr0 = LT before falling into bpf_error.
 *
 * Per the register map at the top of this file, A/X live in r4/r5, which
 * are also argument registers for the call, hence the save/restore of r_A
 * and r_X around it.
 *
 * The nop after the bl is the slot the 64-bit ELF ABI reserves for the
 * linker: if the call is routed through a stub, the linker rewrites the nop
 * into a TOC restore.  Without it such a link fails ("call lacks nop").
 * NOTE(review): assumes BPF_PPC_SLOWPATH_FRAME includes the standard ABI
 * frame header (with its TOC save slot) - confirm in bpf_jit.h.
 */
#define sk_negative_common(SIZE)				\
	mflr	r0;						\
	std	r0, 16(r1);					\
	/* R3 goes in parameter space of caller's frame */	\
	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
	/* R3 = r_skb, as passed */				\
	mr	r4, r_addr;					\
	li	r5, SIZE;					\
	bl	bpf_internal_load_pointer_neg_helper;		\
	nop;	/* linker may patch in a TOC restore */		\
	/* R3 != 0 on success */				\
	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
	ld	r0, 16(r1);					\
	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
	mtlr	r0;						\
	cmpldi	r3, 0;						\
	beq	bpf_error_slow;	/* cr0 = EQ */			\
	mr	r_addr, r3;					\
	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
	/* Great success! */

/*
 * Negative-offset path for sk_load_word.
 *
 * bpf_slow_path_word_neg first rejects offsets below the lower bound:
 * lis loads -32 << 16 (-0x200000) into r_scratch1, and a smaller r_addr
 * goes straight to bpf_error with cr0 = LT from the compare.
 * sk_load_word_negative_offset is an exported entry point that skips this
 * range check.  On success r_A holds the 32-bit word read through the
 * pointer that sk_negative_common left in r_addr.
 */
bpf_slow_path_word_neg:
	lis     r_scratch1,-32	/* SKF_LL_OFF */
	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
	blt	bpf_error	/* cr0 = LT */
	.globl	sk_load_word_negative_offset
sk_load_word_negative_offset:
	sk_negative_common(4)
	/* r_addr is now the helper's returned pointer, not an offset. */
	lwz	r_A, 0(r_addr)
	blr

/*
 * Negative-offset path for sk_load_half.
 *
 * bpf_slow_path_half_neg rejects offsets below -32 << 16 (-0x200000) by
 * branching to bpf_error with cr0 = LT from the compare.
 * sk_load_half_negative_offset is an exported entry point that skips this
 * range check.  On success r_A holds the zero-extended halfword read
 * through the pointer that sk_negative_common left in r_addr.
 */
bpf_slow_path_half_neg:
	lis     r_scratch1,-32	/* SKF_LL_OFF */
	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
	blt	bpf_error	/* cr0 = LT */
	.globl	sk_load_half_negative_offset
sk_load_half_negative_offset:
	sk_negative_common(2)
	/* r_addr is now the helper's returned pointer, not an offset. */
	lhz	r_A, 0(r_addr)
	blr

/*
 * Negative-offset path for sk_load_byte.
 *
 * bpf_slow_path_byte_neg rejects offsets below -32 << 16 (-0x200000) by
 * branching to bpf_error with cr0 = LT from the compare.
 * sk_load_byte_negative_offset is an exported entry point that skips this
 * range check.  On success r_A holds the zero-extended byte read through
 * the pointer that sk_negative_common left in r_addr.
 */
bpf_slow_path_byte_neg:
	lis     r_scratch1,-32	/* SKF_LL_OFF */
	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
	blt	bpf_error	/* cr0 = LT */
	.globl	sk_load_byte_negative_offset
sk_load_byte_negative_offset:
	sk_negative_common(1)
	/* r_addr is now the helper's returned pointer, not an offset. */
	lbz	r_A, 0(r_addr)
	blr

/*
 * Negative-offset path for sk_load_byte_msh.
 *
 * bpf_slow_path_byte_msh_neg rejects offsets below -32 << 16 (-0x200000)
 * by branching to bpf_error with cr0 = LT from the compare.
 * sk_load_byte_msh_negative_offset is an exported entry point that skips
 * this range check.  On success the byte read through the pointer that
 * sk_negative_common left in r_addr is placed in r_X and transformed to
 * (byte & 0xf) << 2, as on the fast path.
 */
bpf_slow_path_byte_msh_neg:
	lis     r_scratch1,-32	/* SKF_LL_OFF */
	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
	blt	bpf_error	/* cr0 = LT */
	.globl	sk_load_byte_msh_negative_offset
sk_load_byte_msh_negative_offset:
	sk_negative_common(1)
	/* r_addr is now the helper's returned pointer, not an offset. */
	lbz	r_X, 0(r_addr)
	/* Rotate left by 2 and keep bits 26..29: r_X = (r_X & 0xf) << 2. */
	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
	blr

/*
 * Common failure exits.  Both return to the generated code with cr0 = LT
 * and r3 = 0.
 *
 * bpf_error_slow is the target of sk_negative_common, which arrives with
 * cr0 = EQ (helper returned 0); it compares -1 against 0 to force cr0 = LT
 * and then falls through.  Clobbers r_scratch1.
 *
 * bpf_error is branched to directly by sites whose own compare already
 * produced cr0 = LT.
 */
bpf_error_slow:
	/* fabricate a cr0 = lt */
	li	r_scratch1, -1
	cmpdi	r_scratch1, 0
bpf_error:
	/* Entered with cr0 = lt */
	li	r3, 0
	/* Generated code will 'blt epilogue', returning 0. */
	blr
Commit	Line	Data
0ca87f05 ME	1	/* bpf_jit.S: Packet/header access helper functions
	2	* for PPC64 BPF compiler.
	3	*
	4	* Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
	5	*
	6	* This program is free software; you can redistribute it and/or
	7	* modify it under the terms of the GNU General Public License
	8	* as published by the Free Software Foundation; version 2
	9	* of the License.
	10	*/
	11
	12	#include <asm/ppc_asm.h>
	13	#include "bpf_jit.h"
	14
	15	/*
	16	* All of these routines are called directly from generated code,
	17	* whose register usage is:
	18	*
	19	* r3 skb
	20	* r4,r5 A,X
	21	* r6 * address parameter to helper *
	22	* r7-r10 scratch
	23	* r14 skb->data
	24	* r15 skb headlen
	25	* r16-31 M[]
	26	*/
	27
	28	/*
	29	* To consider: These helpers are so small it could be better to just
	30	* generate them inline. Inline code can do the simple headlen check
	31	* then branch directly to slow_path_XXX if required. (In fact, could
	32	* load a spare GPR with the address of slow_path_generic and pass size
	33	* as an argument, making the call site a mtlr, li and bllr.)
0ca87f05 ME	34	*/
	35	.globl sk_load_word
	36	sk_load_word:
	37	cmpdi r_addr, 0
05be1824 JS	38	blt bpf_slow_path_word_neg
	39	.globl sk_load_word_positive_offset
	40	sk_load_word_positive_offset:
0ca87f05 ME	41	/* Are we accessing past headlen? */
	42	subi r_scratch1, r_HL, 4
	43	cmpd r_scratch1, r_addr
	44	blt bpf_slow_path_word
	45	/* Nope, just hitting the header. cr0 here is eq or gt! */
	46	lwzx r_A, r_D, r_addr
	47	/* When big endian we don't need to byteswap. */
	48	blr /* Return success, cr0 != LT */
	49
	50	.globl sk_load_half
	51	sk_load_half:
	52	cmpdi r_addr, 0
05be1824 JS	53	blt bpf_slow_path_half_neg
	54	.globl sk_load_half_positive_offset
	55	sk_load_half_positive_offset:
0ca87f05 ME	56	subi r_scratch1, r_HL, 2
	57	cmpd r_scratch1, r_addr
	58	blt bpf_slow_path_half
	59	lhzx r_A, r_D, r_addr
	60	blr
	61
	62	.globl sk_load_byte
	63	sk_load_byte:
	64	cmpdi r_addr, 0
05be1824 JS	65	blt bpf_slow_path_byte_neg
	66	.globl sk_load_byte_positive_offset
	67	sk_load_byte_positive_offset:
0ca87f05 ME	68	cmpd r_HL, r_addr
	69	ble bpf_slow_path_byte
	70	lbzx r_A, r_D, r_addr
	71	blr
	72
	73	/*
	74	* BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf)
05be1824	75	* r_addr is the offset value
0ca87f05 ME	76	*/
	77	.globl sk_load_byte_msh
	78	sk_load_byte_msh:
05be1824 JS	79	cmpdi r_addr, 0
	80	blt bpf_slow_path_byte_msh_neg
	81	.globl sk_load_byte_msh_positive_offset
	82	sk_load_byte_msh_positive_offset:
0ca87f05 ME	83	cmpd r_HL, r_addr
	84	ble bpf_slow_path_byte_msh
	85	lbzx r_X, r_D, r_addr
	86	rlwinm r_X, r_X, 2, 32-4-2, 31-2
	87	blr
	88
0ca87f05 ME	89	/* Call out to skb_copy_bits:
	90	* We'll need to back up our volatile regs first; we have
	91	* local variable space at r1+(BPF_PPC_STACK_BASIC).
	92	* Allocate a new stack frame here to remain ABI-compliant in
	93	* stashing LR.
	94	*/
	95	#define bpf_slow_path_common(SIZE) \
	96	mflr r0; \
	97	std r0, 16(r1); \
	98	/* R3 goes in parameter space of caller's frame */ \
	99	std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
	100	std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
	101	std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
	102	addi r5, r1, BPF_PPC_STACK_BASIC+(2*8); \
	103	stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
	104	/* R3 = r_skb, as passed */ \
	105	mr r4, r_addr; \
	106	li r6, SIZE; \
	107	bl skb_copy_bits; \
	108	/* R3 = 0 on success */ \
	109	addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
	110	ld r0, 16(r1); \
	111	ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
	112	ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
	113	mtlr r0; \
	114	cmpdi r3, 0; \
	115	blt bpf_error; /* cr0 = LT */ \
	116	ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
	117	/* Great success! */
	118
	119	bpf_slow_path_word:
	120	bpf_slow_path_common(4)
	121	/* Data value is on stack, and cr0 != LT */
	122	lwz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
	123	blr
	124
	125	bpf_slow_path_half:
	126	bpf_slow_path_common(2)
	127	lhz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
	128	blr
	129
	130	bpf_slow_path_byte:
	131	bpf_slow_path_common(1)
	132	lbz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
	133	blr
	134
	135	bpf_slow_path_byte_msh:
	136	bpf_slow_path_common(1)
	137	lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
	138	rlwinm r_X, r_X, 2, 32-4-2, 31-2
	139	blr
05be1824 JS	140
	141	/* Call out to bpf_internal_load_pointer_neg_helper:
	142	* We'll need to back up our volatile regs first; we have
	143	* local variable space at r1+(BPF_PPC_STACK_BASIC).
	144	* Allocate a new stack frame here to remain ABI-compliant in
	145	* stashing LR.
	146	*/
	147	#define sk_negative_common(SIZE) \
	148	mflr r0; \
	149	std r0, 16(r1); \
	150	/* R3 goes in parameter space of caller's frame */ \
	151	std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
	152	std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
	153	std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
	154	stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
	155	/* R3 = r_skb, as passed */ \
	156	mr r4, r_addr; \
	157	li r5, SIZE; \
	158	bl bpf_internal_load_pointer_neg_helper; \
	159	/* R3 != 0 on success */ \
	160	addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
	161	ld r0, 16(r1); \
	162	ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
	163	ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
	164	mtlr r0; \
	165	cmpldi r3, 0; \
	166	beq bpf_error_slow; /* cr0 = EQ */ \
	167	mr r_addr, r3; \
	168	ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
	169	/* Great success! */
	170
	171	bpf_slow_path_word_neg:
	172	lis r_scratch1,-32 /* SKF_LL_OFF */
	173	cmpd r_addr, r_scratch1 /* addr < SKF_* */
	174	blt bpf_error /* cr0 = LT */
	175	.globl sk_load_word_negative_offset
	176	sk_load_word_negative_offset:
	177	sk_negative_common(4)
	178	lwz r_A, 0(r_addr)
	179	blr
	180
	181	bpf_slow_path_half_neg:
	182	lis r_scratch1,-32 /* SKF_LL_OFF */
	183	cmpd r_addr, r_scratch1 /* addr < SKF_* */
	184	blt bpf_error /* cr0 = LT */
	185	.globl sk_load_half_negative_offset
	186	sk_load_half_negative_offset:
	187	sk_negative_common(2)
	188	lhz r_A, 0(r_addr)
	189	blr
	190
	191	bpf_slow_path_byte_neg:
	192	lis r_scratch1,-32 /* SKF_LL_OFF */
	193	cmpd r_addr, r_scratch1 /* addr < SKF_* */
	194	blt bpf_error /* cr0 = LT */
	195	.globl sk_load_byte_negative_offset
	196	sk_load_byte_negative_offset:
	197	sk_negative_common(1)
	198	lbz r_A, 0(r_addr)
	199	blr
	200
	201	bpf_slow_path_byte_msh_neg:
	202	lis r_scratch1,-32 /* SKF_LL_OFF */
	203	cmpd r_addr, r_scratch1 /* addr < SKF_* */
204	blt bpf_error /* cr0 = LT */
205	.globl sk_load_byte_msh_negative_offset
206	sk_load_byte_msh_negative_offset:
207	sk_negative_common(1)
208	lbz r_X, 0(r_addr)
209	rlwinm r_X, r_X, 2, 32-4-2, 31-2
210	blr
211
212	bpf_error_slow:
213	/* fabricate a cr0 = lt */
214	li r_scratch1, -1
215	cmpdi r_scratch1, 0
216	bpf_error:
217	/* Entered with cr0 = lt */
218	li r3, 0
219	/* Generated code will 'blt epilogue', returning 0. */
220	blr