[deliverable/linux.git] / arch / arm / lib / memset.S

/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5

/*
 * void *memset(void *s, int c, size_t n)
 *
 * In:   r0 = s (destination), r1 = c (fill value), r2 = n (byte count)
 * Out:  r0 = s; r0 is never written, so it is returned unchanged
 * Uses: ip is the running write pointer.  r1 and r3 hold the fill word.
 *       r8/lr (and r4-r7 in the CALGN variant) are borrowed as extra
 *       copies of the fill word; they are saved on the stack first and
 *       restored before returning.
 *
 * The fix-up for a misaligned destination lives at label 6, after the
 * main body, so that the entry point sits directly behind the .align
 * above.  It rejoins the main path at label 1, i.e. *after* ip has been
 * loaded from r0, so the pointer it advanced is not overwritten.
 */
ENTRY(memset)
	and	r1, r1, #255		@ cast to unsigned char
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 * Replicate the fill byte into all four bytes of r1 and copy it to r3.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
	cmp	r2, #16
	blt	4f			@ fewer than 16 bytes: tail only

/*
 * If CALGN(1) expands to nothing the condition reads "! +0" and the
 * first variant is assembled; otherwise the second one is.
 */
#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmeqfd	sp!, {r8, pc}		@ exactly done: restore r8, return
/*
 * Now <64 bytes to go.  r2 has gone negative, but subtracting multiples
 * of 64 leaves its low six bits intact, so there is no need to correct
 * the count; we're only testing bits from now on.
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}	@ 32 bytes
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}	@ 16 bytes
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96			@ only worth aligning if > 96 bytes
	tstgt	ip, #31			@ ... and ip is not 32-byte aligned
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to the 32-byte boundary
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ C = bit 4 of r8, N = bit 3 of r8
	stmcsia	ip!, {r4, r5, r6, r7}	@ 16 bytes
	stmmiia	ip!, {r4, r5}		@ 8 bytes
	tst	r8, #(1 << 30)		@ bit 2 of the original r8
	mov	r8, r1			@ r8 is a fill register again
	strne	r1, [ip], #4		@ 4 bytes

3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r8, pc}	@ exactly done: restore, return

/*
 * <64 bytes to go; as above only the low bits of r2 are tested.
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3-r8, lr}	@ 32 bytes
	tst	r2, #16
	stmneia	ip!, {r4-r7}		@ 16 bytes
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmneia	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well, so use byte stores only.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1
	mov	pc, lr

/*
 * Misaligned destination: r3 = ip & 3 (1, 2 or 3).  Store 4 - r3 bytes
 * to bring ip up to a word boundary.  If fewer than 4 bytes were
 * requested in total, let the byte tail at 5 handle them instead; r2 is
 * then r2 - 4, whose two low bits still equal those of the real count.
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r1, [ip], #1		@ 1
	strleb	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
/*
 * The pointer is now aligned and the length is adjusted.  Rejoin the
 * aligned path without reloading ip from r0.
 */
	b	1b
ENDPROC(memset)
Commit	Line	Data
1da177e4 LT	1	/*
	2	* linux/arch/arm/lib/memset.S
	3	*
	4	* Copyright (C) 1995-2000 Russell King
	5	*
	6	* This program is free software; you can redistribute it and/or modify
	7	* it under the terms of the GNU General Public License version 2 as
	8	* published by the Free Software Foundation.
	9	*
	10	* ASM optimised string functions
	11	*/
	12	#include <linux/linkage.h>
	13	#include <asm/assembler.h>
	14
	15	.text
	16	.align 5
	17	.word 0
	18
	19	1: subs r2, r2, #4 @ 1 do we have enough
	20	blt 5f @ 1 bytes to align with?
	21	cmp r3, #2 @ 1
455bd4c4 ID	22	strltb r1, [ip], #1 @ 1
	23	strleb r1, [ip], #1 @ 1
	24	strb r1, [ip], #1 @ 1
1da177e4 LT	25	add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
	26	/*
	27	* The pointer is now aligned and the length is adjusted. Try doing the
59f0cb0f	28	* memset again.
1da177e4 LT	29	*/
	30
	31	ENTRY(memset)
455bd4c4 ID	32	/*
	33	* Preserve the contents of r0 for the return value.
	34	*/
	35	mov ip, r0
	36	ands r3, ip, #3 @ 1 unaligned?
1da177e4 LT	37	bne 1b @ 1
1da177e4 LT	38	/*
455bd4c4	39	* we know that the pointer in ip is aligned to a word boundary.
1da177e4 LT	40	*/
	41	orr r1, r1, r1, lsl #8
	42	orr r1, r1, r1, lsl #16
	43	mov r3, r1
	44	cmp r2, #16
	45	blt 4f
f91a8dcc NP	46
	47	#if ! CALGN(1)+0
	48
1da177e4	49	/*
455bd4c4	50	* We need 2 extra registers for this loop - use r8 and the LR
1da177e4	51	*/
455bd4c4 ID	52	stmfd sp!, {r8, lr}
455bd4c4 ID	53	mov r8, r1
1da177e4 LT	54	mov lr, r1
	55
	56	2: subs r2, r2, #64
455bd4c4 ID	57	stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
	58	stmgeia ip!, {r1, r3, r8, lr}
	59	stmgeia ip!, {r1, r3, r8, lr}
	60	stmgeia ip!, {r1, r3, r8, lr}
1da177e4	61	bgt 2b
455bd4c4	62	ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go.
1da177e4 LT	63	/*
	64	* No need to correct the count; we're only testing bits from now on
	65	*/
	66	tst r2, #32
455bd4c4 ID	67	stmneia ip!, {r1, r3, r8, lr}
455bd4c4 ID	68	stmneia ip!, {r1, r3, r8, lr}
1da177e4	69	tst r2, #16
455bd4c4 ID	70	stmneia ip!, {r1, r3, r8, lr}
455bd4c4 ID	71	ldmfd sp!, {r8, lr}
1da177e4	72
f91a8dcc NP	73	#else
	74
	75	/*
	76	* This version aligns the destination pointer in order to write
	77	* whole cache lines at once.
	78	*/
	79
455bd4c4	80	stmfd sp!, {r4-r8, lr}
f91a8dcc NP	81	mov r4, r1
	82	mov r5, r1
	83	mov r6, r1
	84	mov r7, r1
455bd4c4	85	mov r8, r1
f91a8dcc NP	86	mov lr, r1
	87
	88	cmp r2, #96
455bd4c4	89	tstgt ip, #31
f91a8dcc NP	90	ble 3f
f91a8dcc NP	91
455bd4c4 ID	92	and r8, ip, #31
	93	rsb r8, r8, #32
	94	sub r2, r2, r8
	95	movs r8, r8, lsl #(32 - 4)
	96	stmcsia ip!, {r4, r5, r6, r7}
	97	stmmiia ip!, {r4, r5}
	98	tst r8, #(1 << 30)
	99	mov r8, r1
	100	strne r1, [ip], #4
f91a8dcc NP	101
f91a8dcc NP	102	3: subs r2, r2, #64
455bd4c4 ID	103	stmgeia ip!, {r1, r3-r8, lr}
455bd4c4 ID	104	stmgeia ip!, {r1, r3-r8, lr}
f91a8dcc	105	bgt 3b
455bd4c4	106	ldmeqfd sp!, {r4-r8, pc}
f91a8dcc NP	107
f91a8dcc NP	108	tst r2, #32
455bd4c4	109	stmneia ip!, {r1, r3-r8, lr}
f91a8dcc	110	tst r2, #16
455bd4c4 ID	111	stmneia ip!, {r4-r7}
455bd4c4 ID	112	ldmfd sp!, {r4-r8, lr}
f91a8dcc NP	113
	114	#endif
	115
1da177e4	116	4: tst r2, #8
455bd4c4	117	stmneia ip!, {r1, r3}
1da177e4	118	tst r2, #4
455bd4c4	119	strne r1, [ip], #4
1da177e4 LT	120	/*
	121	* When we get here, we've got less than 4 bytes to zero. We
	122	* may have an unaligned pointer as well.
	123	*/
	124	5: tst r2, #2
455bd4c4 ID	125	strneb r1, [ip], #1
455bd4c4 ID	126	strneb r1, [ip], #1
1da177e4	127	tst r2, #1
455bd4c4	128	strneb r1, [ip], #1
7999d8d7	129	mov pc, lr
93ed3970	130	ENDPROC(memset)