ARM: 7668/1: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
Source: deliverable/linux.git — arch/arm/lib/memset.S (git-blame view; commit annotations stripped below)
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5
	.word	0			@ pad so that ENTRY lands on the cache-line boundary

/*
 * Backward-branch target for unaligned destinations: store up to 3
 * bytes one at a time until ip is word-aligned, then fall back into
 * the word-aligned path.  On entry r3 = ip & 3 (1, 2 or 3).
 */
1:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5f			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r1, [ip], #1		@ 1  r3 == 1: 3 byte stores
	strleb	r1, [ip], #1		@ 1  r3 <= 2: 2 byte stores
	strb	r1, [ip], #1		@ 1  always: at least 1 byte store
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
/*
 * The pointer is now aligned and the length is adjusted.  Try doing the
 * memset again.
 */

/*
 * void *memset(void *s, int c, size_t n)
 *
 * In:   r0 = s (destination), r1 = c (fill byte), r2 = n (byte count)
 * Out:  r0 = s, unchanged
 * Uses: r3, ip, flags (plus r8/lr or r4-r8/lr, saved/restored on the
 *       stack, in the bulk-copy paths)
 *
 * The working pointer is kept in ip, NOT r0, so that r0 survives as
 * the return value throughout — per the commit title, recent GCC
 * (4.7.2) relies on memset returning its first argument, and the
 * older trick of advancing r0 and patching it back crashed under
 * those optimisations.
 */
ENTRY(memset)
/*
 * Preserve the contents of r0 for the return value.
 */
	mov	ip, r0
	ands	r3, ip, #3		@ 1 unaligned?
	bne	1b			@ 1 fix up alignment first
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
	orr	r1, r1, r1, lsl #8	@ replicate the fill byte into
	orr	r1, r1, r1, lsl #16	@ all four bytes of r1
	mov	r3, r1			@ second fill register for stm
	cmp	r2, #16
	blt	4f			@ short: skip the bulk loops

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1			@ 8 fill registers in total:
	mov	r5, r1			@ r1, r3-r8, lr all hold the
	mov	r6, r1			@ replicated fill pattern, so
	mov	r7, r1			@ one stm writes 32 bytes
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31			@ worth aligning to 32 bytes?
	ble	3f

	@ Align ip to a 32-byte boundary.  r8 temporarily holds the
	@ number of leading bytes (a multiple of 4 here), shifted so
	@ its bits land in C/N/bit30 for conditional stores.
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmcsia	ip!, {r4, r5, r6, r7}	@ C set: 16 bytes
	stmmiia	ip!, {r4, r5}		@ N set: 8 bytes
	tst	r8, #(1 << 30)
	mov	r8, r1			@ restore r8 as a fill register
	strne	r1, [ip], #4		@ bit30 set: 4 bytes

3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}	@ 64 bytes at a time
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r8, pc}	@ exact multiple: done

	tst	r2, #32
	stmneia	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmneia	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmneia	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1
	mov	pc, lr			@ return; r0 still = original s
ENDPROC(memset)
This page took 0.608157 seconds and 5 git commands to generate.