Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* memset.S: optimised assembly memset |
2 | * | |
3 | * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved. | |
4 | * Written by David Howells (dhowells@redhat.com) | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the License, or (at your option) any later version. | |
10 | */ | |
11 | ||
12 | ||
13 | .text | |
14 | .p2align 4 | |
15 | ||
16 | ############################################################################### | |
17 | # | |
18 | # void *memset(void *p, char ch, size_t count) | |
19 | # | |
20 | # - NOTE: must not use any stack. exception detection performs function return | |
21 | # to caller's fixup routine, aborting the remainder of the set | |
22 | # GR4, GR7, GR8, and GR11 must be managed | |
23 | # | |
24 | ############################################################################### | |
25 | .globl memset,__memset_end ; __memset_end is exported so the extent of the routine is visible to other code | |
26 | .type memset,@function | |
27 | memset: ; fill count bytes at p with ch - in: GR8 = p, GR9 = ch, GR10 = count; GR8 is never written below | |
28 | orcc.p gr10,gr0,gr5,icc3 ; GR5 = count (bytes remaining); ICC3 tests it for zero | |
29 | andi gr9,#0xff,gr9 ; keep only the low byte of ch | |
30 | or.p gr8,gr0,gr4 ; GR4 = address (working pointer) | |
31 | beqlr icc3,#0 ; count == 0: return straight away | |
32 | ||
33 | # conditionally write a byte to 2b-align the address | |
34 | setlos.p #1,gr6 ; GR6 = size of this step | |
35 | andicc gr4,#1,gr0,icc0 ; ICC0 = test of address bit 0 (result discarded into GR0) | |
36 | ckne icc0,cc7 ; CC7 = address is odd | |
37 | cstb.p gr9,@(gr4,gr0) ,cc7,#1 ; store ch if CC7; NOTE(review): GR7 is first written at the 64-byte loop setup, so it still holds the caller's value during the alignment stores | |
38 | csubcc gr5,gr6,gr5 ,cc7,#1 ; count -= 1 if CC7; also set ICC3 | |
39 | cadd.p gr4,gr6,gr4 ,cc7,#1 ; address += 1 if CC7 | |
40 | beqlr icc3,#0 ; return if that used up the whole count | |
41 | ||
42 | # conditionally write a word (2 bytes, csth) to 4b-align the address | |
43 | andicc.p gr4,#2,gr0,icc0 ; ICC0 = test of address bit 1 | |
44 | subicc gr5,#2,gr0,icc1 ; ICC1 = compare count against 2 | |
45 | setlos.p #2,gr6 ; GR6 = size of this step | |
46 | ckne icc0,cc7 ; CC7 = address bit 1 is set | |
47 | slli.p gr9,#8,gr12 ; GR12 = ch << 8 - need to double up the pattern | |
48 | cknc icc1,cc5 ; CC5 = count >= 2 | |
49 | or.p gr9,gr12,gr12 ; GR12 = ch repeated in the low two bytes | |
50 | andcr cc7,cc5,cc7 ; CC7 = CC7 AND CC5: store only if misaligned and enough bytes remain | |
51 | ||
52 | csth.p gr12,@(gr4,gr0) ,cc7,#1 ; store two bytes of pattern if CC7 | |
53 | csubcc gr5,gr6,gr5 ,cc7,#1 ; count -= 2 if CC7; also set ICC3 | |
54 | cadd.p gr4,gr6,gr4 ,cc7,#1 ; address += 2 if CC7 | |
55 | beqlr icc3,#0 ; return if nothing is left | |
56 | ||
57 | # conditionally write a dword (4 bytes, cst) to 8b-align the address | |
58 | andicc.p gr4,#4,gr0,icc0 ; ICC0 = test of address bit 2 | |
59 | subicc gr5,#4,gr0,icc1 ; ICC1 = compare count against 4 | |
60 | setlos.p #4,gr6 ; GR6 = size of this step | |
61 | ckne icc0,cc7 ; CC7 = address bit 2 is set | |
62 | slli.p gr12,#16,gr13 ; GR13 = GR12 << 16 - need to quadruple-up the pattern | |
63 | cknc icc1,cc5 ; CC5 = count >= 4 | |
64 | or.p gr13,gr12,gr12 ; GR12 = ch repeated in all four bytes | |
65 | andcr cc7,cc5,cc7 ; CC7 = misaligned and at least 4 bytes remain | |
66 | ||
67 | cst.p gr12,@(gr4,gr0) ,cc7,#1 ; store four bytes of pattern if CC7 | |
68 | csubcc gr5,gr6,gr5 ,cc7,#1 ; count -= 4 if CC7; also set ICC3 | |
69 | cadd.p gr4,gr6,gr4 ,cc7,#1 ; address += 4 if CC7 | |
70 | beqlr icc3,#0 ; return if nothing is left | |
71 | ||
72 | or.p gr12,gr12,gr13 ; GR13 = GR12 - need to octuple-up the pattern (presumably cstdu stores GR12/GR13 as a pair - confirm against the ISA manual) | |
73 | ||
74 | # the address is now 8b-aligned (or an alignment store was skipped because too few bytes remain) - loop around writing 64-byte chunks | |
75 | setlos #8,gr7 ; GR7 = index register / step for the update-form stores | |
76 | subi.p gr4,#8,gr4 ; pre-bias by one step since the stores address @(GR4,GR7) - store with update index does weird stuff | |
77 | setlos #64,gr6 ; GR6 = bytes written per loop iteration | |
78 | ||
79 | subicc gr5,#64,gr0,icc0 ; ICC0 = compare count against 64 | |
80 | 0: cknc icc0,cc7 ; CC7 = count >= 64 | |
81 | cstdu gr12,@(gr4,gr7) ,cc7,#1 ; eight 8-byte stores follow, all predicated on CC7 | |
82 | cstdu gr12,@(gr4,gr7) ,cc7,#1 | |
83 | cstdu gr12,@(gr4,gr7) ,cc7,#1 | |
84 | cstdu gr12,@(gr4,gr7) ,cc7,#1 | |
85 | cstdu gr12,@(gr4,gr7) ,cc7,#1 | |
86 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 | |
87 | csubcc gr5,gr6,gr5 ,cc7,#1 ; count -= 64 if CC7; also set ICC3 | |
88 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 | |
89 | subicc gr5,#64,gr0,icc0 ; re-test the updated count for another full chunk | |
90 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 | |
91 | beqlr icc3,#0 ; return if the count reached zero | |
92 | bnc icc0,#2,0b ; at least 64 bytes still left: go round again | |
93 | ||
94 | # now do 32-byte remnant | |
95 | subicc.p gr5,#32,gr0,icc0 ; ICC0 = compare count against 32 | |
96 | setlos #32,gr6 ; GR6 = size of this step | |
97 | cknc icc0,cc7 ; CC7 = count >= 32 | |
98 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; four 8-byte stores in this stage, all predicated on CC7 | |
99 | csubcc gr5,gr6,gr5 ,cc7,#1 ; count -= 32 if CC7; also set ICC3 | |
100 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 | |
101 | setlos #16,gr6 ; GR6 = size of the next step | |
102 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 | |
103 | subicc gr5,#16,gr0,icc0 ; ICC0 = compare count against 16, consumed by the next stage | |
104 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 | |
105 | beqlr icc3,#0 ; return if nothing is left | |
106 | ||
107 | # now do 16-byte remnant | |
108 | cknc icc0,cc7 ; CC7 = count >= 16 (ICC0 was set during the previous stage) | |
109 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; two 8-byte stores, predicated on CC7 | |
110 | csubcc gr5,gr6,gr5 ,cc7,#1 ; count -= 16 if CC7; also set ICC3 | |
111 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 | |
112 | beqlr icc3,#0 ; return if nothing is left | |
113 | ||
114 | # now do 8-byte remnant | |
115 | subicc gr5,#8,gr0,icc1 ; ICC1 = compare count against 8 | |
116 | cknc icc1,cc7 ; CC7 = count >= 8 | |
117 | cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; one 8-byte store if CC7 | |
118 | csubcc gr5,gr7,gr5 ,cc7,#1 ; count -= 8 (GR7) if CC7; also set ICC3 | |
119 | setlos.p #4,gr7 ; GR7 = step for the 4-byte stage | |
120 | beqlr icc3,#0 ; return if nothing is left | |
121 | ||
122 | # now do 4-byte remnant | |
123 | subicc gr5,#4,gr0,icc0 ; ICC0 = compare count against 4 | |
124 | addi.p gr4,#4,gr4 ; step shrank from 8 to 4: re-bias GR4 so @(GR4,GR7) is again the next byte to write | |
125 | cknc icc0,cc7 ; CC7 = count >= 4 | |
126 | cstu.p gr12,@(gr4,gr7) ,cc7,#1 ; one 4-byte store if CC7 | |
127 | csubcc gr5,gr7,gr5 ,cc7,#1 ; count -= 4 if CC7; also set ICC3 | |
128 | subicc.p gr5,#2,gr0,icc1 ; ICC1 = compare count against 2, consumed by the next stage | |
129 | beqlr icc3,#0 ; return if nothing is left | |
130 | ||
131 | # now do 2-byte remnant | |
132 | setlos #2,gr7 ; GR7 = step for the 2-byte stage | |
133 | addi.p gr4,#2,gr4 ; step shrank from 4 to 2: re-bias GR4 likewise | |
134 | cknc icc1,cc7 ; CC7 = count >= 2 | |
135 | csthu.p gr12,@(gr4,gr7) ,cc7,#1 ; one 2-byte store if CC7 | |
136 | csubcc gr5,gr7,gr5 ,cc7,#1 ; count -= 2 if CC7; also set ICC3 | |
137 | subicc.p gr5,#1,gr0,icc0 ; ICC0 = compare count against 1, consumed by the next stage | |
138 | beqlr icc3,#0 ; return if nothing is left | |
139 | ||
140 | # now do 1-byte remnant | |
141 | setlos #0,gr7 ; the final store uses no index, so the step value is 0 | |
142 | addi.p gr4,#2,gr4 ; GR4 now points directly at the last byte to write | |
143 | cknc icc0,cc7 ; CC7 = count >= 1 | |
144 | cstb.p gr12,@(gr4,gr0) ,cc7,#1 ; store the final byte if CC7 | |
145 | bralr ; return to caller | |
146 | __memset_end: | |
147 | ||
148 | .size memset, __memset_end-memset | |
149 | ||
150 | ############################################################################### | |
151 | # | |
152 | # clear memory in userspace | |
153 | # - return the number of bytes that could not be cleared (0 on complete success) | |
154 | # | |
155 | # long __memset_user(void *p, size_t count) | |
156 | # | |
157 | ############################################################################### | |
158 | .globl __memset_user, __memset_user_error_lr, __memset_user_error_handler | |
159 | .type __memset_user,@function | |
160 | __memset_user: ; in: GR8 = p, GR9 = count; out: GR8 = number of bytes left uncleared (0 on success) | |
161 | movsg lr,gr11 ; GR11 = our own return address, kept because the call below replaces LR | |
162 | setlos #0,gr7 ; GR7 = 0: memset's alignment stores index with GR0 and memset does not write GR7 until its 64-byte loop, so a fault there must not add the caller's stale GR7 in the handler below | |
163 | # abuse memset to do the dirty work | |
164 | or.p gr9,gr9,gr10 ; GR10 = count (memset's third argument) | |
165 | setlos #0,gr9 ; GR9 = 0, the byte value to fill with | |
166 | call memset | |
167 | __memset_user_error_lr: | |
168 | jmpl.p @(gr11,gr0) ; memset completed: return to our caller ... | |
169 | setlos #0,gr8 ; ... with 0, i.e. everything was cleared | |
170 | ||
171 | # deal with any exception generated by memset | |
172 | # (presumably entered by the fault fixup code, which is outside this file, when LR is __memset_user_error_lr) | |
173 | # GR4 - memset's address tracking pointer | |
174 | # GR7 - memset's step value (index register for store insns), 0 while memset is still aligning | |
175 | # GR8 - memset's original start address | |
176 | # GR10 - memset's original count | |
177 | __memset_user_error_handler: | |
178 | add.p gr4,gr7,gr4 ; GR4 = address the faulting store was aimed at | |
179 | add gr8,gr10,gr8 ; GR8 = start + count = one past the end of the region | |
180 | jmpl.p @(gr11,gr0) ; return to our caller ... | |
181 | sub gr8,gr4,gr8 ; we return the amount left uncleared | |
182 | ||
183 | .size __memset_user, .-__memset_user