/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 * Support routines for atomic operations. Each function takes:
 *
 * r0: address to manipulate
 * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
 * r2: new value to write, or for cmpxchg/add_unless, value to compare against
 * r3: (cmpxchg/xchg_add_unless) new value to write or add;
 *     (atomic64 ops) high word of value to write
 * r4/r5: (cmpxchg64/add_unless64) new value to write or add
 *
 * The 32-bit routines return a "struct __get_user" so that the futex code
 * has an opportunity to return -EFAULT to the user if needed.
 * The 64-bit routines just return a "long long" with the value,
 * since they are only used from kernel space and don't expect to fault.
 * Support for 16-bit ops is included in the framework but we don't provide
 * any (x86_64 has an atomic_inc_short(), so we might want to some day).
 *
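 * As a rough illustration (an assumption for this comment, not the
 * verbatim kernel declarations), the C-visible shape of these routines
 * is something like:
 *
 *   struct __get_user {
 *           unsigned long long val;  // value read from memory
 *           int err;                 // 0, or -EFAULT for a bad user address
 *   };
 *   extern struct __get_user __atomic_xchg_add(volatile int *p,
 *                                              int *lock, int n);
 *   extern long long __atomic64_xchg_add(volatile long long *p,
 *                                        int *lock, long long n);
 *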
 * Note that the caller is advised to issue a suitable L1 or L2
 * prefetch on the address being manipulated to avoid extra stalls.
 * In addition, the hot path is on two icache lines, and we start with
 * a jump to the second line to make sure they are both in cache so
 * that we never stall waiting on icache fill while holding the lock.
 * (This doesn't work out with most 64-bit ops, since they consume
 * too many bundles, so may take an extra i-cache stall.)
 *
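 * For example, a C wrapper along these lines would satisfy that advice
 * (the helper names and exact signature here are hypothetical, not taken
 * from this file):
 *
 *   int _atomic_xchg_add(int *v, int i)
 *   {
 *           int *lock = __atomic_hashed_lock(v);  // pick the guard lock
 *           __insn_prefetch(v);                   // warm the target line
 *           return __atomic_xchg_add(v, lock, i).val;
 *   }
 *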
 * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
 * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
 * the code, just page faults.
 *
 * If the load or store faults in a way that can be directly fixed in
 * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
 * directly, return to the instruction that faulted, and retry it.
 *
 * If the load or store faults in a way that potentially requires us
 * to release the atomic lock, then retry (e.g. a migrating PTE), we
 * reset the PC in do_page_fault_ics() to the "tns" instruction so
 * that on return we will reacquire the lock and restart the op. We
 * are somewhat overloading the exception_table_entry notion by doing
 * this, since those entries are not normally used for migrating PTEs.
 *
 * If the main page fault handler discovers a bad address, it will see
 * the PC pointing to the "tns" instruction (due to the earlier
 * exception_table_entry processing in do_page_fault_ics), and
 * re-reset the PC to the fault handler, atomic_bad_address(), which
 * effectively takes over from the atomic op and can either return a
 * bad "struct __get_user" (for user addresses) or can just panic (for
 * bad kernel addresses).
 *
 * Note that if the value we would store is the same as what we
 * loaded, we bypass the store. Other platforms with true atomics can
 * make the guarantee that a non-atomic __clear_bit(), for example,
 * can safely race with an atomic test_and_set_bit(); this example is
 * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
 * that on Tile since the "atomic" op is really just a
 * read/modify/write, and can race with the non-atomic
 * read/modify/write. However, if we can short-circuit the write when
 * it is not needed, in the atomic case, we avoid the race.
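 *
 * Illustrative sketch (hypothetical names, not code from bit_spinlock.h):
 *
 *   // CPU A: spinning test_and_set_bit()    // CPU B: owner unlocking
 *   old = *addr;
 *   new = old | LOCK_BIT;   // bit already set, so new == old
 *                                            // tmp = *addr;
 *                                            // tmp &= ~LOCK_BIT;
 *                                            // *addr = tmp;  (lock released)
 *   *addr = new;            // would re-set LOCK_BIT and lose the unlock
 *
 * Since new == old here, skipping CPU A's store closes that window.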
 */

#include <linux/linkage.h>
#include <asm/atomic_32.h>
#include <asm/page.h>
#include <asm/processor.h>

        .section .text.atomic,"ax"
ENTRY(__start_atomic_asm_code)

        .macro  atomic_op, name, bitwidth, body
        .align  64
STD_ENTRY_SECTION(__atomic\name, .text.atomic)
        {
         movei  r24, 1
         j      4f              /* branch to second cache line */
        }
1:      {
         .ifc \bitwidth,16
         lh     r22, r0
         .else
         lw     r22, r0
         addi   r28, r0, 4
         .endif
        }
        .ifc \bitwidth,64
         lw     r23, r28
        .endif
        \body /* set r24, and r25 if 64-bit */
        {
         seq    r26, r22, r24
         seq    r27, r23, r25
        }
        .ifc \bitwidth,64
         bbnst  r27, 2f
        .endif
        bbs     r26, 3f         /* skip write-back if it's the same value */
2:      {
         .ifc \bitwidth,16
         sh     r0, r24
         .else
         sw     r0, r24
         .endif
        }
        .ifc \bitwidth,64
         sw     r28, r25
        .endif
        mf
3:      {
         move   r0, r22
         .ifc \bitwidth,64
         move   r1, r23
         .else
         move   r1, zero
         .endif
         sw     ATOMIC_LOCK_REG_NAME, zero
        }
        mtspr   INTERRUPT_CRITICAL_SECTION, zero
        jrp     lr
4:      {
         move   ATOMIC_LOCK_REG_NAME, r1
         mtspr  INTERRUPT_CRITICAL_SECTION, r24
        }
#ifndef CONFIG_SMP
        j       1b              /* no atomic locks */
#else
        {
         tns    r21, ATOMIC_LOCK_REG_NAME
         moveli r23, 2048       /* maximum backoff time in cycles */
        }
        {
         bzt    r21, 1b         /* branch if lock acquired */
         moveli r25, 32         /* starting backoff time in cycles */
        }
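        /*
         * Lock not yet acquired: back off.  Drop ICS while we wait out
         * r25 cycles, then re-raise ICS, double the backoff (capped at
         * r23 = 2048 cycles), and retry the tns.
         */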
5:      mtspr   INTERRUPT_CRITICAL_SECTION, zero
        mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
6:      mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
        sub     r22, r22, r26
        slt     r22, r22, r25
        bbst    r22, 6b
        {
         mtspr  INTERRUPT_CRITICAL_SECTION, r24
         shli   r25, r25, 1     /* double the backoff; retry the tns */
        }
        {
         tns    r21, ATOMIC_LOCK_REG_NAME
         slt    r26, r23, r25   /* is the proposed backoff too big? */
        }
        {
         bzt    r21, 1b         /* branch if lock acquired */
         mvnz   r25, r26, r23
        }
        j       5b
#endif
        STD_ENDPROC(__atomic\name)
        .ifc \bitwidth,32
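        /*
         * Fault fixups (32-bit ops only): a fault on the load (1b) or the
         * store (2b) is fixed up to re-enter the routine, while a fault
         * attributed to the routine's entry point is redirected to
         * __atomic_bad_address; see the comment at the top of this file.
         */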
        .pushsection __ex_table,"a"
        .align  4
        .word   1b, __atomic\name
        .word   2b, __atomic\name
        .word   __atomic\name, __atomic_bad_address
        .popsection
        .endif
        .endm

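/*
 * Instantiate the individual operations.  Within each "body", r22 (and
 * r23 for the high word of 64-bit ops) holds the value just loaded; the
 * body leaves the value to store in r24 (and r25), and may branch to 3f
 * to return the old value without writing back (as cmpxchg does on a
 * compare mismatch, or add_unless when the old value equals the "unless"
 * value).
 */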
atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
atomic_op _xchg, 32, "move r24, r2"
atomic_op _xchg_add, 32, "add r24, r22, r2"
atomic_op _xchg_add_unless, 32, \
        "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
atomic_op _or, 32, "or r24, r22, r2"
atomic_op _and, 32, "and r24, r22, r2"
atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
atomic_op _xor, 32, "xor r24, r22, r2"

atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
        { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
        slt_u r26, r24, r22; add r25, r25, r26"
atomic_op 64_xchg_add_unless, 64, \
        "{ sne r26, r22, r2; sne r27, r23, r3 }; \
         { bbns r26, 3f; add r24, r22, r4 }; \
         { bbns r27, 3f; add r25, r23, r5 }; \
         slt_u r26, r24, r22; add r25, r25, r26"
atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"

        jrp     lr              /* happy backtracer */

ENTRY(__end_atomic_asm_code)