/*
 * Intel SMP support routines.
 *
 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
 * (c) 2002,2003 Andi Kleen, SuSE Labs.
 *
 * This code is released under the GNU General Public License version 2 or
 * later.
 */

#include <linux/init.h>

#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/interrupt.h>

#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mach_apic.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apicdef.h>
#include <asm/idle.h>

/*
 * Smarter SMP flushing macros.
 * c/o Linus Torvalds.
 *
 * These mean you can really definitely utterly forget about
 * writing to user space from interrupts. (It's not allowed anyway.)
 *
 * Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 * More scalable flush, from Andi Kleen
 *
 * To avoid global state use 8 different call vectors.
 * Each CPU uses a specific vector to trigger flushes on other
 * CPUs. Depending on the received vector the target CPUs look into
 * the right per cpu variable for the flush data.
 *
 * With more than 8 CPUs they are hashed to the 8 available
 * vectors. The limited global vector space forces us to this right now.
 * In the future, when interrupts are split into per-CPU domains, this
 * could be fixed at the cost of triggering multiple IPIs in some cases.
 */
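
/*
 * Worked example of the vector hashing described above (illustrative only;
 * the real constants come from the interrupt vector layout headers): with
 * NUM_INVALIDATE_TLB_VECTORS == 8, a flush initiated on CPU 11 uses
 * sender = 11 % 8 = 3, i.e. vector INVALIDATE_TLB_VECTOR_START + 3 and the
 * per-CPU flush_state slot of sender 3. A target CPU that receives that
 * vector recomputes sender = 3 from the vector number and reads the flush
 * parameters from per_cpu(flush_state, 3).
 */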

union smp_flush_state {
        struct {
                cpumask_t flush_cpumask;
                struct mm_struct *flush_mm;
                unsigned long flush_va;
#define FLUSH_ALL       -1ULL
                spinlock_t tlbstate_lock;
        };
        char pad[SMP_CACHE_BYTES];
} ____cacheline_aligned;

/* State is put into the per CPU data section, but padded
   to a full cache line because other CPUs can access it and we don't
   want false sharing in the per cpu data segment. */
static DEFINE_PER_CPU(union smp_flush_state, flush_state);

/*
 * We cannot call mmdrop() because we are in interrupt context,
 * instead update mm->cpu_vm_mask.
 */
static inline void leave_mm(int cpu)
{
        if (read_pda(mmu_state) == TLBSTATE_OK)
                BUG();
        cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
        load_cr3(swapper_pg_dir);
}

/*
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
 *      Stop ipi delivery for the old mm. This is not synchronized with
 *      the other cpus, but smp_invalidate_interrupt ignores flush ipis
 *      for the wrong mm, and in the worst case we perform a superfluous
 *      tlb flush.
 * 1a2) set cpu mmu_state to TLBSTATE_OK
 *      Now the smp_invalidate_interrupt won't call leave_mm if cpu0
 *      was in lazy tlb mode.
 * 1a3) update cpu active_mm
 *      Now cpu0 accepts tlb flushes for the new mm.
 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
 *      Now the other cpus will send tlb flush ipis.
 * 1a5) change cr3.
 * 1b) thread switch without mm change
 *      cpu active_mm is correct, cpu0 already handles
 *      flush ipis.
 * 1b1) set cpu mmu_state to TLBSTATE_OK
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *      Atomically set the bit [other cpus will start sending flush ipis],
 *      and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %%esp, i.e. current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %%esp, i.e. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
 * The good news is that cpu mmu_state is local to each cpu, no
 * write/read ordering problems.
 */
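
/*
 * Note: in smp_invalidate_interrupt() below, the comparison against
 * read_pda(active_mm) is what makes the handler ignore flush ipis for the
 * wrong mm (step 1a1 above), and the read_pda(mmu_state) == TLBSTATE_OK
 * check distinguishes a real flush from a CPU that is only in lazy tlb
 * mode and can simply leave_mm().
 */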

/*
 * TLB flush IPI:
 *
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in the lazy tlb mode.
 *
 * Interrupts are disabled.
 */

asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
{
        int cpu;
        int sender;
        union smp_flush_state *f;

        cpu = smp_processor_id();
        /*
         * orig_rax contains the negated interrupt vector.
         * Use that to determine where the sender put the data.
         */
        sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START;
        f = &per_cpu(flush_state, sender);

        if (!cpu_isset(cpu, f->flush_cpumask))
                goto out;
        /*
         * This was a BUG() but until someone can quote me the
         * line from the intel manual that guarantees an IPI to
         * multiple CPUs is retried _only_ on the erroring CPUs
         * it's staying as a return
         *
         * BUG();
         */

        if (f->flush_mm == read_pda(active_mm)) {
                if (read_pda(mmu_state) == TLBSTATE_OK) {
                        if (f->flush_va == FLUSH_ALL)
                                local_flush_tlb();
                        else
                                __flush_tlb_one(f->flush_va);
                } else
                        leave_mm(cpu);
        }
out:
        ack_APIC_irq();
        cpu_clear(cpu, f->flush_cpumask);
}

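/*
 * Sender side of the shootdown handshake (summary of the code below): the
 * initiating CPU picks its vector slot, takes that slot's tlbstate_lock,
 * publishes flush_mm/flush_va, ORs the targets into flush_cpumask, sends
 * the IPI and then spins until every target CPU has cleared its own bit in
 * flush_cpumask, which smp_invalidate_interrupt() above does once it has
 * handled the request.
 */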
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
                             unsigned long va)
{
        int sender;
        union smp_flush_state *f;

        /* Caller has disabled preemption */
        sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
        f = &per_cpu(flush_state, sender);

        /* Could avoid this lock when
           num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
           probably not worth checking this for a cache-hot lock. */
        spin_lock(&f->tlbstate_lock);

        f->flush_mm = mm;
        f->flush_va = va;
        cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);

        /*
         * We have to send the IPI only to
         * CPUs affected.
         */
        send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);

        while (!cpus_empty(f->flush_cpumask))
                cpu_relax();

        f->flush_mm = NULL;
        f->flush_va = 0;
        spin_unlock(&f->tlbstate_lock);
}

int __cpuinit init_smp_flush(void)
{
        int i;
        for_each_cpu_mask(i, cpu_possible_map) {
                spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
        }
        return 0;
}

core_initcall(init_smp_flush);

void flush_tlb_current_task(void)
{
        struct mm_struct *mm = current->mm;
        cpumask_t cpu_mask;

        preempt_disable();
        cpu_mask = mm->cpu_vm_mask;
        cpu_clear(smp_processor_id(), cpu_mask);

        local_flush_tlb();
        if (!cpus_empty(cpu_mask))
                flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
        preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_current_task);

void flush_tlb_mm(struct mm_struct *mm)
{
        cpumask_t cpu_mask;

        preempt_disable();
        cpu_mask = mm->cpu_vm_mask;
        cpu_clear(smp_processor_id(), cpu_mask);

        if (current->active_mm == mm) {
                if (current->mm)
                        local_flush_tlb();
                else
                        leave_mm(smp_processor_id());
        }
        if (!cpus_empty(cpu_mask))
                flush_tlb_others(cpu_mask, mm, FLUSH_ALL);

        preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_mm);

void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
        struct mm_struct *mm = vma->vm_mm;
        cpumask_t cpu_mask;

        preempt_disable();
        cpu_mask = mm->cpu_vm_mask;
        cpu_clear(smp_processor_id(), cpu_mask);

        if (current->active_mm == mm) {
                if (current->mm)
                        __flush_tlb_one(va);
                else
                        leave_mm(smp_processor_id());
        }

        if (!cpus_empty(cpu_mask))
                flush_tlb_others(cpu_mask, mm, va);

        preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_page);

static void do_flush_tlb_all(void *info)
{
        unsigned long cpu = smp_processor_id();

        __flush_tlb_all();
        if (read_pda(mmu_state) == TLBSTATE_LAZY)
                leave_mm(cpu);
}

void flush_tlb_all(void)
{
        on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
}

/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */

void smp_send_reschedule(int cpu)
{
        send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}

/*
 * Structure and data for smp_call_function(). This is designed to minimise
 * static memory requirements. It also looks cleaner.
 */
static DEFINE_SPINLOCK(call_lock);

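/*
 * Handshake counters (see smp_call_function_interrupt() below): each target
 * CPU increments 'started' once it has copied func/info out of call_data,
 * which lets the sender reuse the on-stack structure; if 'wait' is set,
 * each target also increments 'finished' after func has returned.
 */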
struct call_data_struct {
        void (*func) (void *info);
        void *info;
        atomic_t started;
        atomic_t finished;
        int wait;
};

static struct call_data_struct *call_data;

void lock_ipi_call_lock(void)
{
        spin_lock_irq(&call_lock);
}

void unlock_ipi_call_lock(void)
{
        spin_unlock_irq(&call_lock);
}

/*
 * this function sends a 'generic call function' IPI to one other CPU
 * in the system.
 *
 * cpu is a standard Linux logical CPU number.
 */
static void
__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
                           int nonatomic, int wait)
{
        struct call_data_struct data;
        int cpus = 1;

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        call_data = &data;
        wmb();
        /* Send a message to the target CPU and wait for it to respond */
        send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (!wait)
                return;

        while (atomic_read(&data.finished) != cpus)
                cpu_relax();
}

/*
 * smp_call_function_single - Run a function on a specific CPU
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @nonatomic: Currently unused.
 * @wait: If true, wait until the function has completed on the other CPU.
 *
 * Returns 0 on success, else a negative status code.
 *
 * Does not return until the remote CPU is nearly ready to execute <func>,
 * or is already executing it, or has already executed it.
 */

int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
        int nonatomic, int wait)
{
        /* prevent preemption and reschedule on another processor */
        int me = get_cpu();

        /* Can deadlock when called with interrupts disabled */
        WARN_ON(irqs_disabled());

        if (cpu == me) {
                local_irq_disable();
                func(info);
                local_irq_enable();
                put_cpu();
                return 0;
        }

        spin_lock(&call_lock);
        __smp_call_function_single(cpu, func, info, nonatomic, wait);
        spin_unlock(&call_lock);
        put_cpu();
        return 0;
}
EXPORT_SYMBOL(smp_call_function_single);
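
/*
 * Example usage (hypothetical caller, for illustration only; the callback
 * and its argument are made up):
 *
 *      static void remote_increment(void *info)
 *      {
 *              atomic_inc((atomic_t *)info);
 *      }
 *
 *      atomic_t counter = ATOMIC_INIT(0);
 *      smp_call_function_single(1, remote_increment, &counter, 0, 1);
 *
 * The callback runs in interrupt context on the target CPU, so it must be
 * fast and must not sleep; wait == 1 makes the caller spin until the
 * callback has completed there.
 */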

/*
 * this function sends a 'generic call function' IPI to all other CPUs
 * in the system.
 */
static void __smp_call_function(void (*func) (void *info), void *info,
                                int nonatomic, int wait)
{
        struct call_data_struct data;
        int cpus = num_online_cpus() - 1;

        if (!cpus)
                return;

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        call_data = &data;
        wmb();
        /* Send a message to all other CPUs and wait for them to respond */
        send_IPI_allbutself(CALL_FUNCTION_VECTOR);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (!wait)
                return;

        while (atomic_read(&data.finished) != cpus)
                cpu_relax();
}

/*
 * smp_call_function - run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @nonatomic: currently unused.
 * @wait: If true, wait (atomically) until the function has completed on the
 *        other CPUs.
 *
 * Returns 0 on success, else a negative status code. Does not return until
 * the remote CPUs are nearly ready to execute func, or are already executing
 * it, or have already executed it.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 * Actually there are a few legal cases, like panic.
 */
int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
                      int wait)
{
        spin_lock(&call_lock);
        __smp_call_function(func, info, nonatomic, wait);
        spin_unlock(&call_lock);
        return 0;
}
EXPORT_SYMBOL(smp_call_function);

static void stop_this_cpu(void *dummy)
{
        local_irq_disable();
        /*
         * Remove this CPU:
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        disable_local_APIC();
        for (;;)
                halt();
}

void smp_send_stop(void)
{
        int nolock;
        unsigned long flags;

        if (reboot_force)
                return;

        /* Don't deadlock on the call lock in panic */
        nolock = !spin_trylock(&call_lock);
        local_irq_save(flags);
        __smp_call_function(stop_this_cpu, NULL, 0, 0);
        if (!nolock)
                spin_unlock(&call_lock);
        disable_local_APIC();
        local_irq_restore(flags);
}

/*
 * Reschedule callback. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
asmlinkage void smp_reschedule_interrupt(void)
{
        ack_APIC_irq();
}

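/*
 * Receiver side of the smp_call_function()/smp_call_function_single()
 * handshake (summary of the code below): func/info/wait are copied out of
 * call_data first, then 'started' is incremented (with a full barrier before
 * it) so the sender may let its on-stack call_data_struct go out of scope;
 * the function itself runs inside irq_enter()/irq_exit(), and 'finished' is
 * only touched when the sender asked to wait.
 */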
asmlinkage void smp_call_function_interrupt(void)
{
        void (*func) (void *info) = call_data->func;
        void *info = call_data->info;
        int wait = call_data->wait;

        ack_APIC_irq();
        /*
         * Notify initiating CPU that I've grabbed the data and am
         * about to execute the function
         */
        mb();
        atomic_inc(&call_data->started);
        /*
         * At this point the info structure may be out of scope unless wait==1
         */
        exit_idle();
        irq_enter();
        (*func)(info);
        irq_exit();
        if (wait) {
                mb();
                atomic_inc(&call_data->finished);
        }
}