Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Intel SMP support routines. | |
3 | * | |
4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | |
5 | * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> | |
6 | * (c) 2002,2003 Andi Kleen, SuSE Labs. | |
7 | * | |
8 | * This code is released under the GNU General Public License version 2 or | |
9 | * later. | |
10 | */ | |
11 | ||
12 | #include <linux/init.h> | |
13 | ||
14 | #include <linux/mm.h> | |
1da177e4 LT |
15 | #include <linux/delay.h> |
16 | #include <linux/spinlock.h> | |
1da177e4 LT |
17 | #include <linux/smp.h> |
18 | #include <linux/kernel_stat.h> | |
19 | #include <linux/mc146818rtc.h> | |
20 | #include <linux/interrupt.h> | |
21 | ||
22 | #include <asm/mtrr.h> | |
23 | #include <asm/pgalloc.h> | |
24 | #include <asm/tlbflush.h> | |
25 | #include <asm/mach_apic.h> | |
26 | #include <asm/mmu_context.h> | |
27 | #include <asm/proto.h> | |
a8ab26fe | 28 | #include <asm/apicdef.h> |
95833c83 | 29 | #include <asm/idle.h> |
1da177e4 LT |
30 | |
31 | /* | |
16da2f93 | 32 | * Smarter SMP flushing macros. |
1da177e4 LT |
33 | * c/o Linus Torvalds. |
34 | * | |
35 | * These mean you can really definitely utterly forget about | |
36 | * writing to user space from interrupts. (It's not allowed anyway). | |
37 | * | |
38 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | |
e5bc8b6b | 39 | * |
16da2f93 | 40 | * More scalable flush, from Andi Kleen |
e5bc8b6b | 41 | * |
16da2f93 TG |
42 | * To avoid global state use 8 different call vectors. |
43 | * Each CPU uses a specific vector to trigger flushes on other | |
44 | * CPUs. Depending on the received vector the target CPUs look into | |
e5bc8b6b AK |
45 | * the right per cpu variable for the flush data. |
46 | * | |
16da2f93 TG |
47 | * With more than 8 CPUs they are hashed to the 8 available |
48 | * vectors. The limited global vector space forces us to this right now. | |
e5bc8b6b AK |
49 | * In future when interrupts are split into per CPU domains this could be |
50 | * fixed, at the cost of triggering multiple IPIs in some cases. | |
1da177e4 LT |
51 | */ |
52 | ||
e5bc8b6b AK |
53 | union smp_flush_state { |
54 | struct { | |
55 | cpumask_t flush_cpumask; | |
56 | struct mm_struct *flush_mm; | |
57 | unsigned long flush_va; | |
e5bc8b6b AK |
58 | spinlock_t tlbstate_lock; |
59 | }; | |
60 | char pad[SMP_CACHE_BYTES]; | |
61 | } ____cacheline_aligned; | |
62 | ||
63 | /* State is put into the per CPU data section, but padded | |
64 | to a full cache line because other CPUs can access it and we don't | |
65 | want false sharing in the per cpu data segment. */ | |
66 | static DEFINE_PER_CPU(union smp_flush_state, flush_state); | |
1da177e4 LT |
67 | |
68 | /* | |
16da2f93 | 69 | * We cannot call mmdrop() because we are in interrupt context, |
1da177e4 LT |
70 | * instead update mm->cpu_vm_mask. |
71 | */ | |
e5bc8b6b | 72 | static inline void leave_mm(int cpu) |
1da177e4 LT |
73 | { |
74 | if (read_pda(mmu_state) == TLBSTATE_OK) | |
75 | BUG(); | |
b1fc513d | 76 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); |
e3ebadd9 | 77 | load_cr3(swapper_pg_dir); |
1da177e4 LT |
78 | } |
79 | ||
80 | /* | |
81 | * | |
82 | * The flush IPI assumes that a thread switch happens in this order: | |
83 | * [cpu0: the cpu that switches] | |
84 | * 1) switch_mm() either 1a) or 1b) | |
85 | * 1a) thread switch to a different mm | |
b1fc513d | 86 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); |
16da2f93 TG |
87 | * Stop ipi delivery for the old mm. This is not synchronized with |
88 | * the other cpus, but smp_invalidate_interrupt ignores flush ipis | |
89 | * for the wrong mm, and in the worst case we perform a superfluous | |
90 | * tlb flush. | |
1da177e4 | 91 | * 1a2) set cpu mmu_state to TLBSTATE_OK |
16da2f93 | 92 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 |
1da177e4 LT |
93 | * was in lazy tlb mode. |
94 | * 1a3) update cpu active_mm | |
16da2f93 | 95 | * Now cpu0 accepts tlb flushes for the new mm. |
b1fc513d | 96 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); |
16da2f93 | 97 | * Now the other cpus will send tlb flush ipis. |
1da177e4 LT |
98 | * 1a5) change cr3. |
99 | * 1b) thread switch without mm change | |
100 | * cpu active_mm is correct, cpu0 already handles | |
101 | * flush ipis. | |
102 | * 1b1) set cpu mmu_state to TLBSTATE_OK | |
103 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | |
16da2f93 TG |
104 | * Atomically set the bit [other cpus will start sending flush ipis], |
105 | * and test the bit. | |
1da177e4 LT |
106 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. |
107 | * 2) switch %%esp, ie current | |
108 | * | |
109 | * The interrupt must handle 2 special cases: | |
110 | * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. | |
111 | * - the cpu performs speculative tlb reads, i.e. even if the cpu only | |
112 | * runs in kernel space, the cpu could load tlb entries for user space | |
113 | * pages. | |
114 | * | |
115 | * The good news is that cpu mmu_state is local to each cpu, no | |
116 | * write/read ordering problems. | |
117 | */ | |
118 | ||
119 | /* | |
120 | * TLB flush IPI: | |
121 | * | |
122 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. | |
123 | * 2) Leave the mm if we are in the lazy tlb mode. | |
e5bc8b6b AK |
124 | * |
125 | * Interrupts are disabled. | |
1da177e4 LT |
126 | */ |
127 | ||
e5bc8b6b | 128 | asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) |
1da177e4 | 129 | { |
e5bc8b6b AK |
130 | int cpu; |
131 | int sender; | |
132 | union smp_flush_state *f; | |
1da177e4 | 133 | |
e5bc8b6b AK |
134 | cpu = smp_processor_id(); |
135 | /* | |
19eadf98 | 136 | * orig_rax contains the negated interrupt vector. |
e5bc8b6b AK |
137 | * Use that to determine where the sender put the data. |
138 | */ | |
65ea5b03 | 139 | sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; |
e5bc8b6b | 140 | f = &per_cpu(flush_state, sender); |
1da177e4 | 141 | |
e5bc8b6b | 142 | if (!cpu_isset(cpu, f->flush_cpumask)) |
1da177e4 | 143 | goto out; |
16da2f93 | 144 | /* |
1da177e4 LT |
145 | * This was a BUG() but until someone can quote me the |
146 | * line from the intel manual that guarantees an IPI to | |
147 | * multiple CPUs is retried _only_ on the erroring CPUs | |
148 | * its staying as a return | |
149 | * | |
150 | * BUG(); | |
151 | */ | |
16da2f93 | 152 | |
e5bc8b6b | 153 | if (f->flush_mm == read_pda(active_mm)) { |
1da177e4 | 154 | if (read_pda(mmu_state) == TLBSTATE_OK) { |
0b9c99b6 | 155 | if (f->flush_va == TLB_FLUSH_ALL) |
1da177e4 LT |
156 | local_flush_tlb(); |
157 | else | |
e5bc8b6b | 158 | __flush_tlb_one(f->flush_va); |
1da177e4 LT |
159 | } else |
160 | leave_mm(cpu); | |
161 | } | |
5df3574e | 162 | out: |
1da177e4 | 163 | ack_APIC_irq(); |
e5bc8b6b | 164 | cpu_clear(cpu, f->flush_cpumask); |
38e760a1 | 165 | add_pda(irq_tlb_count, 1); |
1da177e4 LT |
166 | } |
167 | ||
0b9c99b6 TG |
168 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
169 | unsigned long va) | |
1da177e4 | 170 | { |
e5bc8b6b AK |
171 | int sender; |
172 | union smp_flush_state *f; | |
0b9c99b6 | 173 | cpumask_t cpumask = *cpumaskp; |
1da177e4 | 174 | |
e5bc8b6b AK |
175 | /* Caller has disabled preemption */ |
176 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; | |
177 | f = &per_cpu(flush_state, sender); | |
178 | ||
16da2f93 TG |
179 | /* |
180 | * Could avoid this lock when | |
181 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | |
182 | * probably not worth checking this for a cache-hot lock. | |
183 | */ | |
e5bc8b6b AK |
184 | spin_lock(&f->tlbstate_lock); |
185 | ||
186 | f->flush_mm = mm; | |
187 | f->flush_va = va; | |
188 | cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); | |
1da177e4 LT |
189 | |
190 | /* | |
191 | * We have to send the IPI only to | |
192 | * CPUs affected. | |
193 | */ | |
e5bc8b6b | 194 | send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); |
1da177e4 | 195 | |
e5bc8b6b AK |
196 | while (!cpus_empty(f->flush_cpumask)) |
197 | cpu_relax(); | |
1da177e4 | 198 | |
e5bc8b6b AK |
199 | f->flush_mm = NULL; |
200 | f->flush_va = 0; | |
201 | spin_unlock(&f->tlbstate_lock); | |
1da177e4 | 202 | } |
e5bc8b6b AK |
203 | |
204 | int __cpuinit init_smp_flush(void) | |
205 | { | |
206 | int i; | |
16da2f93 | 207 | |
e5bc8b6b | 208 | for_each_cpu_mask(i, cpu_possible_map) { |
825e037f | 209 | spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); |
e5bc8b6b AK |
210 | } |
211 | return 0; | |
212 | } | |
e5bc8b6b | 213 | core_initcall(init_smp_flush); |
16da2f93 | 214 | |
1da177e4 LT |
215 | void flush_tlb_current_task(void) |
216 | { | |
217 | struct mm_struct *mm = current->mm; | |
218 | cpumask_t cpu_mask; | |
219 | ||
220 | preempt_disable(); | |
221 | cpu_mask = mm->cpu_vm_mask; | |
222 | cpu_clear(smp_processor_id(), cpu_mask); | |
223 | ||
224 | local_flush_tlb(); | |
225 | if (!cpus_empty(cpu_mask)) | |
0b9c99b6 | 226 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
1da177e4 LT |
227 | preempt_enable(); |
228 | } | |
229 | ||
230 | void flush_tlb_mm (struct mm_struct * mm) | |
231 | { | |
232 | cpumask_t cpu_mask; | |
233 | ||
234 | preempt_disable(); | |
235 | cpu_mask = mm->cpu_vm_mask; | |
236 | cpu_clear(smp_processor_id(), cpu_mask); | |
237 | ||
238 | if (current->active_mm == mm) { | |
239 | if (current->mm) | |
240 | local_flush_tlb(); | |
241 | else | |
242 | leave_mm(smp_processor_id()); | |
243 | } | |
244 | if (!cpus_empty(cpu_mask)) | |
0b9c99b6 | 245 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
da8f153e | 246 | |
1da177e4 LT |
247 | preempt_enable(); |
248 | } | |
249 | ||
250 | void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) | |
251 | { | |
252 | struct mm_struct *mm = vma->vm_mm; | |
253 | cpumask_t cpu_mask; | |
254 | ||
255 | preempt_disable(); | |
256 | cpu_mask = mm->cpu_vm_mask; | |
257 | cpu_clear(smp_processor_id(), cpu_mask); | |
258 | ||
259 | if (current->active_mm == mm) { | |
260 | if(current->mm) | |
261 | __flush_tlb_one(va); | |
16da2f93 TG |
262 | else |
263 | leave_mm(smp_processor_id()); | |
1da177e4 LT |
264 | } |
265 | ||
266 | if (!cpus_empty(cpu_mask)) | |
267 | flush_tlb_others(cpu_mask, mm, va); | |
268 | ||
269 | preempt_enable(); | |
270 | } | |
271 | ||
272 | static void do_flush_tlb_all(void* info) | |
273 | { | |
274 | unsigned long cpu = smp_processor_id(); | |
275 | ||
276 | __flush_tlb_all(); | |
277 | if (read_pda(mmu_state) == TLBSTATE_LAZY) | |
278 | leave_mm(cpu); | |
279 | } | |
280 | ||
281 | void flush_tlb_all(void) | |
282 | { | |
283 | on_each_cpu(do_flush_tlb_all, NULL, 1, 1); | |
284 | } | |
285 | ||
1da177e4 LT |
286 | /* |
287 | * this function sends a 'reschedule' IPI to another CPU. | |
288 | * it goes straight through and wastes no time serializing | |
289 | * anything. Worst case is that we lose a reschedule ... | |
290 | */ | |
291 | ||
292 | void smp_send_reschedule(int cpu) | |
293 | { | |
294 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | |
295 | } | |
296 | ||
297 | /* | |
298 | * Structure and data for smp_call_function(). This is designed to minimise | |
299 | * static memory requirements. It also looks cleaner. | |
300 | */ | |
301 | static DEFINE_SPINLOCK(call_lock); | |
302 | ||
303 | struct call_data_struct { | |
304 | void (*func) (void *info); | |
305 | void *info; | |
306 | atomic_t started; | |
307 | atomic_t finished; | |
308 | int wait; | |
309 | }; | |
310 | ||
311 | static struct call_data_struct * call_data; | |
312 | ||
884d9e40 AR |
313 | void lock_ipi_call_lock(void) |
314 | { | |
315 | spin_lock_irq(&call_lock); | |
316 | } | |
317 | ||
318 | void unlock_ipi_call_lock(void) | |
319 | { | |
320 | spin_unlock_irq(&call_lock); | |
321 | } | |
322 | ||
3d483f47 | 323 | /* |
66d16ed4 LV |
324 | * this function sends a 'generic call function' IPI to all other CPU |
325 | * of the system defined in the mask. | |
3d483f47 | 326 | */ |
16da2f93 TG |
327 | static int __smp_call_function_mask(cpumask_t mask, |
328 | void (*func)(void *), void *info, | |
329 | int wait) | |
3d483f47 EB |
330 | { |
331 | struct call_data_struct data; | |
66d16ed4 LV |
332 | cpumask_t allbutself; |
333 | int cpus; | |
334 | ||
335 | allbutself = cpu_online_map; | |
336 | cpu_clear(smp_processor_id(), allbutself); | |
337 | ||
338 | cpus_and(mask, mask, allbutself); | |
339 | cpus = cpus_weight(mask); | |
340 | ||
341 | if (!cpus) | |
342 | return 0; | |
3d483f47 EB |
343 | |
344 | data.func = func; | |
345 | data.info = info; | |
346 | atomic_set(&data.started, 0); | |
347 | data.wait = wait; | |
348 | if (wait) | |
349 | atomic_set(&data.finished, 0); | |
350 | ||
351 | call_data = &data; | |
352 | wmb(); | |
66d16ed4 LV |
353 | |
354 | /* Send a message to other CPUs */ | |
355 | if (cpus_equal(mask, allbutself)) | |
356 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | |
357 | else | |
358 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | |
3d483f47 EB |
359 | |
360 | /* Wait for response */ | |
361 | while (atomic_read(&data.started) != cpus) | |
362 | cpu_relax(); | |
363 | ||
364 | if (!wait) | |
66d16ed4 | 365 | return 0; |
3d483f47 EB |
366 | |
367 | while (atomic_read(&data.finished) != cpus) | |
368 | cpu_relax(); | |
66d16ed4 LV |
369 | |
370 | return 0; | |
371 | } | |
372 | /** | |
373 | * smp_call_function_mask(): Run a function on a set of other CPUs. | |
374 | * @mask: The set of cpus to run on. Must not include the current cpu. | |
375 | * @func: The function to run. This must be fast and non-blocking. | |
376 | * @info: An arbitrary pointer to pass to the function. | |
377 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | |
378 | * | |
379 | * Returns 0 on success, else a negative status code. | |
380 | * | |
381 | * If @wait is true, then returns once @func has returned; otherwise | |
382 | * it returns just before the target cpu calls @func. | |
383 | * | |
384 | * You must not call this function with disabled interrupts or from a | |
385 | * hardware interrupt handler or from a bottom half handler. | |
386 | */ | |
387 | int smp_call_function_mask(cpumask_t mask, | |
388 | void (*func)(void *), void *info, | |
389 | int wait) | |
390 | { | |
391 | int ret; | |
392 | ||
393 | /* Can deadlock when called with interrupts disabled */ | |
394 | WARN_ON(irqs_disabled()); | |
395 | ||
396 | spin_lock(&call_lock); | |
397 | ret = __smp_call_function_mask(mask, func, info, wait); | |
398 | spin_unlock(&call_lock); | |
399 | return ret; | |
3d483f47 | 400 | } |
66d16ed4 | 401 | EXPORT_SYMBOL(smp_call_function_mask); |
3d483f47 EB |
402 | |
/*
 * smp_call_function_single - Run a function on a specific CPU
 * @cpu: The CPU to run on; may be the calling CPU.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @nonatomic: Currently unused.
 * @wait: If true, wait until function has completed on the other CPU.
 *
 * Returns 0 on success, else a negative status code.
 *
 * Does not return until the remote CPU is nearly ready to execute <func>
 * or is executing or has executed it.
 */

int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
			      int nonatomic, int wait)
{
	unsigned long flags;
	int ret;
	/* prevent preemption and reschedule on another processor */
	int me = get_cpu();

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	if (cpu == me) {
		/*
		 * Run the function locally with interrupts off.  The
		 * interrupt state is saved and restored rather than
		 * unconditionally re-enabled, so that a caller which
		 * (wrongly) entered with interrupts disabled does not
		 * get them switched on behind its back.
		 */
		local_irq_save(flags);
		func(info);
		local_irq_restore(flags);
		ret = 0;
	} else {
		ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info,
					     wait);
	}

	put_cpu();
	return ret;
}
64a26a73 | 438 | EXPORT_SYMBOL(smp_call_function_single); |
3d483f47 | 439 | |
1da177e4 LT |
440 | /* |
441 | * smp_call_function - run a function on all other CPUs. | |
442 | * @func: The function to run. This must be fast and non-blocking. | |
443 | * @info: An arbitrary pointer to pass to the function. | |
444 | * @nonatomic: currently unused. | |
445 | * @wait: If true, wait (atomically) until function has completed on other | |
446 | * CPUs. | |
447 | * | |
448 | * Returns 0 on success, else a negative status code. Does not return until | |
449 | * remote CPUs are nearly ready to execute func or are or have executed. | |
450 | * | |
451 | * You must not call this function with disabled interrupts or from a | |
452 | * hardware interrupt handler or from a bottom half handler. | |
453 | * Actually there are a few legal cases, like panic. | |
454 | */ | |
455 | int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |
456 | int wait) | |
457 | { | |
66d16ed4 | 458 | return smp_call_function_mask(cpu_online_map, func, info, wait); |
1da177e4 | 459 | } |
2ee60e17 | 460 | EXPORT_SYMBOL(smp_call_function); |
1da177e4 | 461 | |
9964cf7d | 462 | static void stop_this_cpu(void *dummy) |
1da177e4 | 463 | { |
9964cf7d | 464 | local_irq_disable(); |
1da177e4 LT |
465 | /* |
466 | * Remove this CPU: | |
467 | */ | |
468 | cpu_clear(smp_processor_id(), cpu_online_map); | |
1da177e4 | 469 | disable_local_APIC(); |
16da2f93 | 470 | for (;;) |
46d13a38 | 471 | halt(); |
16da2f93 | 472 | } |
1da177e4 LT |
473 | |
474 | void smp_send_stop(void) | |
475 | { | |
9964cf7d JB |
476 | int nolock; |
477 | unsigned long flags; | |
478 | ||
1da177e4 LT |
479 | if (reboot_force) |
480 | return; | |
9964cf7d | 481 | |
1da177e4 | 482 | /* Don't deadlock on the call lock in panic */ |
9964cf7d JB |
483 | nolock = !spin_trylock(&call_lock); |
484 | local_irq_save(flags); | |
66d16ed4 | 485 | __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); |
1da177e4 LT |
486 | if (!nolock) |
487 | spin_unlock(&call_lock); | |
1da177e4 | 488 | disable_local_APIC(); |
9964cf7d | 489 | local_irq_restore(flags); |
1da177e4 LT |
490 | } |
491 | ||
492 | /* | |
493 | * Reschedule call back. Nothing to do, | |
494 | * all the work is done automatically when | |
495 | * we return from the interrupt. | |
496 | */ | |
497 | asmlinkage void smp_reschedule_interrupt(void) | |
498 | { | |
499 | ack_APIC_irq(); | |
38e760a1 | 500 | add_pda(irq_resched_count, 1); |
1da177e4 LT |
501 | } |
502 | ||
503 | asmlinkage void smp_call_function_interrupt(void) | |
504 | { | |
505 | void (*func) (void *info) = call_data->func; | |
506 | void *info = call_data->info; | |
507 | int wait = call_data->wait; | |
508 | ||
509 | ack_APIC_irq(); | |
510 | /* | |
511 | * Notify initiating CPU that I've grabbed the data and am | |
512 | * about to execute the function | |
513 | */ | |
514 | mb(); | |
515 | atomic_inc(&call_data->started); | |
516 | /* | |
517 | * At this point the info structure may be out of scope unless wait==1 | |
518 | */ | |
95833c83 | 519 | exit_idle(); |
1da177e4 LT |
520 | irq_enter(); |
521 | (*func)(info); | |
38e760a1 | 522 | add_pda(irq_call_count, 1); |
1da177e4 LT |
523 | irq_exit(); |
524 | if (wait) { | |
525 | mb(); | |
526 | atomic_inc(&call_data->finished); | |
527 | } | |
528 | } | |
a8ab26fe | 529 |