Commit | Line | Data |
---|---|---|
f87e4cac JF |
1 | /* |
2 | * Xen SMP support | |
3 | * | |
4 | * This file implements the Xen versions of smp_ops. SMP under Xen is | |
5 | * very straightforward. Bringing a CPU up is simply a matter of | |
6 | * loading its initial context and setting it running. | |
7 | * | |
8 | * IPIs are handled through the Xen event mechanism. | |
9 | * | |
10 | * Because virtual CPUs can be scheduled onto any real CPU, there's no | |
11 | * useful topology information for the kernel to make use of. As a | |
12 | * result, all CPUs are treated as if they're single-core and | |
13 | * single-threaded. | |
14 | * | |
15 | * This does not handle HOTPLUG_CPU yet. | |
16 | */ | |
17 | #include <linux/sched.h> | |
18 | #include <linux/err.h> | |
19 | #include <linux/smp.h> | |
20 | ||
21 | #include <asm/paravirt.h> | |
22 | #include <asm/desc.h> | |
23 | #include <asm/pgtable.h> | |
24 | #include <asm/cpu.h> | |
25 | ||
26 | #include <xen/interface/xen.h> | |
27 | #include <xen/interface/vcpu.h> | |
28 | ||
29 | #include <asm/xen/interface.h> | |
30 | #include <asm/xen/hypercall.h> | |
31 | ||
32 | #include <xen/page.h> | |
33 | #include <xen/events.h> | |
34 | ||
35 | #include "xen-ops.h" | |
36 | #include "mmu.h" | |
37 | ||
0e91398f | 38 | cpumask_t xen_cpu_initialized_map; |
f87e4cac | 39 | |
3b16cf87 JA |
40 | static DEFINE_PER_CPU(int, resched_irq); |
41 | static DEFINE_PER_CPU(int, callfunc_irq); | |
42 | static DEFINE_PER_CPU(int, callfuncsingle_irq); | |
43 | static DEFINE_PER_CPU(int, debug_irq) = -1; | |
f87e4cac JF |
44 | |
45 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); | |
3b16cf87 | 46 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); |
f87e4cac JF |
47 | |
48 | /* | |
49 | * Reschedule call back. Nothing to do, | |
50 | * all the work is done automatically when | |
51 | * we return from the interrupt. | |
52 | */ | |
53 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | |
54 | { | |
38bb5ab4 JF |
55 | #ifdef CONFIG_X86_32 |
56 | __get_cpu_var(irq_stat).irq_resched_count++; | |
57 | #else | |
58 | add_pda(irq_resched_count, 1); | |
59 | #endif | |
60 | ||
f87e4cac JF |
61 | return IRQ_HANDLED; |
62 | } | |
63 | ||
64 | static __cpuinit void cpu_bringup_and_idle(void) | |
65 | { | |
66 | int cpu = smp_processor_id(); | |
67 | ||
68 | cpu_init(); | |
c7b75947 JF |
69 | preempt_disable(); |
70 | ||
e2a81baf | 71 | xen_enable_sysenter(); |
6fcac6d3 | 72 | xen_enable_syscall(); |
f87e4cac | 73 | |
c7b75947 JF |
74 | cpu = smp_processor_id(); |
75 | smp_store_cpu_info(cpu); | |
76 | cpu_data(cpu).x86_max_cores = 1; | |
77 | set_cpu_sibling_map(cpu); | |
f87e4cac JF |
78 | |
79 | xen_setup_cpu_clockevents(); | |
80 | ||
c7b75947 JF |
81 | cpu_set(cpu, cpu_online_map); |
82 | x86_write_percpu(cpu_state, CPU_ONLINE); | |
83 | wmb(); | |
84 | ||
f87e4cac JF |
85 | /* We can take interrupts now: we're officially "up". */ |
86 | local_irq_enable(); | |
87 | ||
88 | wmb(); /* make sure everything is out */ | |
89 | cpu_idle(); | |
90 | } | |
91 | ||
92 | static int xen_smp_intr_init(unsigned int cpu) | |
93 | { | |
94 | int rc; | |
ee523ca1 | 95 | const char *resched_name, *callfunc_name, *debug_name; |
f87e4cac JF |
96 | |
97 | resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); | |
98 | rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, | |
99 | cpu, | |
100 | xen_reschedule_interrupt, | |
101 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | |
102 | resched_name, | |
103 | NULL); | |
104 | if (rc < 0) | |
105 | goto fail; | |
106 | per_cpu(resched_irq, cpu) = rc; | |
107 | ||
108 | callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); | |
109 | rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, | |
110 | cpu, | |
111 | xen_call_function_interrupt, | |
112 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | |
113 | callfunc_name, | |
114 | NULL); | |
115 | if (rc < 0) | |
116 | goto fail; | |
117 | per_cpu(callfunc_irq, cpu) = rc; | |
118 | ||
ee523ca1 JF |
119 | debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); |
120 | rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, | |
121 | IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING, | |
122 | debug_name, NULL); | |
123 | if (rc < 0) | |
124 | goto fail; | |
125 | per_cpu(debug_irq, cpu) = rc; | |
126 | ||
3b16cf87 JA |
127 | callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); |
128 | rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, | |
129 | cpu, | |
130 | xen_call_function_single_interrupt, | |
131 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | |
132 | callfunc_name, | |
133 | NULL); | |
134 | if (rc < 0) | |
135 | goto fail; | |
136 | per_cpu(callfuncsingle_irq, cpu) = rc; | |
137 | ||
f87e4cac JF |
138 | return 0; |
139 | ||
140 | fail: | |
141 | if (per_cpu(resched_irq, cpu) >= 0) | |
142 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | |
143 | if (per_cpu(callfunc_irq, cpu) >= 0) | |
144 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | |
ee523ca1 JF |
145 | if (per_cpu(debug_irq, cpu) >= 0) |
146 | unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); | |
3b16cf87 JA |
147 | if (per_cpu(callfuncsingle_irq, cpu) >= 0) |
148 | unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); | |
149 | ||
f87e4cac JF |
150 | return rc; |
151 | } | |
152 | ||
c7b75947 | 153 | static void __init xen_fill_possible_map(void) |
f87e4cac JF |
154 | { |
155 | int i, rc; | |
156 | ||
157 | for (i = 0; i < NR_CPUS; i++) { | |
158 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); | |
4560a294 JF |
159 | if (rc >= 0) { |
160 | num_processors++; | |
f87e4cac | 161 | cpu_set(i, cpu_possible_map); |
4560a294 | 162 | } |
f87e4cac JF |
163 | } |
164 | } | |
165 | ||
a9e7062d | 166 | static void __init xen_smp_prepare_boot_cpu(void) |
f87e4cac | 167 | { |
f87e4cac JF |
168 | BUG_ON(smp_processor_id() != 0); |
169 | native_smp_prepare_boot_cpu(); | |
170 | ||
f87e4cac JF |
171 | /* We've switched to the "real" per-cpu gdt, so make sure the |
172 | old memory can be recycled */ | |
c7b75947 | 173 | make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); |
60223a32 JF |
174 | |
175 | xen_setup_vcpu_info_placement(); | |
f87e4cac JF |
176 | } |
177 | ||
a9e7062d | 178 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) |
f87e4cac JF |
179 | { |
180 | unsigned cpu; | |
181 | ||
f87e4cac | 182 | smp_store_cpu_info(0); |
c7b75947 | 183 | cpu_data(0).x86_max_cores = 1; |
f87e4cac JF |
184 | set_cpu_sibling_map(0); |
185 | ||
186 | if (xen_smp_intr_init(0)) | |
187 | BUG(); | |
188 | ||
ecaa6c9d | 189 | xen_cpu_initialized_map = cpumask_of_cpu(0); |
f87e4cac JF |
190 | |
191 | /* Restrict the possible_map according to max_cpus. */ | |
192 | while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { | |
7c04e64a | 193 | for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) |
f87e4cac JF |
194 | continue; |
195 | cpu_clear(cpu, cpu_possible_map); | |
196 | } | |
197 | ||
198 | for_each_possible_cpu (cpu) { | |
199 | struct task_struct *idle; | |
200 | ||
201 | if (cpu == 0) | |
202 | continue; | |
203 | ||
204 | idle = fork_idle(cpu); | |
205 | if (IS_ERR(idle)) | |
206 | panic("failed fork for CPU %d", cpu); | |
207 | ||
208 | cpu_set(cpu, cpu_present_map); | |
209 | } | |
210 | ||
211 | //init_xenbus_allowed_cpumask(); | |
212 | } | |
213 | ||
214 | static __cpuinit int | |
215 | cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |
216 | { | |
217 | struct vcpu_guest_context *ctxt; | |
c7b75947 | 218 | struct desc_struct *gdt; |
f87e4cac | 219 | |
ecaa6c9d | 220 | if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) |
f87e4cac JF |
221 | return 0; |
222 | ||
223 | ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); | |
224 | if (ctxt == NULL) | |
225 | return -ENOMEM; | |
226 | ||
c7b75947 JF |
227 | gdt = get_cpu_gdt_table(cpu); |
228 | ||
f87e4cac JF |
229 | ctxt->flags = VGCF_IN_KERNEL; |
230 | ctxt->user_regs.ds = __USER_DS; | |
231 | ctxt->user_regs.es = __USER_DS; | |
f87e4cac | 232 | ctxt->user_regs.ss = __KERNEL_DS; |
c7b75947 JF |
233 | #ifdef CONFIG_X86_32 |
234 | ctxt->user_regs.fs = __KERNEL_PERCPU; | |
235 | #endif | |
f87e4cac JF |
236 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
237 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ | |
238 | ||
239 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); | |
240 | ||
241 | xen_copy_trap_info(ctxt->trap_ctxt); | |
242 | ||
243 | ctxt->ldt_ents = 0; | |
244 | ||
c7b75947 JF |
245 | BUG_ON((unsigned long)gdt & ~PAGE_MASK); |
246 | make_lowmem_page_readonly(gdt); | |
f87e4cac | 247 | |
c7b75947 JF |
248 | ctxt->gdt_frames[0] = virt_to_mfn(gdt); |
249 | ctxt->gdt_ents = GDT_ENTRIES; | |
f87e4cac JF |
250 | |
251 | ctxt->user_regs.cs = __KERNEL_CS; | |
faca6227 | 252 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); |
f87e4cac JF |
253 | |
254 | ctxt->kernel_ss = __KERNEL_DS; | |
faca6227 | 255 | ctxt->kernel_sp = idle->thread.sp0; |
f87e4cac | 256 | |
c7b75947 | 257 | #ifdef CONFIG_X86_32 |
f87e4cac | 258 | ctxt->event_callback_cs = __KERNEL_CS; |
f87e4cac | 259 | ctxt->failsafe_callback_cs = __KERNEL_CS; |
c7b75947 JF |
260 | #endif |
261 | ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; | |
f87e4cac JF |
262 | ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; |
263 | ||
264 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | |
265 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | |
266 | ||
267 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) | |
268 | BUG(); | |
269 | ||
270 | kfree(ctxt); | |
271 | return 0; | |
272 | } | |
273 | ||
a9e7062d | 274 | static int __cpuinit xen_cpu_up(unsigned int cpu) |
f87e4cac JF |
275 | { |
276 | struct task_struct *idle = idle_task(cpu); | |
277 | int rc; | |
278 | ||
279 | #if 0 | |
280 | rc = cpu_up_check(cpu); | |
281 | if (rc) | |
282 | return rc; | |
283 | #endif | |
284 | ||
c7b75947 JF |
285 | #ifdef CONFIG_X86_64 |
286 | /* Allocate node local memory for AP pdas */ | |
287 | WARN_ON(cpu == 0); | |
288 | if (cpu > 0) { | |
289 | rc = get_local_pda(cpu); | |
290 | if (rc) | |
291 | return rc; | |
292 | } | |
293 | #endif | |
294 | ||
295 | #ifdef CONFIG_X86_32 | |
f87e4cac JF |
296 | init_gdt(cpu); |
297 | per_cpu(current_task, cpu) = idle; | |
f87e4cac | 298 | irq_ctx_init(cpu); |
c7b75947 JF |
299 | #else |
300 | cpu_pda(cpu)->pcurrent = idle; | |
301 | clear_tsk_thread_flag(idle, TIF_FORK); | |
302 | #endif | |
f87e4cac JF |
303 | xen_setup_timer(cpu); |
304 | ||
c7b75947 JF |
305 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
306 | ||
f87e4cac JF |
307 | /* make sure interrupts start blocked */ |
308 | per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; | |
309 | ||
310 | rc = cpu_initialize_context(cpu, idle); | |
311 | if (rc) | |
312 | return rc; | |
313 | ||
314 | if (num_online_cpus() == 1) | |
315 | alternatives_smp_switch(1); | |
316 | ||
317 | rc = xen_smp_intr_init(cpu); | |
318 | if (rc) | |
319 | return rc; | |
320 | ||
f87e4cac JF |
321 | rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); |
322 | BUG_ON(rc); | |
323 | ||
c7b75947 JF |
324 | while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { |
325 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | |
326 | barrier(); | |
327 | } | |
328 | ||
f87e4cac JF |
329 | return 0; |
330 | } | |
331 | ||
/* smp_ops hook: nothing to do for Xen; @max_cpus is ignored. */
static void xen_smp_cpus_done(unsigned int max_cpus)
{
	/* intentionally empty */
}
335 | ||
336 | static void stop_self(void *v) | |
337 | { | |
338 | int cpu = smp_processor_id(); | |
339 | ||
340 | /* make sure we're not pinning something down */ | |
341 | load_cr3(swapper_pg_dir); | |
342 | /* should set up a minimal gdt */ | |
343 | ||
344 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); | |
345 | BUG(); | |
346 | } | |
347 | ||
a9e7062d | 348 | static void xen_smp_send_stop(void) |
f87e4cac | 349 | { |
8691e5a8 | 350 | smp_call_function(stop_self, NULL, 0); |
f87e4cac JF |
351 | } |
352 | ||
a9e7062d | 353 | static void xen_smp_send_reschedule(int cpu) |
f87e4cac JF |
354 | { |
355 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); | |
356 | } | |
357 | ||
f87e4cac JF |
358 | static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) |
359 | { | |
360 | unsigned cpu; | |
361 | ||
362 | cpus_and(mask, mask, cpu_online_map); | |
363 | ||
364 | for_each_cpu_mask(cpu, mask) | |
365 | xen_send_IPI_one(cpu, vector); | |
366 | } | |
367 | ||
a9e7062d | 368 | static void xen_smp_send_call_function_ipi(cpumask_t mask) |
3b16cf87 JA |
369 | { |
370 | int cpu; | |
371 | ||
372 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | |
373 | ||
374 | /* Make sure other vcpus get a chance to run if they need to. */ | |
375 | for_each_cpu_mask(cpu, mask) { | |
376 | if (xen_vcpu_stolen(cpu)) { | |
377 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | |
378 | break; | |
379 | } | |
380 | } | |
381 | } | |
382 | ||
a9e7062d | 383 | static void xen_smp_send_call_function_single_ipi(int cpu) |
3b16cf87 JA |
384 | { |
385 | xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); | |
386 | } | |
387 | ||
f87e4cac JF |
388 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) |
389 | { | |
f87e4cac | 390 | irq_enter(); |
3b16cf87 | 391 | generic_smp_call_function_interrupt(); |
c7b75947 | 392 | #ifdef CONFIG_X86_32 |
38e760a1 | 393 | __get_cpu_var(irq_stat).irq_call_count++; |
c7b75947 JF |
394 | #else |
395 | add_pda(irq_call_count, 1); | |
396 | #endif | |
f87e4cac JF |
397 | irq_exit(); |
398 | ||
f87e4cac JF |
399 | return IRQ_HANDLED; |
400 | } | |
401 | ||
3b16cf87 | 402 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) |
f87e4cac | 403 | { |
3b16cf87 JA |
404 | irq_enter(); |
405 | generic_smp_call_function_single_interrupt(); | |
c7b75947 | 406 | #ifdef CONFIG_X86_32 |
3b16cf87 | 407 | __get_cpu_var(irq_stat).irq_call_count++; |
c7b75947 JF |
408 | #else |
409 | add_pda(irq_call_count, 1); | |
410 | #endif | |
3b16cf87 | 411 | irq_exit(); |
f87e4cac | 412 | |
3b16cf87 | 413 | return IRQ_HANDLED; |
f87e4cac | 414 | } |
a9e7062d JF |
415 | |
416 | static const struct smp_ops xen_smp_ops __initdata = { | |
417 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | |
418 | .smp_prepare_cpus = xen_smp_prepare_cpus, | |
419 | .cpu_up = xen_cpu_up, | |
420 | .smp_cpus_done = xen_smp_cpus_done, | |
421 | ||
422 | .smp_send_stop = xen_smp_send_stop, | |
423 | .smp_send_reschedule = xen_smp_send_reschedule, | |
424 | ||
425 | .send_call_func_ipi = xen_smp_send_call_function_ipi, | |
426 | .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, | |
427 | }; | |
428 | ||
429 | void __init xen_smp_init(void) | |
430 | { | |
431 | smp_ops = xen_smp_ops; | |
c7b75947 | 432 | xen_fill_possible_map(); |
56397f8d | 433 | paravirt_use_bytelocks(); |
a9e7062d | 434 | } |