arch/x86/kernel/io_apic_64.c
1 /*
2 * Intel IO-APIC support for multi-Pentium hosts.
3 *
4 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
5 *
6 * Many thanks to Stig Venaas for trying out countless experimental
7 * patches and reporting/debugging problems patiently!
8 *
9 * (c) 1999, Multiple IO-APIC support, developed by
10 * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
11 * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
12 * further tested and cleaned up by Zach Brown <zab@redhat.com>
13 * and Ingo Molnar <mingo@redhat.com>
14 *
15 * Fixes
16 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
17 * thanks to Eric Gilmore
18 * and Rolf G. Tews
19 * for testing these extensively
20 * Paul Diefenbaugh : Added full ACPI support
21 */
22
23 #include <linux/mm.h>
24 #include <linux/interrupt.h>
25 #include <linux/init.h>
26 #include <linux/delay.h>
27 #include <linux/sched.h>
28 #include <linux/pci.h>
29 #include <linux/mc146818rtc.h>
30 #include <linux/acpi.h>
31 #include <linux/sysdev.h>
32 #include <linux/msi.h>
33 #include <linux/htirq.h>
34 #include <linux/dmar.h>
35 #include <linux/jiffies.h>
36 #ifdef CONFIG_ACPI
37 #include <acpi/acpi_bus.h>
38 #endif
39 #include <linux/bootmem.h>
41
42 #include <asm/idle.h>
43 #include <asm/io.h>
44 #include <asm/smp.h>
45 #include <asm/desc.h>
46 #include <asm/proto.h>
47 #include <asm/acpi.h>
48 #include <asm/dma.h>
49 #include <asm/i8259.h>
50 #include <asm/nmi.h>
51 #include <asm/msidef.h>
52 #include <asm/hypertransport.h>
53 #include <asm/irq_remapping.h>
54
55 #include <mach_ipi.h>
56 #include <mach_apic.h>
57
58 #define __apicdebuginit(type) static type __init
59
60 int ioapic_force;
61
62 int sis_apic_bug; /* not actually supported, dummy for compile */
63
64 static DEFINE_SPINLOCK(ioapic_lock);
65 static DEFINE_SPINLOCK(vector_lock);
66
67 int first_free_entry;
68 /*
69 * Rough estimation of how many shared IRQs there are, can
70 * be changed anytime.
71 */
72 int pin_map_size;
73
74 /*
75 * # of IRQ routing registers
76 */
77 int nr_ioapic_registers[MAX_IO_APICS];
78
79 /* I/O APIC entries */
80 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
81 int nr_ioapics;
82
83 /* MP IRQ source entries */
84 struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
85
86 /* # of MP IRQ source entries */
87 int mp_irq_entries;
88
89 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
90
91 int skip_ioapic_setup;
92
93 static int __init parse_noapic(char *str)
94 {
95 disable_ioapic_setup();
96 return 0;
97 }
98 early_param("noapic", parse_noapic);
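/*
 * Booting with "noapic" on the command line makes disable_ioapic_setup()
 * flag IO-APIC setup to be skipped (see skip_ioapic_setup above), so
 * interrupt delivery stays on the legacy 8259A PICs.
 */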
99
100
101 struct irq_cfg;
102 struct irq_pin_list;
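/*
 * One irq_cfg per irq:
 *  - irq: the irq number this entry describes (-1 while on the free list)
 *  - next: links all allocated entries into a single list
 *  - irq_2_pin: the IO-APIC pin(s) that deliver this irq
 *  - vector/domain: the assigned CPU vector and the CPUs it is installed on
 *  - old_domain, move_cleanup_count, move_in_progress: bookkeeping used
 *    while the irq is migrated to a new vector/CPU set
 */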
103 struct irq_cfg {
104 unsigned int irq;
105 struct irq_cfg *next;
106 struct irq_pin_list *irq_2_pin;
107 cpumask_t domain;
108 cpumask_t old_domain;
109 unsigned move_cleanup_count;
110 u8 vector;
111 u8 move_in_progress : 1;
112 };
113
114 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
115 static struct irq_cfg irq_cfg_legacy[] __initdata = {
116 [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
117 [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
118 [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
119 [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
120 [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
121 [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
122 [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
123 [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
124 [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
125 [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
126 [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
127 [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
128 [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
129 [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
130 [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
131 [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
132 };
133
134 static struct irq_cfg irq_cfg_init = { .irq = -1U, };
135 /* needs to be bigger than the size of irq_cfg_legacy */
136 static int nr_irq_cfg = 32;
137
138 static int __init parse_nr_irq_cfg(char *arg)
139 {
140 if (arg) {
141 nr_irq_cfg = simple_strtoul(arg, NULL, 0);
142 if (nr_irq_cfg < 32)
143 nr_irq_cfg = 32;
144 }
145 return 0;
146 }
147
148 early_param("nr_irq_cfg", parse_nr_irq_cfg);
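/*
 * "nr_irq_cfg=" sets how many irq_cfg entries are allocated per chunk
 * (never fewer than 32); for example nr_irq_cfg=128 makes both the
 * initial array and every later refill 128 entries large.
 */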
149
150 static void init_one_irq_cfg(struct irq_cfg *cfg)
151 {
152 memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
153 }
154
155 static struct irq_cfg *irq_cfgx;
156 static struct irq_cfg *irq_cfgx_free;
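/*
 * init_work() runs once the dyn_array behind irq_cfgx has been allocated:
 * it copies in the 16 legacy ISA entries, initializes the rest to
 * irq_cfg_init (irq == -1), chains the entries together and points
 * irq_cfgx_free at the first non-legacy entry.
 */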
157 static void __init init_work(void *data)
158 {
159 struct dyn_array *da = data;
160 struct irq_cfg *cfg;
161 int legacy_count;
162 int i;
163
164 cfg = *da->name;
165
166 memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
167
168 legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
169 for (i = legacy_count; i < *da->nr; i++)
170 init_one_irq_cfg(&cfg[i]);
171
172 for (i = 1; i < *da->nr; i++)
173 cfg[i-1].next = &cfg[i];
174
175 irq_cfgx_free = &irq_cfgx[legacy_count];
176 irq_cfgx[legacy_count - 1].next = NULL;
177 }
178
179 #define for_each_irq_cfg(cfg) \
180 for (cfg = irq_cfgx; cfg; cfg = cfg->next)
181
182 DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
183
184 static struct irq_cfg *irq_cfg(unsigned int irq)
185 {
186 struct irq_cfg *cfg;
187
188 cfg = irq_cfgx;
189 while (cfg) {
190 if (cfg->irq == irq)
191 return cfg;
192
193 cfg = cfg->next;
194 }
195
196 return NULL;
197 }
198
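/*
 * Look up the irq_cfg for an irq, allocating one if it does not exist yet:
 * the list is searched first (O(n)), then an entry is popped off
 * irq_cfgx_free; when the free list is empty another nr_irq_cfg entries
 * are allocated (kzalloc once bootmem is gone, bootmem otherwise).
 */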
199 static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
200 {
201 struct irq_cfg *cfg, *cfg_pri;
202 int i;
203 int count = 0;
204
205 cfg_pri = cfg = irq_cfgx;
206 while (cfg) {
207 if (cfg->irq == irq)
208 return cfg;
209
210 cfg_pri = cfg;
211 cfg = cfg->next;
212 count++;
213 }
214
215 if (!irq_cfgx_free) {
216 unsigned long phys;
217 unsigned long total_bytes;
218 /*
219 * we ran out of the pre-allocated ones, allocate more
220 */
221 printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
222
223 total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
224 if (after_bootmem)
225 cfg = kzalloc(total_bytes, GFP_ATOMIC);
226 else
227 cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
228
229 if (!cfg)
230 panic("please boot with nr_irq_cfg= %d\n", count * 2);
231
232 phys = __pa(cfg);
233 printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
234
235 for (i = 0; i < nr_irq_cfg; i++)
236 init_one_irq_cfg(&cfg[i]);
237
238 for (i = 1; i < nr_irq_cfg; i++)
239 cfg[i-1].next = &cfg[i];
240
241 irq_cfgx_free = cfg;
242 }
243
244 cfg = irq_cfgx_free;
245 irq_cfgx_free = irq_cfgx_free->next;
246 cfg->next = NULL;
247 if (cfg_pri)
248 cfg_pri->next = cfg;
249 else
250 irq_cfgx = cfg;
251 cfg->irq = irq;
252 printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
253 #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
254 {
255 /* dump the results */
256 struct irq_cfg *cfg;
257 unsigned long phys;
258 unsigned long bytes = sizeof(struct irq_cfg);
259
260 printk(KERN_DEBUG "=========================== %d\n", irq);
261 printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
262 for_each_irq_cfg(cfg) {
263 phys = __pa(cfg);
264 printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
265 }
266 printk(KERN_DEBUG "===========================\n");
267 }
268 #endif
269 return cfg;
270 }
271
272 /*
273 * This is performance-critical, we want to do it O(1)
274 *
275 * the indexing order of this array favors 1:1 mappings
276 * between pins and IRQs.
277 */
278
279 struct irq_pin_list {
280 int apic, pin;
281 struct irq_pin_list *next;
282 };
283
284 static struct irq_pin_list *irq_2_pin_head;
285 /* fill one page ? */
286 static int nr_irq_2_pin = 0x100;
287 static struct irq_pin_list *irq_2_pin_ptr;
288 static void __init irq_2_pin_init_work(void *data)
289 {
290 struct dyn_array *da = data;
291 struct irq_pin_list *pin;
292 int i;
293
294 pin = *da->name;
295
296 for (i = 1; i < *da->nr; i++)
297 pin[i-1].next = &pin[i];
298
299 irq_2_pin_ptr = &pin[0];
300 }
301 DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
302
303 static struct irq_pin_list *get_one_free_irq_2_pin(void)
304 {
305 struct irq_pin_list *pin;
306 int i;
307
308 pin = irq_2_pin_ptr;
309
310 if (pin) {
311 irq_2_pin_ptr = pin->next;
312 pin->next = NULL;
313 return pin;
314 }
315
316 /*
317 * we ran out of the pre-allocated ones, allocate more
318 */
319 printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
320
321 if (after_bootmem)
322 pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
323 GFP_ATOMIC);
324 else
325 pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
326 nr_irq_2_pin, PAGE_SIZE, 0);
327
328 if (!pin)
329 panic("can not get more irq_2_pin\n");
330
331 for (i = 1; i < nr_irq_2_pin; i++)
332 pin[i-1].next = &pin[i];
333
334 irq_2_pin_ptr = pin->next;
335 pin->next = NULL;
336
337 return pin;
338 }
339
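/*
 * The IO-APIC is accessed through an indirect register window: write the
 * register number to 'index' (offset 0x00), then read or write the value
 * through 'data' (offset 0x10); unused[] just pads the gap between them.
 */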
340 struct io_apic {
341 unsigned int index;
342 unsigned int unused[3];
343 unsigned int data;
344 };
345
346 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
347 {
348 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
349 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
350 }
351
352 static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
353 {
354 struct io_apic __iomem *io_apic = io_apic_base(apic);
355 writel(reg, &io_apic->index);
356 return readl(&io_apic->data);
357 }
358
359 static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
360 {
361 struct io_apic __iomem *io_apic = io_apic_base(apic);
362 writel(reg, &io_apic->index);
363 writel(value, &io_apic->data);
364 }
365
366 /*
367 * Re-write a value: to be used for read-modify-write
368 * cycles where the read already set up the index register.
369 */
370 static inline void io_apic_modify(unsigned int apic, unsigned int value)
371 {
372 struct io_apic __iomem *io_apic = io_apic_base(apic);
373 writel(value, &io_apic->data);
374 }
375
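/*
 * Returns true if any pin routed to this irq still has its Remote IRR bit
 * set, i.e. a level-triggered interrupt was delivered but the EOI from the
 * local APIC has not reached the IO-APIC yet.
 */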
376 static bool io_apic_level_ack_pending(unsigned int irq)
377 {
378 struct irq_pin_list *entry;
379 unsigned long flags;
380 struct irq_cfg *cfg = irq_cfg(irq);
381
382 spin_lock_irqsave(&ioapic_lock, flags);
383 entry = cfg->irq_2_pin;
384 for (;;) {
385 unsigned int reg;
386 int pin;
387
388 if (!entry)
389 break;
390 pin = entry->pin;
391 reg = io_apic_read(entry->apic, 0x10 + pin*2);
392 /* Is the remote IRR bit set? */
393 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
394 spin_unlock_irqrestore(&ioapic_lock, flags);
395 return true;
396 }
397 if (!entry->next)
398 break;
399 entry = entry->next;
400 }
401 spin_unlock_irqrestore(&ioapic_lock, flags);
402
403 return false;
404 }
405
406 union entry_union {
407 struct { u32 w1, w2; };
408 struct IO_APIC_route_entry entry;
409 };
410
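/*
 * Each pin has a 64-bit redirection table entry split across two 32-bit
 * registers: 0x10 + 2*pin holds the low word (vector, delivery/dest mode,
 * polarity, trigger, mask) and 0x11 + 2*pin holds the high word
 * (destination APIC id).
 */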
411 static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
412 {
413 union entry_union eu;
414 unsigned long flags;
415 spin_lock_irqsave(&ioapic_lock, flags);
416 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
417 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
418 spin_unlock_irqrestore(&ioapic_lock, flags);
419 return eu.entry;
420 }
421
422 /*
423 * When we write a new IO APIC routing entry, we need to write the high
424 * word first! If the mask bit in the low word is clear, we will enable
425 * the interrupt, and we need to make sure the entry is fully populated
426 * before that happens.
427 */
428 static void
429 __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
430 {
431 union entry_union eu;
432 eu.entry = e;
433 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
434 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
435 }
436
437 static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
438 {
439 unsigned long flags;
440 spin_lock_irqsave(&ioapic_lock, flags);
441 __ioapic_write_entry(apic, pin, e);
442 spin_unlock_irqrestore(&ioapic_lock, flags);
443 }
444
445 /*
446 * When we mask an IO APIC routing entry, we need to write the low
447 * word first, in order to set the mask bit before we change the
448 * high bits!
449 */
450 static void ioapic_mask_entry(int apic, int pin)
451 {
452 unsigned long flags;
453 union entry_union eu = { .entry.mask = 1 };
454
455 spin_lock_irqsave(&ioapic_lock, flags);
456 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
457 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
458 spin_unlock_irqrestore(&ioapic_lock, flags);
459 }
460
461 #ifdef CONFIG_SMP
462 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
463 {
464 int apic, pin;
465 struct irq_cfg *cfg;
466 struct irq_pin_list *entry;
467
468 cfg = irq_cfg(irq);
469 entry = cfg->irq_2_pin;
470 for (;;) {
471 unsigned int reg;
472
473 if (!entry)
474 break;
475
476 apic = entry->apic;
477 pin = entry->pin;
478 #ifdef CONFIG_INTR_REMAP
479 /*
480 * With interrupt-remapping, destination information comes
481 * from interrupt-remapping table entry.
482 */
483 if (!irq_remapped(irq))
484 io_apic_write(apic, 0x11 + pin*2, dest);
485 #else
486 io_apic_write(apic, 0x11 + pin*2, dest);
487 #endif
488 reg = io_apic_read(apic, 0x10 + pin*2);
489 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
490 reg |= vector;
491 io_apic_modify(apic, reg);
492 if (!entry->next)
493 break;
494 entry = entry->next;
495 }
496 }
497
498 static int assign_irq_vector(int irq, cpumask_t mask);
499
500 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
501 {
502 struct irq_cfg *cfg = irq_cfg(irq);
503 unsigned long flags;
504 unsigned int dest;
505 cpumask_t tmp;
506 struct irq_desc *desc;
507
508 cpus_and(tmp, mask, cpu_online_map);
509 if (cpus_empty(tmp))
510 return;
511
512 if (assign_irq_vector(irq, mask))
513 return;
514
515 cpus_and(tmp, cfg->domain, mask);
516 dest = cpu_mask_to_apicid(tmp);
517
518 /*
519 * Only the high 8 bits are valid.
520 */
521 dest = SET_APIC_LOGICAL_ID(dest);
522
523 desc = irq_to_desc(irq);
524 spin_lock_irqsave(&ioapic_lock, flags);
525 __target_IO_APIC_irq(irq, dest, cfg->vector);
526 desc->affinity = mask;
527 spin_unlock_irqrestore(&ioapic_lock, flags);
528 }
529 #endif
530
531 /*
532 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
533 * shared ISA-space IRQs, so we have to support them. We are super
534 * fast in the common case, and fast for shared ISA-space IRQs.
535 */
536 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
537 {
538 struct irq_cfg *cfg;
539 struct irq_pin_list *entry;
540
541 /* first reference to this irq's irq_cfg, so allocate it if necessary */
542 cfg = irq_cfg_alloc(irq);
543 entry = cfg->irq_2_pin;
544 if (!entry) {
545 entry = get_one_free_irq_2_pin();
546 cfg->irq_2_pin = entry;
547 entry->apic = apic;
548 entry->pin = pin;
549 printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
550 return;
551 }
552
553 while (entry->next) {
554 /* not again, please */
555 if (entry->apic == apic && entry->pin == pin)
556 return;
557
558 entry = entry->next;
559 }
560
561 entry->next = get_one_free_irq_2_pin();
562 entry = entry->next;
563 entry->apic = apic;
564 entry->pin = pin;
565 printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
566 }
567
568 /*
569 * Reroute an IRQ to a different pin.
570 */
571 static void __init replace_pin_at_irq(unsigned int irq,
572 int oldapic, int oldpin,
573 int newapic, int newpin)
574 {
575 struct irq_cfg *cfg = irq_cfg(irq);
576 struct irq_pin_list *entry = cfg->irq_2_pin;
577 int replaced = 0;
578
579 while (entry) {
580 if (entry->apic == oldapic && entry->pin == oldpin) {
581 entry->apic = newapic;
582 entry->pin = newpin;
583 replaced = 1;
584 /* every one is different, right? */
585 break;
586 }
587 entry = entry->next;
588 }
589
590 /* why? call replace before add? */
591 if (!replaced)
592 add_pin_to_irq(irq, newapic, newpin);
593 }
594
595 /*
596 * Synchronize the IO-APIC and the CPU by doing
597 * a dummy read from the IO-APIC
598 */
599 static inline void io_apic_sync(unsigned int apic)
600 {
601 struct io_apic __iomem *io_apic = io_apic_base(apic);
602 readl(&io_apic->data);
603 }
604
605 #define __DO_ACTION(R, ACTION, FINAL) \
606 \
607 { \
608 int pin; \
609 struct irq_cfg *cfg; \
610 struct irq_pin_list *entry; \
611 \
612 cfg = irq_cfg(irq); \
613 entry = cfg->irq_2_pin; \
614 for (;;) { \
615 unsigned int reg; \
616 if (!entry) \
617 break; \
618 pin = entry->pin; \
619 reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
620 reg ACTION; \
621 io_apic_modify(entry->apic, reg); \
622 FINAL; \
623 if (!entry->next) \
624 break; \
625 entry = entry->next; \
626 } \
627 }
628
629 #define DO_ACTION(name,R,ACTION, FINAL) \
630 \
631 static void name##_IO_APIC_irq (unsigned int irq) \
632 __DO_ACTION(R, ACTION, FINAL)
633
634 /* mask = 1 */
635 DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
636
637 /* mask = 0 */
638 DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, )
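/*
 * The two expansions above define __mask_IO_APIC_irq() and
 * __unmask_IO_APIC_irq(), which walk the irq_2_pin list and set or clear
 * IO_APIC_REDIR_MASKED in each low RTE word; masking also does a dummy
 * read (io_apic_sync) so the write has reached the IO-APIC before we return.
 */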
639
640 static void mask_IO_APIC_irq (unsigned int irq)
641 {
642 unsigned long flags;
643
644 spin_lock_irqsave(&ioapic_lock, flags);
645 __mask_IO_APIC_irq(irq);
646 spin_unlock_irqrestore(&ioapic_lock, flags);
647 }
648
649 static void unmask_IO_APIC_irq (unsigned int irq)
650 {
651 unsigned long flags;
652
653 spin_lock_irqsave(&ioapic_lock, flags);
654 __unmask_IO_APIC_irq(irq);
655 spin_unlock_irqrestore(&ioapic_lock, flags);
656 }
657
658 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
659 {
660 struct IO_APIC_route_entry entry;
661
662 /* Check delivery_mode to be sure we're not clearing an SMI pin */
663 entry = ioapic_read_entry(apic, pin);
664 if (entry.delivery_mode == dest_SMI)
665 return;
666 /*
667 * Disable it in the IO-APIC irq-routing table:
668 */
669 ioapic_mask_entry(apic, pin);
670 }
671
672 static void clear_IO_APIC (void)
673 {
674 int apic, pin;
675
676 for (apic = 0; apic < nr_ioapics; apic++)
677 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
678 clear_IO_APIC_pin(apic, pin);
679 }
680
681 #ifdef CONFIG_INTR_REMAP
682 /* I/O APIC RTE contents at the OS boot up */
683 static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
684
685 /*
686 * Saves and masks all the unmasked IO-APIC RTE's
687 */
688 int save_mask_IO_APIC_setup(void)
689 {
690 union IO_APIC_reg_01 reg_01;
691 unsigned long flags;
692 int apic, pin;
693
694 /*
695 * The number of IO-APIC IRQ registers (== #pins):
696 */
697 for (apic = 0; apic < nr_ioapics; apic++) {
698 spin_lock_irqsave(&ioapic_lock, flags);
699 reg_01.raw = io_apic_read(apic, 1);
700 spin_unlock_irqrestore(&ioapic_lock, flags);
701 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
702 }
703
704 for (apic = 0; apic < nr_ioapics; apic++) {
705 early_ioapic_entries[apic] =
706 kzalloc(sizeof(struct IO_APIC_route_entry) *
707 nr_ioapic_registers[apic], GFP_KERNEL);
708 if (!early_ioapic_entries[apic])
709 return -ENOMEM;
710 }
711
712 for (apic = 0; apic < nr_ioapics; apic++)
713 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
714 struct IO_APIC_route_entry entry;
715
716 entry = early_ioapic_entries[apic][pin] =
717 ioapic_read_entry(apic, pin);
718 if (!entry.mask) {
719 entry.mask = 1;
720 ioapic_write_entry(apic, pin, entry);
721 }
722 }
723 return 0;
724 }
725
726 void restore_IO_APIC_setup(void)
727 {
728 int apic, pin;
729
730 for (apic = 0; apic < nr_ioapics; apic++)
731 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
732 ioapic_write_entry(apic, pin,
733 early_ioapic_entries[apic][pin]);
734 }
735
736 void reinit_intr_remapped_IO_APIC(int intr_remapping)
737 {
738 /*
739 * for now plain restore of previous settings.
740 * TBD: In the case of OS enabling interrupt-remapping,
741 * IO-APIC RTE's need to be setup to point to interrupt-remapping
742 * table entries. for now, do a plain restore, and wait for
743 * the setup_IO_APIC_irqs() to do proper initialization.
744 */
745 restore_IO_APIC_setup();
746 }
747 #endif
748
749 /*
750 * Find the IRQ entry number of a certain pin.
751 */
752 static int find_irq_entry(int apic, int pin, int type)
753 {
754 int i;
755
756 for (i = 0; i < mp_irq_entries; i++)
757 if (mp_irqs[i].mp_irqtype == type &&
758 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
759 mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
760 mp_irqs[i].mp_dstirq == pin)
761 return i;
762
763 return -1;
764 }
765
766 /*
767 * Find the pin to which IRQ[irq] (ISA) is connected
768 */
769 static int __init find_isa_irq_pin(int irq, int type)
770 {
771 int i;
772
773 for (i = 0; i < mp_irq_entries; i++) {
774 int lbus = mp_irqs[i].mp_srcbus;
775
776 if (test_bit(lbus, mp_bus_not_pci) &&
777 (mp_irqs[i].mp_irqtype == type) &&
778 (mp_irqs[i].mp_srcbusirq == irq))
779
780 return mp_irqs[i].mp_dstirq;
781 }
782 return -1;
783 }
784
785 static int __init find_isa_irq_apic(int irq, int type)
786 {
787 int i;
788
789 for (i = 0; i < mp_irq_entries; i++) {
790 int lbus = mp_irqs[i].mp_srcbus;
791
792 if (test_bit(lbus, mp_bus_not_pci) &&
793 (mp_irqs[i].mp_irqtype == type) &&
794 (mp_irqs[i].mp_srcbusirq == irq))
795 break;
796 }
797 if (i < mp_irq_entries) {
798 int apic;
799 for(apic = 0; apic < nr_ioapics; apic++) {
800 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
801 return apic;
802 }
803 }
804
805 return -1;
806 }
807
808 /*
809 * Find a specific PCI IRQ entry.
810 * Not an __init, possibly needed by modules
811 */
812 static int pin_2_irq(int idx, int apic, int pin);
813
814 int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
815 {
816 int apic, i, best_guess = -1;
817
818 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
819 bus, slot, pin);
820 if (test_bit(bus, mp_bus_not_pci)) {
821 apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
822 return -1;
823 }
824 for (i = 0; i < mp_irq_entries; i++) {
825 int lbus = mp_irqs[i].mp_srcbus;
826
827 for (apic = 0; apic < nr_ioapics; apic++)
828 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
829 mp_irqs[i].mp_dstapic == MP_APIC_ALL)
830 break;
831
832 if (!test_bit(lbus, mp_bus_not_pci) &&
833 !mp_irqs[i].mp_irqtype &&
834 (bus == lbus) &&
835 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
836 int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
837
838 if (!(apic || IO_APIC_IRQ(irq)))
839 continue;
840
841 if (pin == (mp_irqs[i].mp_srcbusirq & 3))
842 return irq;
843 /*
844 * Use the first all-but-pin matching entry as a
845 * best-guess fuzzy result for broken mptables.
846 */
847 if (best_guess < 0)
848 best_guess = irq;
849 }
850 }
851 return best_guess;
852 }
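/*
 * For example, looking up INTA# (pin 0) of the device in slot 0x1f on
 * bus 0 would be IO_APIC_get_PCI_irq_vector(0, 0x1f, 0), returning the
 * irq number or -1 if the mptable has no matching entry.
 */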
853
854 /* ISA interrupts are always polarity zero edge triggered,
855 * when listed as conforming in the MP table. */
856
857 #define default_ISA_trigger(idx) (0)
858 #define default_ISA_polarity(idx) (0)
859
860 /* PCI interrupts are always polarity one level triggered,
861 * when listed as conforming in the MP table. */
862
863 #define default_PCI_trigger(idx) (1)
864 #define default_PCI_polarity(idx) (1)
865
866 static int MPBIOS_polarity(int idx)
867 {
868 int bus = mp_irqs[idx].mp_srcbus;
869 int polarity;
870
871 /*
872 * Determine IRQ line polarity (high active or low active):
873 */
874 switch (mp_irqs[idx].mp_irqflag & 3)
875 {
876 case 0: /* conforms, ie. bus-type dependent polarity */
877 if (test_bit(bus, mp_bus_not_pci))
878 polarity = default_ISA_polarity(idx);
879 else
880 polarity = default_PCI_polarity(idx);
881 break;
882 case 1: /* high active */
883 {
884 polarity = 0;
885 break;
886 }
887 case 2: /* reserved */
888 {
889 printk(KERN_WARNING "broken BIOS!!\n");
890 polarity = 1;
891 break;
892 }
893 case 3: /* low active */
894 {
895 polarity = 1;
896 break;
897 }
898 default: /* invalid */
899 {
900 printk(KERN_WARNING "broken BIOS!!\n");
901 polarity = 1;
902 break;
903 }
904 }
905 return polarity;
906 }
907
908 static int MPBIOS_trigger(int idx)
909 {
910 int bus = mp_irqs[idx].mp_srcbus;
911 int trigger;
912
913 /*
914 * Determine IRQ trigger mode (edge or level sensitive):
915 */
916 switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
917 {
918 case 0: /* conforms, ie. bus-type dependent */
919 if (test_bit(bus, mp_bus_not_pci))
920 trigger = default_ISA_trigger(idx);
921 else
922 trigger = default_PCI_trigger(idx);
923 break;
924 case 1: /* edge */
925 {
926 trigger = 0;
927 break;
928 }
929 case 2: /* reserved */
930 {
931 printk(KERN_WARNING "broken BIOS!!\n");
932 trigger = 1;
933 break;
934 }
935 case 3: /* level */
936 {
937 trigger = 1;
938 break;
939 }
940 default: /* invalid */
941 {
942 printk(KERN_WARNING "broken BIOS!!\n");
943 trigger = 0;
944 break;
945 }
946 }
947 return trigger;
948 }
949
950 static inline int irq_polarity(int idx)
951 {
952 return MPBIOS_polarity(idx);
953 }
954
955 static inline int irq_trigger(int idx)
956 {
957 return MPBIOS_trigger(idx);
958 }
959
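/*
 * Map an IO-APIC pin to an irq number: ISA-type busses use the source bus
 * irq directly, while PCI pins are numbered sequentially across IO-APICs,
 * e.g. pin 3 of the second IO-APIC behind a 24-pin IO-APIC 0 becomes
 * irq 24 + 3 = 27.
 */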
960 static int pin_2_irq(int idx, int apic, int pin)
961 {
962 int irq, i;
963 int bus = mp_irqs[idx].mp_srcbus;
964
965 /*
966 * Debugging check, we are in big trouble if this message pops up!
967 */
968 if (mp_irqs[idx].mp_dstirq != pin)
969 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
970
971 if (test_bit(bus, mp_bus_not_pci)) {
972 irq = mp_irqs[idx].mp_srcbusirq;
973 } else {
974 /*
975 * PCI IRQs are mapped in order
976 */
977 i = irq = 0;
978 while (i < apic)
979 irq += nr_ioapic_registers[i++];
980 irq += pin;
981 }
982 return irq;
983 }
984
985 void lock_vector_lock(void)
986 {
987 /* Used to ensure the online set of cpus does not change
988 * during assign_irq_vector.
989 */
990 spin_lock(&vector_lock);
991 }
992
993 void unlock_vector_lock(void)
994 {
995 spin_unlock(&vector_lock);
996 }
997
998 static int __assign_irq_vector(int irq, cpumask_t mask)
999 {
1000 /*
1001 * NOTE! The local APIC isn't very good at handling
1002 * multiple interrupts at the same interrupt level.
1003 * As the interrupt level is determined by taking the
1004 * vector number and shifting that right by 4, we
1005 * want to spread these out a bit so that they don't
1006 * all fall in the same interrupt level.
1007 *
1008 * Also, we've got to be careful not to trash gate
1009 * 0x80, because int 0x80 is hm, kind of importantish. ;)
1010 */
1011 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
1012 unsigned int old_vector;
1013 int cpu;
1014 struct irq_cfg *cfg;
1015
1016 cfg = irq_cfg(irq);
1017
1018 /* Only try and allocate irqs on cpus that are present */
1019 cpus_and(mask, mask, cpu_online_map);
1020
1021 if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1022 return -EBUSY;
1023
1024 old_vector = cfg->vector;
1025 if (old_vector) {
1026 cpumask_t tmp;
1027 cpus_and(tmp, cfg->domain, mask);
1028 if (!cpus_empty(tmp))
1029 return 0;
1030 }
1031
1032 for_each_cpu_mask_nr(cpu, mask) {
1033 cpumask_t domain, new_mask;
1034 int new_cpu;
1035 int vector, offset;
1036
1037 domain = vector_allocation_domain(cpu);
1038 cpus_and(new_mask, domain, cpu_online_map);
1039
1040 vector = current_vector;
1041 offset = current_offset;
1042 next:
1043 vector += 8;
1044 if (vector >= first_system_vector) {
1045 /* If we run out of vectors on large boxen, must share them. */
1046 offset = (offset + 1) % 8;
1047 vector = FIRST_DEVICE_VECTOR + offset;
1048 }
1049 if (unlikely(current_vector == vector))
1050 continue;
1051 if (vector == IA32_SYSCALL_VECTOR)
1052 goto next;
1053 for_each_cpu_mask_nr(new_cpu, new_mask)
1054 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1055 goto next;
1056 /* Found one! */
1057 current_vector = vector;
1058 current_offset = offset;
1059 if (old_vector) {
1060 cfg->move_in_progress = 1;
1061 cfg->old_domain = cfg->domain;
1062 }
1063 for_each_cpu_mask_nr(new_cpu, new_mask)
1064 per_cpu(vector_irq, new_cpu)[vector] = irq;
1065 cfg->vector = vector;
1066 cfg->domain = domain;
1067 return 0;
1068 }
1069 return -ENOSPC;
1070 }
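/*
 * The search above hands out vectors in steps of 8 so that successive irqs
 * land in different priority classes (vector >> 4); once it reaches
 * first_system_vector it wraps back to FIRST_DEVICE_VECTOR with the next
 * offset, and IA32_SYSCALL_VECTOR (0x80) is always skipped.
 */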
1071
1072 static int assign_irq_vector(int irq, cpumask_t mask)
1073 {
1074 int err;
1075 unsigned long flags;
1076
1077 spin_lock_irqsave(&vector_lock, flags);
1078 err = __assign_irq_vector(irq, mask);
1079 spin_unlock_irqrestore(&vector_lock, flags);
1080 return err;
1081 }
1082
1083 static void __clear_irq_vector(int irq)
1084 {
1085 struct irq_cfg *cfg;
1086 cpumask_t mask;
1087 int cpu, vector;
1088
1089 cfg = irq_cfg(irq);
1090 BUG_ON(!cfg->vector);
1091
1092 vector = cfg->vector;
1093 cpus_and(mask, cfg->domain, cpu_online_map);
1094 for_each_cpu_mask_nr(cpu, mask)
1095 per_cpu(vector_irq, cpu)[vector] = -1;
1096
1097 cfg->vector = 0;
1098 cpus_clear(cfg->domain);
1099 }
1100
1101 void __setup_vector_irq(int cpu)
1102 {
1103 /* Initialize vector_irq on a new cpu */
1104 /* This function must be called with vector_lock held */
1105 int irq, vector;
1106 struct irq_cfg *cfg;
1107
1108 /* Mark the inuse vectors */
1109 for_each_irq_cfg(cfg) {
1110 if (!cpu_isset(cpu, cfg->domain))
1111 continue;
1112 vector = cfg->vector;
1113 irq = cfg->irq;
1114 per_cpu(vector_irq, cpu)[vector] = irq;
1115 }
1116 /* Mark the free vectors */
1117 for (vector = 0; vector < NR_VECTORS; ++vector) {
1118 irq = per_cpu(vector_irq, cpu)[vector];
1119 if (irq < 0)
1120 continue;
1121
1122 cfg = irq_cfg(irq);
1123 if (!cpu_isset(cpu, cfg->domain))
1124 per_cpu(vector_irq, cpu)[vector] = -1;
1125 }
1126 }
1127
1128 static struct irq_chip ioapic_chip;
1129 #ifdef CONFIG_INTR_REMAP
1130 static struct irq_chip ir_ioapic_chip;
1131 #endif
1132
1133 static void ioapic_register_intr(int irq, unsigned long trigger)
1134 {
1135 struct irq_desc *desc;
1136
1137 /* legacy (< 16) irq_descs already exist; allocate the descriptor otherwise */
1138 if (irq < 16)
1139 desc = irq_to_desc(irq);
1140 else
1141 desc = irq_to_desc_alloc(irq);
1142
1143 if (trigger)
1144 desc->status |= IRQ_LEVEL;
1145 else
1146 desc->status &= ~IRQ_LEVEL;
1147
1148 #ifdef CONFIG_INTR_REMAP
1149 if (irq_remapped(irq)) {
1150 desc->status |= IRQ_MOVE_PCNTXT;
1151 if (trigger)
1152 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1153 handle_fasteoi_irq,
1154 "fasteoi");
1155 else
1156 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1157 handle_edge_irq, "edge");
1158 return;
1159 }
1160 #endif
1161 if (trigger)
1162 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1163 handle_fasteoi_irq,
1164 "fasteoi");
1165 else
1166 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1167 handle_edge_irq, "edge");
1168 }
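/*
 * Level-triggered irqs get the fasteoi flow handler (acknowledged once
 * through ->eoi after the handler has run), edge-triggered ones get
 * handle_edge_irq; with interrupt remapping enabled the ir_ioapic_chip
 * variants are used instead.
 */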
1169
1170 static int setup_ioapic_entry(int apic, int irq,
1171 struct IO_APIC_route_entry *entry,
1172 unsigned int destination, int trigger,
1173 int polarity, int vector)
1174 {
1175 /*
1176 * add it to the IO-APIC irq-routing table:
1177 */
1178 memset(entry,0,sizeof(*entry));
1179
1180 #ifdef CONFIG_INTR_REMAP
1181 if (intr_remapping_enabled) {
1182 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
1183 struct irte irte;
1184 struct IR_IO_APIC_route_entry *ir_entry =
1185 (struct IR_IO_APIC_route_entry *) entry;
1186 int index;
1187
1188 if (!iommu)
1189 panic("No mapping iommu for ioapic %d\n", apic);
1190
1191 index = alloc_irte(iommu, irq, 1);
1192 if (index < 0)
1193 panic("Failed to allocate IRTE for ioapic %d\n", apic);
1194
1195 memset(&irte, 0, sizeof(irte));
1196
1197 irte.present = 1;
1198 irte.dst_mode = INT_DEST_MODE;
1199 irte.trigger_mode = trigger;
1200 irte.dlvry_mode = INT_DELIVERY_MODE;
1201 irte.vector = vector;
1202 irte.dest_id = IRTE_DEST(destination);
1203
1204 modify_irte(irq, &irte);
1205
1206 ir_entry->index2 = (index >> 15) & 0x1;
1207 ir_entry->zero = 0;
1208 ir_entry->format = 1;
1209 ir_entry->index = (index & 0x7fff);
1210 } else
1211 #endif
1212 {
1213 entry->delivery_mode = INT_DELIVERY_MODE;
1214 entry->dest_mode = INT_DEST_MODE;
1215 entry->dest = destination;
1216 }
1217
1218 entry->mask = 0; /* enable IRQ */
1219 entry->trigger = trigger;
1220 entry->polarity = polarity;
1221 entry->vector = vector;
1222
1223 /* Mask level triggered irqs.
1224 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1225 */
1226 if (trigger)
1227 entry->mask = 1;
1228 return 0;
1229 }
1230
1231 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1232 int trigger, int polarity)
1233 {
1234 struct irq_cfg *cfg;
1235 struct IO_APIC_route_entry entry;
1236 cpumask_t mask;
1237
1238 if (!IO_APIC_IRQ(irq))
1239 return;
1240
1241 cfg = irq_cfg(irq);
1242
1243 mask = TARGET_CPUS;
1244 if (assign_irq_vector(irq, mask))
1245 return;
1246
1247 cpus_and(mask, cfg->domain, mask);
1248
1249 apic_printk(APIC_VERBOSE,KERN_DEBUG
1250 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1251 "IRQ %d Mode:%i Active:%i)\n",
1252 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
1253 irq, trigger, polarity);
1254
1255
1256 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
1257 cpu_mask_to_apicid(mask), trigger, polarity,
1258 cfg->vector)) {
1259 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1260 mp_ioapics[apic].mp_apicid, pin);
1261 __clear_irq_vector(irq);
1262 return;
1263 }
1264
1265 ioapic_register_intr(irq, trigger);
1266 if (irq < 16)
1267 disable_8259A_irq(irq);
1268
1269 ioapic_write_entry(apic, pin, entry);
1270 }
1271
1272 static void __init setup_IO_APIC_irqs(void)
1273 {
1274 int apic, pin, idx, irq, first_notcon = 1;
1275
1276 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1277
1278 for (apic = 0; apic < nr_ioapics; apic++) {
1279 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1280
1281 idx = find_irq_entry(apic,pin,mp_INT);
1282 if (idx == -1) {
1283 if (first_notcon) {
1284 apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
1285 first_notcon = 0;
1286 } else
1287 apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
1288 continue;
1289 }
1290 if (!first_notcon) {
1291 apic_printk(APIC_VERBOSE, " not connected.\n");
1292 first_notcon = 1;
1293 }
1294
1295 irq = pin_2_irq(idx, apic, pin);
1296 add_pin_to_irq(irq, apic, pin);
1297
1298 setup_IO_APIC_irq(apic, pin, irq,
1299 irq_trigger(idx), irq_polarity(idx));
1300 }
1301 }
1302
1303 if (!first_notcon)
1304 apic_printk(APIC_VERBOSE, " not connected.\n");
1305 }
1306
1307 /*
1308 * Set up the timer pin, possibly with the 8259A-master behind.
1309 */
1310 static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1311 int vector)
1312 {
1313 struct IO_APIC_route_entry entry;
1314
1315 #ifdef CONFIG_INTR_REMAP
1316 if (intr_remapping_enabled)
1317 return;
1318 #endif
1319
1320 memset(&entry, 0, sizeof(entry));
1321
1322 /*
1323 * We use logical delivery to get the timer IRQ
1324 * to the first CPU.
1325 */
1326 entry.dest_mode = INT_DEST_MODE;
1327 entry.mask = 1; /* mask IRQ now */
1328 entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
1329 entry.delivery_mode = INT_DELIVERY_MODE;
1330 entry.polarity = 0;
1331 entry.trigger = 0;
1332 entry.vector = vector;
1333
1334 /*
1335 * The timer IRQ doesn't have to know that behind the
1336 * scenes we may have an 8259A-master in AEOI mode ...
1337 */
1338 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
1339
1340 /*
1341 * Add it to the IO-APIC irq-routing table:
1342 */
1343 ioapic_write_entry(apic, pin, entry);
1344 }
1345
1346
1347 __apicdebuginit(void) print_IO_APIC(void)
1348 {
1349 int apic, i;
1350 union IO_APIC_reg_00 reg_00;
1351 union IO_APIC_reg_01 reg_01;
1352 union IO_APIC_reg_02 reg_02;
1353 unsigned long flags;
1354 struct irq_cfg *cfg;
1355
1356 if (apic_verbosity == APIC_QUIET)
1357 return;
1358
1359 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1360 for (i = 0; i < nr_ioapics; i++)
1361 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1362 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
1363
1364 /*
1365 * We are a bit conservative about what we expect. We have to
1366 * know about every hardware change ASAP.
1367 */
1368 printk(KERN_INFO "testing the IO APIC.......................\n");
1369
1370 for (apic = 0; apic < nr_ioapics; apic++) {
1371
1372 spin_lock_irqsave(&ioapic_lock, flags);
1373 reg_00.raw = io_apic_read(apic, 0);
1374 reg_01.raw = io_apic_read(apic, 1);
1375 if (reg_01.bits.version >= 0x10)
1376 reg_02.raw = io_apic_read(apic, 2);
1377 spin_unlock_irqrestore(&ioapic_lock, flags);
1378
1379 printk("\n");
1380 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1381 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1382 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1383 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1384 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1385
1386 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
1387 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
1388
1389 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1390 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1391
1392 if (reg_01.bits.version >= 0x10) {
1393 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1394 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1395 }
1396
1397 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1398
1399 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1400 " Stat Dmod Deli Vect: \n");
1401
1402 for (i = 0; i <= reg_01.bits.entries; i++) {
1403 struct IO_APIC_route_entry entry;
1404
1405 entry = ioapic_read_entry(apic, i);
1406
1407 printk(KERN_DEBUG " %02x %03X ",
1408 i,
1409 entry.dest
1410 );
1411
1412 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1413 entry.mask,
1414 entry.trigger,
1415 entry.irr,
1416 entry.polarity,
1417 entry.delivery_status,
1418 entry.dest_mode,
1419 entry.delivery_mode,
1420 entry.vector
1421 );
1422 }
1423 }
1424 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1425 for_each_irq_cfg(cfg) {
1426 struct irq_pin_list *entry = cfg->irq_2_pin;
1427 if (!entry)
1428 continue;
1429 printk(KERN_DEBUG "IRQ%d ", cfg->irq);
1430 for (;;) {
1431 printk("-> %d:%d", entry->apic, entry->pin);
1432 if (!entry->next)
1433 break;
1434 entry = entry->next;
1435 }
1436 printk("\n");
1437 }
1438
1439 printk(KERN_INFO ".................................... done.\n");
1440
1441 return;
1442 }
1443
1444 __apicdebuginit(void) print_APIC_bitfield(int base)
1445 {
1446 unsigned int v;
1447 int i, j;
1448
1449 if (apic_verbosity == APIC_QUIET)
1450 return;
1451
1452 printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
1453 for (i = 0; i < 8; i++) {
1454 v = apic_read(base + i*0x10);
1455 for (j = 0; j < 32; j++) {
1456 if (v & (1<<j))
1457 printk("1");
1458 else
1459 printk("0");
1460 }
1461 printk("\n");
1462 }
1463 }
1464
1465 __apicdebuginit(void) print_local_APIC(void *dummy)
1466 {
1467 unsigned int v, ver, maxlvt;
1468 unsigned long icr;
1469
1470 if (apic_verbosity == APIC_QUIET)
1471 return;
1472
1473 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1474 smp_processor_id(), hard_smp_processor_id());
1475 v = apic_read(APIC_ID);
1476 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1477 v = apic_read(APIC_LVR);
1478 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1479 ver = GET_APIC_VERSION(v);
1480 maxlvt = lapic_get_maxlvt();
1481
1482 v = apic_read(APIC_TASKPRI);
1483 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1484
1485 v = apic_read(APIC_ARBPRI);
1486 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1487 v & APIC_ARBPRI_MASK);
1488 v = apic_read(APIC_PROCPRI);
1489 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1490
1491 v = apic_read(APIC_EOI);
1492 printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
1493 v = apic_read(APIC_RRR);
1494 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1495 v = apic_read(APIC_LDR);
1496 printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1497 v = apic_read(APIC_DFR);
1498 printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1499 v = apic_read(APIC_SPIV);
1500 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1501
1502 printk(KERN_DEBUG "... APIC ISR field:\n");
1503 print_APIC_bitfield(APIC_ISR);
1504 printk(KERN_DEBUG "... APIC TMR field:\n");
1505 print_APIC_bitfield(APIC_TMR);
1506 printk(KERN_DEBUG "... APIC IRR field:\n");
1507 print_APIC_bitfield(APIC_IRR);
1508
1509 v = apic_read(APIC_ESR);
1510 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1511
1512 icr = apic_icr_read();
1513 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
1514 printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
1515
1516 v = apic_read(APIC_LVTT);
1517 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
1518
1519 if (maxlvt > 3) { /* PC is LVT#4. */
1520 v = apic_read(APIC_LVTPC);
1521 printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
1522 }
1523 v = apic_read(APIC_LVT0);
1524 printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
1525 v = apic_read(APIC_LVT1);
1526 printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
1527
1528 if (maxlvt > 2) { /* ERR is LVT#3. */
1529 v = apic_read(APIC_LVTERR);
1530 printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
1531 }
1532
1533 v = apic_read(APIC_TMICT);
1534 printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
1535 v = apic_read(APIC_TMCCT);
1536 printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
1537 v = apic_read(APIC_TDCR);
1538 printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
1539 printk("\n");
1540 }
1541
1542 __apicdebuginit(void) print_all_local_APICs(void)
1543 {
1544 on_each_cpu(print_local_APIC, NULL, 1);
1545 }
1546
1547 __apicdebuginit(void) print_PIC(void)
1548 {
1549 unsigned int v;
1550 unsigned long flags;
1551
1552 if (apic_verbosity == APIC_QUIET)
1553 return;
1554
1555 printk(KERN_DEBUG "\nprinting PIC contents\n");
1556
1557 spin_lock_irqsave(&i8259A_lock, flags);
1558
1559 v = inb(0xa1) << 8 | inb(0x21);
1560 printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
1561
1562 v = inb(0xa0) << 8 | inb(0x20);
1563 printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
1564
1565 outb(0x0b,0xa0);
1566 outb(0x0b,0x20);
1567 v = inb(0xa0) << 8 | inb(0x20);
1568 outb(0x0a,0xa0);
1569 outb(0x0a,0x20);
1570
1571 spin_unlock_irqrestore(&i8259A_lock, flags);
1572
1573 printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
1574
1575 v = inb(0x4d1) << 8 | inb(0x4d0);
1576 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1577 }
1578
1579 __apicdebuginit(int) print_all_ICs(void)
1580 {
1581 print_PIC();
1582 print_all_local_APICs();
1583 print_IO_APIC();
1584
1585 return 0;
1586 }
1587
1588 fs_initcall(print_all_ICs);
1589
1590
1591 /* Where, if anywhere, is the i8259 connected in external int mode */
1592 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
1593
1594 void __init enable_IO_APIC(void)
1595 {
1596 union IO_APIC_reg_01 reg_01;
1597 int i8259_apic, i8259_pin;
1598 int apic;
1599 unsigned long flags;
1600
1601 /*
1602 * The number of IO-APIC IRQ registers (== #pins):
1603 */
1604 for (apic = 0; apic < nr_ioapics; apic++) {
1605 spin_lock_irqsave(&ioapic_lock, flags);
1606 reg_01.raw = io_apic_read(apic, 1);
1607 spin_unlock_irqrestore(&ioapic_lock, flags);
1608 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1609 }
1610 for(apic = 0; apic < nr_ioapics; apic++) {
1611 int pin;
1612 /* See if any of the pins is in ExtINT mode */
1613 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1614 struct IO_APIC_route_entry entry;
1615 entry = ioapic_read_entry(apic, pin);
1616
1617 /* If the interrupt line is enabled and in ExtInt mode
1618 * I have found the pin where the i8259 is connected.
1619 */
1620 if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1621 ioapic_i8259.apic = apic;
1622 ioapic_i8259.pin = pin;
1623 goto found_i8259;
1624 }
1625 }
1626 }
1627 found_i8259:
1628 /* Look to see if the MP table has reported the ExtINT */
1629 i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
1630 i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1631 /* Trust the MP table if nothing is setup in the hardware */
1632 if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1633 printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1634 ioapic_i8259.pin = i8259_pin;
1635 ioapic_i8259.apic = i8259_apic;
1636 }
1637 /* Complain if the MP table and the hardware disagree */
1638 if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1639 (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1640 {
1641 printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1642 }
1643
1644 /*
1645 * Do not trust the IO-APIC being empty at bootup
1646 */
1647 clear_IO_APIC();
1648 }
1649
1650 /*
1651 * Not an __init, needed by the reboot code
1652 */
1653 void disable_IO_APIC(void)
1654 {
1655 /*
1656 * Clear the IO-APIC before rebooting:
1657 */
1658 clear_IO_APIC();
1659
1660 /*
1661 * If the i8259 is routed through an IOAPIC
1662 * Put that IOAPIC in virtual wire mode
1663 * so legacy interrupts can be delivered.
1664 */
1665 if (ioapic_i8259.pin != -1) {
1666 struct IO_APIC_route_entry entry;
1667
1668 memset(&entry, 0, sizeof(entry));
1669 entry.mask = 0; /* Enabled */
1670 entry.trigger = 0; /* Edge */
1671 entry.irr = 0;
1672 entry.polarity = 0; /* High */
1673 entry.delivery_status = 0;
1674 entry.dest_mode = 0; /* Physical */
1675 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1676 entry.vector = 0;
1677 entry.dest = read_apic_id();
1678
1679 /*
1680 * Add it to the IO-APIC irq-routing table:
1681 */
1682 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1683 }
1684
1685 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1686 }
1687
1688 static int no_timer_check;
1689
1690 static int __init notimercheck(char *s)
1691 {
1692 no_timer_check = 1;
1693 return 1;
1694 }
1695 __setup("no_timer_check", notimercheck);
1696
1697 /*
1698 * There is a nasty bug in some older SMP boards, their mptable lies
1699 * about the timer IRQ. We do the following to work around the situation:
1700 *
1701 * - timer IRQ defaults to IO-APIC IRQ
1702 * - if this function detects that timer IRQs are defunct, then we fall
1703 * back to ISA timer IRQs
1704 */
1705 static int __init timer_irq_works(void)
1706 {
1707 unsigned long t1 = jiffies;
1708 unsigned long flags;
1709
1710 if (no_timer_check)
1711 return 1;
1712
1713 local_save_flags(flags);
1714 local_irq_enable();
1715 /* Let ten ticks pass... */
1716 mdelay((10 * 1000) / HZ);
1717 local_irq_restore(flags);
1718
1719 /*
1720 * Expect a few ticks at least, to be sure some possible
1721 * glue logic does not lock up after one or two first
1722 * ticks in a non-ExtINT mode. Also the local APIC
1723 * might have cached one ExtINT interrupt. Finally, at
1724 * least one tick may be lost due to delays.
1725 */
1726
1727 /* jiffies wrap? */
1728 if (time_after(jiffies, t1 + 4))
1729 return 1;
1730 return 0;
1731 }
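/*
 * timer_irq_works() lets roughly ten ticks pass with interrupts enabled
 * and declares the timer routing good if jiffies advanced by more than
 * four; "no_timer_check" skips the test entirely.
 */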
1732
1733 /*
1734 * In the SMP+IOAPIC case it might happen that there are an unspecified
1735 * number of pending IRQ events unhandled. These cases are very rare,
1736 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
1737 * better to do it this way as thus we do not have to be aware of
1738 * 'pending' interrupts in the IRQ path, except at this point.
1739 */
1740 /*
1741 * Edge triggered needs to resend any interrupt
1742 * that was delayed but this is now handled in the device
1743 * independent code.
1744 */
1745
1746 /*
1747 * Starting up an edge-triggered IO-APIC interrupt is
1748 * nasty - we need to make sure that we get the edge.
1749 * If it is already asserted for some reason, we need to
1750 * return 1 to indicate that it was pending.
1751 *
1752 * This is not complete - we should be able to fake
1753 * an edge even if it isn't on the 8259A...
1754 */
1755
1756 static unsigned int startup_ioapic_irq(unsigned int irq)
1757 {
1758 int was_pending = 0;
1759 unsigned long flags;
1760
1761 spin_lock_irqsave(&ioapic_lock, flags);
1762 if (irq < 16) {
1763 disable_8259A_irq(irq);
1764 if (i8259A_irq_pending(irq))
1765 was_pending = 1;
1766 }
1767 __unmask_IO_APIC_irq(irq);
1768 spin_unlock_irqrestore(&ioapic_lock, flags);
1769
1770 return was_pending;
1771 }
1772
1773 static int ioapic_retrigger_irq(unsigned int irq)
1774 {
1775 struct irq_cfg *cfg = irq_cfg(irq);
1776 unsigned long flags;
1777
1778 spin_lock_irqsave(&vector_lock, flags);
1779 send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
1780 spin_unlock_irqrestore(&vector_lock, flags);
1781
1782 return 1;
1783 }
1784
1785 /*
1786 * Level and edge triggered IO-APIC interrupts need different handling,
1787 * so we use two separate IRQ descriptors. Edge triggered IRQs can be
1788 * handled with the level-triggered descriptor, but that one has slightly
1789 * more overhead. Level-triggered interrupts cannot be handled with the
1790 * edge-triggered handler, without risking IRQ storms and other ugly
1791 * races.
1792 */
1793
1794 #ifdef CONFIG_SMP
1795
1796 #ifdef CONFIG_INTR_REMAP
1797 static void ir_irq_migration(struct work_struct *work);
1798
1799 static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1800
1801 /*
1802 * Migrate the IO-APIC irq in the presence of intr-remapping.
1803 *
1804 * For edge triggered, irq migration is a simple atomic update (of vector
1805 * and cpu destination) of the IRTE and a flush of the hardware cache.
1806 *
1807 * For level triggered, we need to modify the io-apic RTE as well with the updated
1808 * vector information, along with modifying the IRTE with vector and destination.
1809 * So irq migration for level triggered is a little bit more complex compared to
1810 * edge triggered migration. But the good news is, we use the same algorithm
1811 * for level triggered migration as we have today, only difference being,
1812 * we now initiate the irq migration from process context instead of the
1813 * interrupt context.
1814 *
1815 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1816 * suppression) to the IO-APIC, level triggered irq migration will also be
1817 * as simple as edge triggered migration and we can do the irq migration
1818 * with a simple atomic update to IO-APIC RTE.
1819 */
1820 static void migrate_ioapic_irq(int irq, cpumask_t mask)
1821 {
1822 struct irq_cfg *cfg;
1823 struct irq_desc *desc;
1824 cpumask_t tmp, cleanup_mask;
1825 struct irte irte;
1826 int modify_ioapic_rte;
1827 unsigned int dest;
1828 unsigned long flags;
1829
1830 cpus_and(tmp, mask, cpu_online_map);
1831 if (cpus_empty(tmp))
1832 return;
1833
1834 if (get_irte(irq, &irte))
1835 return;
1836
1837 if (assign_irq_vector(irq, mask))
1838 return;
1839
1840 cfg = irq_cfg(irq);
1841 cpus_and(tmp, cfg->domain, mask);
1842 dest = cpu_mask_to_apicid(tmp);
1843
1844 desc = irq_to_desc(irq);
1845 modify_ioapic_rte = desc->status & IRQ_LEVEL;
1846 if (modify_ioapic_rte) {
1847 spin_lock_irqsave(&ioapic_lock, flags);
1848 __target_IO_APIC_irq(irq, dest, cfg->vector);
1849 spin_unlock_irqrestore(&ioapic_lock, flags);
1850 }
1851
1852 irte.vector = cfg->vector;
1853 irte.dest_id = IRTE_DEST(dest);
1854
1855 /*
1856 * Modify the IRTE and flush the interrupt entry cache.
1857 */
1858 modify_irte(irq, &irte);
1859
1860 if (cfg->move_in_progress) {
1861 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1862 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1863 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1864 cfg->move_in_progress = 0;
1865 }
1866
1867 desc->affinity = mask;
1868 }
1869
1870 static int migrate_irq_remapped_level(int irq)
1871 {
1872 int ret = -1;
1873 struct irq_desc *desc = irq_to_desc(irq);
1874
1875 mask_IO_APIC_irq(irq);
1876
1877 if (io_apic_level_ack_pending(irq)) {
1878 /*
1879 * Interrupt in progress. Migrating irq now will change the
1880 * vector information in the IO-APIC RTE and that will confuse
1881 * the EOI broadcast performed by cpu.
1882 * So, delay the irq migration to the next instance.
1883 */
1884 schedule_delayed_work(&ir_migration_work, 1);
1885 goto unmask;
1886 }
1887
1888 /* everything is clear, we have right of way */
1889 migrate_ioapic_irq(irq, desc->pending_mask);
1890
1891 ret = 0;
1892 desc->status &= ~IRQ_MOVE_PENDING;
1893 cpus_clear(desc->pending_mask);
1894
1895 unmask:
1896 unmask_IO_APIC_irq(irq);
1897 return ret;
1898 }
1899
1900 static void ir_irq_migration(struct work_struct *work)
1901 {
1902 unsigned int irq;
1903 struct irq_desc *desc;
1904
1905 for_each_irq_desc(irq, desc) {
1906 if (desc->status & IRQ_MOVE_PENDING) {
1907 unsigned long flags;
1908
1909 spin_lock_irqsave(&desc->lock, flags);
1910 if (!desc->chip->set_affinity ||
1911 !(desc->status & IRQ_MOVE_PENDING)) {
1912 desc->status &= ~IRQ_MOVE_PENDING;
1913 spin_unlock_irqrestore(&desc->lock, flags);
1914 continue;
1915 }
1916
1917 desc->chip->set_affinity(irq, desc->pending_mask);
1918 spin_unlock_irqrestore(&desc->lock, flags);
1919 }
1920 }
1921 }
1922
1923 /*
1924 * Migrates the IRQ destination in the process context.
1925 */
1926 static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1927 {
1928 struct irq_desc *desc = irq_to_desc(irq);
1929
1930 if (desc->status & IRQ_LEVEL) {
1931 desc->status |= IRQ_MOVE_PENDING;
1932 desc->pending_mask = mask;
1933 migrate_irq_remapped_level(irq);
1934 return;
1935 }
1936
1937 migrate_ioapic_irq(irq, mask);
1938 }
1939 #endif
1940
1941 asmlinkage void smp_irq_move_cleanup_interrupt(void)
1942 {
1943 unsigned vector, me;
1944 ack_APIC_irq();
1945 exit_idle();
1946 irq_enter();
1947
1948 me = smp_processor_id();
1949 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
1950 unsigned int irq;
1951 struct irq_desc *desc;
1952 struct irq_cfg *cfg;
1953 irq = __get_cpu_var(vector_irq)[vector];
1954
1955 desc = irq_to_desc(irq);
1956 if (!desc)
1957 continue;
1958
1959 cfg = irq_cfg(irq);
1960 spin_lock(&desc->lock);
1961 if (!cfg->move_cleanup_count)
1962 goto unlock;
1963
1964 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
1965 goto unlock;
1966
1967 __get_cpu_var(vector_irq)[vector] = -1;
1968 cfg->move_cleanup_count--;
1969 unlock:
1970 spin_unlock(&desc->lock);
1971 }
1972
1973 irq_exit();
1974 }
1975
1976 static void irq_complete_move(unsigned int irq)
1977 {
1978 struct irq_cfg *cfg = irq_cfg(irq);
1979 unsigned vector, me;
1980
1981 if (likely(!cfg->move_in_progress))
1982 return;
1983
1984 vector = ~get_irq_regs()->orig_ax;
1985 me = smp_processor_id();
1986 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
1987 cpumask_t cleanup_mask;
1988
1989 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1990 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1991 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1992 cfg->move_in_progress = 0;
1993 }
1994 }
1995 #else
1996 static inline void irq_complete_move(unsigned int irq) {}
1997 #endif
1998 #ifdef CONFIG_INTR_REMAP
1999 static void ack_x2apic_level(unsigned int irq)
2000 {
2001 ack_x2APIC_irq();
2002 }
2003
2004 static void ack_x2apic_edge(unsigned int irq)
2005 {
2006 ack_x2APIC_irq();
2007 }
2008 #endif
2009
2010 static void ack_apic_edge(unsigned int irq)
2011 {
2012 irq_complete_move(irq);
2013 move_native_irq(irq);
2014 ack_APIC_irq();
2015 }
2016
2017 static void ack_apic_level(unsigned int irq)
2018 {
2019 int do_unmask_irq = 0;
2020
2021 irq_complete_move(irq);
2022 #ifdef CONFIG_GENERIC_PENDING_IRQ
2023 /* If we are moving the irq we need to mask it */
2024 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
2025 do_unmask_irq = 1;
2026 mask_IO_APIC_irq(irq);
2027 }
2028 #endif
2029
2030 /*
2031 * We must acknowledge the irq before we move it or the acknowledge will
2032 * not propagate properly.
2033 */
2034 ack_APIC_irq();
2035
2036 /* Now we can move and re-enable the irq */
2037 if (unlikely(do_unmask_irq)) {
2038 /* Only migrate the irq if the ack has been received.
2039 *
2040 * On rare occasions the broadcast level triggered ack gets
2041 * delayed going to ioapics, and if we reprogram the
2042 * vector while Remote IRR is still set the irq will never
2043 * fire again.
2044 *
2045 * To prevent this scenario we read the Remote IRR bit
2046 * of the ioapic. This has two effects.
2047 * - On any sane system the read of the ioapic will
2048 * flush writes (and acks) going to the ioapic from
2049 * this cpu.
2050 * - We get to see if the ACK has actually been delivered.
2051 *
2052 * Based on failed experiments of reprogramming the
2053 * ioapic entry from outside of irq context starting
2054 * with masking the ioapic entry and then polling until
2055 * Remote IRR was clear before reprogramming the
2056 * ioapic I don't trust the Remote IRR bit to be
2057 * completely accurate.
2058 *
2059 * However there appears to be no other way to plug
2060 * this race, so if the Remote IRR bit is not
2061 * accurate and is causing problems then it is a hardware bug
2062 * and you can go talk to the chipset vendor about it.
2063 */
2064 if (!io_apic_level_ack_pending(irq))
2065 move_masked_irq(irq);
2066 unmask_IO_APIC_irq(irq);
2067 }
2068 }
2069
2070 static struct irq_chip ioapic_chip __read_mostly = {
2071 .name = "IO-APIC",
2072 .startup = startup_ioapic_irq,
2073 .mask = mask_IO_APIC_irq,
2074 .unmask = unmask_IO_APIC_irq,
2075 .ack = ack_apic_edge,
2076 .eoi = ack_apic_level,
2077 #ifdef CONFIG_SMP
2078 .set_affinity = set_ioapic_affinity_irq,
2079 #endif
2080 .retrigger = ioapic_retrigger_irq,
2081 };
2082
2083 #ifdef CONFIG_INTR_REMAP
2084 static struct irq_chip ir_ioapic_chip __read_mostly = {
2085 .name = "IR-IO-APIC",
2086 .startup = startup_ioapic_irq,
2087 .mask = mask_IO_APIC_irq,
2088 .unmask = unmask_IO_APIC_irq,
2089 .ack = ack_x2apic_edge,
2090 .eoi = ack_x2apic_level,
2091 #ifdef CONFIG_SMP
2092 .set_affinity = set_ir_ioapic_affinity_irq,
2093 #endif
2094 .retrigger = ioapic_retrigger_irq,
2095 };
2096 #endif
2097
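/*
 * Point every IO-APIC irq that never received a vector either back at
 * the 8259A (legacy irqs < 16) or at no_irq_chip, so that a stray
 * interrupt cannot reach an unconfigured handler.
 */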
2098 static inline void init_IO_APIC_traps(void)
2099 {
2100 int irq;
2101 struct irq_desc *desc;
2102 struct irq_cfg *cfg;
2103
2104 /*
2105 * NOTE! The local APIC isn't very good at handling
2106 * multiple interrupts at the same interrupt level.
2107 * As the interrupt level is determined by taking the
2108 * vector number and shifting that right by 4, we
2109 * want to spread these out a bit so that they don't
2110 * all fall in the same interrupt level.
2111 *
2112 * Also, we've got to be careful not to trash gate
2113 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2114 */
2115 for_each_irq_cfg(cfg) {
2116 irq = cfg->irq;
2117 if (IO_APIC_IRQ(irq) && !cfg->vector) {
2118 /*
2119 * Hmm.. We don't have an entry for this,
2120 * so default to an old-fashioned 8259
2121 * interrupt if we can..
2122 */
2123 if (irq < 16)
2124 make_8259A_irq(irq);
2125 else {
2126 desc = irq_to_desc(irq);
2127 /* Strange. Oh, well.. */
2128 desc->chip = &no_irq_chip;
2129 }
2130 }
2131 }
2132 }
2133
2134 static void unmask_lapic_irq(unsigned int irq)
2135 {
2136 unsigned long v;
2137
2138 v = apic_read(APIC_LVT0);
2139 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2140 }
2141
2142 static void mask_lapic_irq(unsigned int irq)
2143 {
2144 unsigned long v;
2145
2146 v = apic_read(APIC_LVT0);
2147 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2148 }
2149
2150 static void ack_lapic_irq(unsigned int irq)
2151 {
2152 ack_APIC_irq();
2153 }
2154
2155 static struct irq_chip lapic_chip __read_mostly = {
2156 .name = "local-APIC",
2157 .mask = mask_lapic_irq,
2158 .unmask = unmask_lapic_irq,
2159 .ack = ack_lapic_irq,
2160 };
2161
2162 static void lapic_register_intr(int irq)
2163 {
2164 struct irq_desc *desc;
2165
2166 desc = irq_to_desc(irq);
2167 desc->status &= ~IRQ_LEVEL;
2168 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2169 "edge");
2170 }
2171
2172 static void __init setup_nmi(void)
2173 {
2174 /*
2175 * Dirty trick to enable the NMI watchdog ...
2176 * We put the 8259A master into AEOI mode and
2177 * unmask on all local APICs LVT0 as NMI.
2178 *
2179 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2180 * is from Maciej W. Rozycki - so we do not have to EOI from
2181 * the NMI handler or the timer interrupt.
2182 */
2183 printk(KERN_INFO "activating NMI Watchdog ...");
2184
2185 enable_NMI_through_LVT0();
2186
2187 printk(" done.\n");
2188 }
2189
2190 /*
2191  * This looks a bit hackish but it's about the only way of sending
2192 * a few INTA cycles to 8259As and any associated glue logic. ICR does
2193 * not support the ExtINT mode, unfortunately. We need to send these
2194 * cycles as some i82489DX-based boards have glue logic that keeps the
2195 * 8259A interrupt line asserted until INTA. --macro
2196 */
2197 static inline void __init unlock_ExtINT_logic(void)
2198 {
2199 int apic, pin, i;
2200 struct IO_APIC_route_entry entry0, entry1;
2201 unsigned char save_control, save_freq_select;
2202
2203 pin = find_isa_irq_pin(8, mp_INT);
2204 apic = find_isa_irq_apic(8, mp_INT);
2205 if (pin == -1)
2206 return;
2207
2208 entry0 = ioapic_read_entry(apic, pin);
2209
2210 clear_IO_APIC_pin(apic, pin);
2211
2212 memset(&entry1, 0, sizeof(entry1));
2213
2214 entry1.dest_mode = 0; /* physical delivery */
2215 entry1.mask = 0; /* unmask IRQ now */
2216 entry1.dest = hard_smp_processor_id();
2217 entry1.delivery_mode = dest_ExtINT;
2218 entry1.polarity = entry0.polarity;
2219 entry1.trigger = 0;
2220 entry1.vector = 0;
2221
2222 ioapic_write_entry(apic, pin, entry1);
2223
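	/*
	 * Kick the RTC: enable its periodic interrupt at a fast rate and
	 * wait (up to roughly a second) for the periodic flag.  Each RTC
	 * interrupt delivered through the ExtINT entry set up above
	 * produces the INTA cycles we are after.
	 */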
2224 save_control = CMOS_READ(RTC_CONTROL);
2225 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2226 CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2227 RTC_FREQ_SELECT);
2228 CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2229
2230 i = 100;
2231 while (i-- > 0) {
2232 mdelay(10);
2233 if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2234 i -= 10;
2235 }
2236
2237 CMOS_WRITE(save_control, RTC_CONTROL);
2238 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2239 clear_IO_APIC_pin(apic, pin);
2240
2241 ioapic_write_entry(apic, pin, entry0);
2242 }
2243
2244 static int disable_timer_pin_1 __initdata;
2245 /* Actually the following option is obsolete, but keep it for paranoid reasons -AK */
2246 static int __init disable_timer_pin_setup(char *arg)
2247 {
2248 disable_timer_pin_1 = 1;
2249 return 0;
2250 }
2251 early_param("disable_timer_pin_1", disable_timer_pin_setup);
2252
2253 int timer_through_8259 __initdata;
2254
2255 /*
2256 * This code may look a bit paranoid, but it's supposed to cooperate with
2257 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
2258 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
2259 * fanatically on his truly buggy board.
2260 *
2261 * FIXME: really need to revamp this for modern platforms only.
2262 */
2263 static inline void __init check_timer(void)
2264 {
2265 struct irq_cfg *cfg = irq_cfg(0);
2266 int apic1, pin1, apic2, pin2;
2267 unsigned long flags;
2268 int no_pin1 = 0;
2269
2270 local_irq_save(flags);
2271
2272 /*
2273 * get/set the timer IRQ vector:
2274 */
2275 disable_8259A_irq(0);
2276 assign_irq_vector(0, TARGET_CPUS);
2277
2278 /*
2279 * As IRQ0 is to be enabled in the 8259A, the virtual
2280 * wire has to be disabled in the local APIC.
2281 */
2282 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2283 init_8259A(1);
2284
2285 pin1 = find_isa_irq_pin(0, mp_INT);
2286 apic1 = find_isa_irq_apic(0, mp_INT);
2287 pin2 = ioapic_i8259.pin;
2288 apic2 = ioapic_i8259.apic;
2289
2290 apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
2291 "apic1=%d pin1=%d apic2=%d pin2=%d\n",
2292 cfg->vector, apic1, pin1, apic2, pin2);
2293
2294 /*
2295 * Some BIOS writers are clueless and report the ExtINTA
2296 * I/O APIC input from the cascaded 8259A as the timer
2297 * interrupt input. So just in case, if only one pin
2298 * was found above, try it both directly and through the
2299 * 8259A.
2300 */
2301 if (pin1 == -1) {
2302 #ifdef CONFIG_INTR_REMAP
2303 if (intr_remapping_enabled)
2304 panic("BIOS bug: timer not connected to IO-APIC");
2305 #endif
2306 pin1 = pin2;
2307 apic1 = apic2;
2308 no_pin1 = 1;
2309 } else if (pin2 == -1) {
2310 pin2 = pin1;
2311 apic2 = apic1;
2312 }
2313
2314 if (pin1 != -1) {
2315 /*
2316 * Ok, does IRQ0 through the IOAPIC work?
2317 */
2318 if (no_pin1) {
2319 add_pin_to_irq(0, apic1, pin1);
2320 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2321 }
2322 unmask_IO_APIC_irq(0);
2323 if (timer_irq_works()) {
2324 if (nmi_watchdog == NMI_IO_APIC) {
2325 setup_nmi();
2326 enable_8259A_irq(0);
2327 }
2328 if (disable_timer_pin_1 > 0)
2329 clear_IO_APIC_pin(0, pin1);
2330 goto out;
2331 }
2332 #ifdef CONFIG_INTR_REMAP
2333 if (intr_remapping_enabled)
2334 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2335 #endif
2336 clear_IO_APIC_pin(apic1, pin1);
2337 if (!no_pin1)
2338 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
2339 "8254 timer not connected to IO-APIC\n");
2340
2341 apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
2342 "(IRQ0) through the 8259A ...\n");
2343 apic_printk(APIC_QUIET, KERN_INFO
2344 "..... (found apic %d pin %d) ...\n", apic2, pin2);
2345 /*
2346 * legacy devices should be connected to IO APIC #0
2347 */
2348 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
2349 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2350 unmask_IO_APIC_irq(0);
2351 enable_8259A_irq(0);
2352 if (timer_irq_works()) {
2353 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2354 timer_through_8259 = 1;
2355 if (nmi_watchdog == NMI_IO_APIC) {
2356 disable_8259A_irq(0);
2357 setup_nmi();
2358 enable_8259A_irq(0);
2359 }
2360 goto out;
2361 }
2362 /*
2363 * Cleanup, just in case ...
2364 */
2365 disable_8259A_irq(0);
2366 clear_IO_APIC_pin(apic2, pin2);
2367 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2368 }
2369
2370 if (nmi_watchdog == NMI_IO_APIC) {
2371 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
2372 "through the IO-APIC - disabling NMI Watchdog!\n");
2373 nmi_watchdog = NMI_NONE;
2374 }
2375
2376 apic_printk(APIC_QUIET, KERN_INFO
2377 "...trying to set up timer as Virtual Wire IRQ...\n");
2378
2379 lapic_register_intr(0);
2380 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
2381 enable_8259A_irq(0);
2382
2383 if (timer_irq_works()) {
2384 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2385 goto out;
2386 }
2387 disable_8259A_irq(0);
2388 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
2389 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
2390
2391 apic_printk(APIC_QUIET, KERN_INFO
2392 "...trying to set up timer as ExtINT IRQ...\n");
2393
2394 init_8259A(0);
2395 make_8259A_irq(0);
2396 apic_write(APIC_LVT0, APIC_DM_EXTINT);
2397
2398 unlock_ExtINT_logic();
2399
2400 if (timer_irq_works()) {
2401 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2402 goto out;
2403 }
2404 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2405 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
2406 "report. Then try booting with the 'noapic' option.\n");
2407 out:
2408 local_irq_restore(flags);
2409 }
2410
2411 /*
2412 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2413 * to devices. However there may be an I/O APIC pin available for
2414 * this interrupt regardless. The pin may be left unconnected, but
2415 * typically it will be reused as an ExtINT cascade interrupt for
2416 * the master 8259A. In the MPS case such a pin will normally be
2417 * reported as an ExtINT interrupt in the MP table. With ACPI
2418 * there is no provision for ExtINT interrupts, and in the absence
2419 * of an override it would be treated as an ordinary ISA I/O APIC
2420 * interrupt, that is edge-triggered and unmasked by default. We
2421 * used to do this, but it caused problems on some systems because
2422 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2423 * the same ExtINT cascade interrupt to drive the local APIC of the
2424 * bootstrap processor. Therefore we refrain from routing IRQ2 to
2425 * the I/O APIC in all cases now. No actual device should request
2426 * it anyway. --macro
2427 */
2428 #define PIC_IRQS (1<<2)
2429
2430 void __init setup_IO_APIC(void)
2431 {
2432
2433 /*
2434 	 * calling enable_IO_APIC() has been moved to setup_local_APIC() for the BP
2435 */
2436
2437 io_apic_irqs = ~PIC_IRQS;
2438
2439 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
2440
2441 sync_Arb_IDs();
2442 setup_IO_APIC_irqs();
2443 init_IO_APIC_traps();
2444 check_timer();
2445 }
2446
2447 struct sysfs_ioapic_data {
2448 struct sys_device dev;
2449 struct IO_APIC_route_entry entry[0];
2450 };
2451 static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
2452
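/*
 * Snapshot all redirection table entries of this IOAPIC into the
 * sysdev's trailing entry[] array so they can be replayed on resume.
 */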
2453 static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
2454 {
2455 struct IO_APIC_route_entry *entry;
2456 struct sysfs_ioapic_data *data;
2457 int i;
2458
2459 data = container_of(dev, struct sysfs_ioapic_data, dev);
2460 entry = data->entry;
2461 	for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++)
2462 *entry = ioapic_read_entry(dev->id, i);
2463
2464 return 0;
2465 }
2466
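/*
 * Restore an IOAPIC after a sleep state: rewrite the APIC ID in
 * register 0 in case firmware changed it, then replay the saved
 * redirection entries.
 */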
2467 static int ioapic_resume(struct sys_device *dev)
2468 {
2469 struct IO_APIC_route_entry *entry;
2470 struct sysfs_ioapic_data *data;
2471 unsigned long flags;
2472 union IO_APIC_reg_00 reg_00;
2473 int i;
2474
2475 data = container_of(dev, struct sysfs_ioapic_data, dev);
2476 entry = data->entry;
2477
2478 spin_lock_irqsave(&ioapic_lock, flags);
2479 reg_00.raw = io_apic_read(dev->id, 0);
2480 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
2481 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
2482 io_apic_write(dev->id, 0, reg_00.raw);
2483 }
2484 spin_unlock_irqrestore(&ioapic_lock, flags);
2485 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2486 ioapic_write_entry(dev->id, i, entry[i]);
2487
2488 return 0;
2489 }
2490
2491 static struct sysdev_class ioapic_sysdev_class = {
2492 .name = "ioapic",
2493 .suspend = ioapic_suspend,
2494 .resume = ioapic_resume,
2495 };
2496
2497 static int __init ioapic_init_sysfs(void)
2498 {
2499 	struct sys_device *dev;
2500 int i, size, error;
2501
2502 error = sysdev_class_register(&ioapic_sysdev_class);
2503 if (error)
2504 return error;
2505
2506 	for (i = 0; i < nr_ioapics; i++) {
2507 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
2508 * sizeof(struct IO_APIC_route_entry);
2509 mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
2510 if (!mp_ioapic_data[i]) {
2511 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2512 continue;
2513 }
2514 dev = &mp_ioapic_data[i]->dev;
2515 dev->id = i;
2516 dev->cls = &ioapic_sysdev_class;
2517 error = sysdev_register(dev);
2518 if (error) {
2519 kfree(mp_ioapic_data[i]);
2520 mp_ioapic_data[i] = NULL;
2521 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2522 continue;
2523 }
2524 }
2525
2526 return 0;
2527 }
2528
2529 device_initcall(ioapic_init_sysfs);
2530
2531 /*
2532  * Dynamic irq allocation and deallocation
2533 */
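/*
 * Walk downwards from @irq_want to find an irq that is neither a legacy
 * 8259 irq nor already bound to a vector, allocate its irq_cfg on demand
 * and try to assign it a vector.  Returns the new irq, or 0 on failure.
 */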
2534 unsigned int create_irq_nr(unsigned int irq_want)
2535 {
2536 /* Allocate an unused irq */
2537 unsigned int irq;
2538 unsigned int new;
2539 unsigned long flags;
2540 struct irq_cfg *cfg_new;
2541
2542 #ifndef CONFIG_HAVE_SPARSE_IRQ
2543 irq_want = nr_irqs - 1;
2544 #endif
2545
2546 irq = 0;
2547 spin_lock_irqsave(&vector_lock, flags);
2548 for (new = irq_want; new > 0; new--) {
2549 if (platform_legacy_irq(new))
2550 continue;
2551 cfg_new = irq_cfg(new);
2552 if (cfg_new && cfg_new->vector != 0)
2553 continue;
2554 		/* check whether we need to create one */
2555 if (!cfg_new)
2556 cfg_new = irq_cfg_alloc(new);
2557 if (__assign_irq_vector(new, TARGET_CPUS) == 0)
2558 irq = new;
2559 break;
2560 }
2561 spin_unlock_irqrestore(&vector_lock, flags);
2562
2563 if (irq > 0) {
2564 dynamic_irq_init(irq);
2565 }
2566 return irq;
2567 }
2568
2569 int create_irq(void)
2570 {
2571 int irq;
2572
2573 irq = create_irq_nr(nr_irqs - 1);
2574
2575 if (irq == 0)
2576 irq = -1;
2577
2578 return irq;
2579 }
2580
2581 void destroy_irq(unsigned int irq)
2582 {
2583 unsigned long flags;
2584
2585 dynamic_irq_cleanup(irq);
2586
2587 #ifdef CONFIG_INTR_REMAP
2588 free_irte(irq);
2589 #endif
2590 spin_lock_irqsave(&vector_lock, flags);
2591 __clear_irq_vector(irq);
2592 spin_unlock_irqrestore(&vector_lock, flags);
2593 }
2594
2595 /*
2596 * MSI message composition
2597 */
2598 #ifdef CONFIG_PCI_MSI
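/*
 * Build the MSI address/data pair for @irq: assign a vector, pick a
 * destination APIC ID from the resulting domain and encode both in the
 * message.  Without interrupt remapping the result is roughly
 *
 *	address = MSI_ADDR_BASE_LO | dest mode | redirection hint | dest ID
 *	data    = edge trigger | delivery mode | vector
 *
 * With remapping, the message instead carries the IRTE index in the
 * remappable format and the real vector/destination live in the IRTE.
 */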
2599 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2600 {
2601 struct irq_cfg *cfg;
2602 int err;
2603 unsigned dest;
2604 cpumask_t tmp;
2605
2606 tmp = TARGET_CPUS;
2607 err = assign_irq_vector(irq, tmp);
2608 if (err)
2609 return err;
2610
2611 cfg = irq_cfg(irq);
2612 cpus_and(tmp, cfg->domain, tmp);
2613 dest = cpu_mask_to_apicid(tmp);
2614
2615 #ifdef CONFIG_INTR_REMAP
2616 if (irq_remapped(irq)) {
2617 struct irte irte;
2618 int ir_index;
2619 u16 sub_handle;
2620
2621 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2622 BUG_ON(ir_index == -1);
2623
2624 		memset(&irte, 0, sizeof(irte));
2625
2626 irte.present = 1;
2627 irte.dst_mode = INT_DEST_MODE;
2628 irte.trigger_mode = 0; /* edge */
2629 irte.dlvry_mode = INT_DELIVERY_MODE;
2630 irte.vector = cfg->vector;
2631 irte.dest_id = IRTE_DEST(dest);
2632
2633 modify_irte(irq, &irte);
2634
2635 msg->address_hi = MSI_ADDR_BASE_HI;
2636 msg->data = sub_handle;
2637 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2638 MSI_ADDR_IR_SHV |
2639 MSI_ADDR_IR_INDEX1(ir_index) |
2640 MSI_ADDR_IR_INDEX2(ir_index);
2641 } else
2642 #endif
2643 {
2644 msg->address_hi = MSI_ADDR_BASE_HI;
2645 msg->address_lo =
2646 MSI_ADDR_BASE_LO |
2647 ((INT_DEST_MODE == 0) ?
2648 MSI_ADDR_DEST_MODE_PHYSICAL:
2649 MSI_ADDR_DEST_MODE_LOGICAL) |
2650 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2651 MSI_ADDR_REDIRECTION_CPU:
2652 MSI_ADDR_REDIRECTION_LOWPRI) |
2653 MSI_ADDR_DEST_ID(dest);
2654
2655 msg->data =
2656 MSI_DATA_TRIGGER_EDGE |
2657 MSI_DATA_LEVEL_ASSERT |
2658 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2659 MSI_DATA_DELIVERY_FIXED:
2660 MSI_DATA_DELIVERY_LOWPRI) |
2661 MSI_DATA_VECTOR(cfg->vector);
2662 }
2663 return err;
2664 }
2665
2666 #ifdef CONFIG_SMP
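/*
 * Retarget an MSI interrupt: assign a vector valid for the new mask,
 * then rewrite only the vector and destination-ID fields of the
 * device's MSI message.
 */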
2667 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2668 {
2669 struct irq_cfg *cfg;
2670 struct msi_msg msg;
2671 unsigned int dest;
2672 cpumask_t tmp;
2673 struct irq_desc *desc;
2674
2675 cpus_and(tmp, mask, cpu_online_map);
2676 if (cpus_empty(tmp))
2677 return;
2678
2679 if (assign_irq_vector(irq, mask))
2680 return;
2681
2682 cfg = irq_cfg(irq);
2683 cpus_and(tmp, cfg->domain, mask);
2684 dest = cpu_mask_to_apicid(tmp);
2685
2686 read_msi_msg(irq, &msg);
2687
2688 msg.data &= ~MSI_DATA_VECTOR_MASK;
2689 msg.data |= MSI_DATA_VECTOR(cfg->vector);
2690 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
2691 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2692
2693 write_msi_msg(irq, &msg);
2694 desc = irq_to_desc(irq);
2695 desc->affinity = mask;
2696 }
2697
2698 #ifdef CONFIG_INTR_REMAP
2699 /*
2700 * Migrate the MSI irq to another cpumask. This migration is
2701 * done in the process context using interrupt-remapping hardware.
2702 */
2703 static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2704 {
2705 struct irq_cfg *cfg;
2706 unsigned int dest;
2707 cpumask_t tmp, cleanup_mask;
2708 struct irte irte;
2709 struct irq_desc *desc;
2710
2711 cpus_and(tmp, mask, cpu_online_map);
2712 if (cpus_empty(tmp))
2713 return;
2714
2715 if (get_irte(irq, &irte))
2716 return;
2717
2718 if (assign_irq_vector(irq, mask))
2719 return;
2720
2721 cfg = irq_cfg(irq);
2722 cpus_and(tmp, cfg->domain, mask);
2723 dest = cpu_mask_to_apicid(tmp);
2724
2725 irte.vector = cfg->vector;
2726 irte.dest_id = IRTE_DEST(dest);
2727
2728 /*
2729 * atomically update the IRTE with the new destination and vector.
2730 */
2731 modify_irte(irq, &irte);
2732
2733 /*
2734 * After this point, all the interrupts will start arriving
2735 * at the new destination. So, time to cleanup the previous
2736 * vector allocation.
2737 */
2738 if (cfg->move_in_progress) {
2739 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2740 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2741 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2742 cfg->move_in_progress = 0;
2743 }
2744
2745 desc = irq_to_desc(irq);
2746 desc->affinity = mask;
2747 }
2748 #endif
2749 #endif /* CONFIG_SMP */
2750
2751 /*
2752 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
2753 * which implement the MSI or MSI-X Capability Structure.
2754 */
2755 static struct irq_chip msi_chip = {
2756 .name = "PCI-MSI",
2757 .unmask = unmask_msi_irq,
2758 .mask = mask_msi_irq,
2759 .ack = ack_apic_edge,
2760 #ifdef CONFIG_SMP
2761 .set_affinity = set_msi_irq_affinity,
2762 #endif
2763 .retrigger = ioapic_retrigger_irq,
2764 };
2765
2766 #ifdef CONFIG_INTR_REMAP
2767 static struct irq_chip msi_ir_chip = {
2768 .name = "IR-PCI-MSI",
2769 .unmask = unmask_msi_irq,
2770 .mask = mask_msi_irq,
2771 .ack = ack_x2apic_edge,
2772 #ifdef CONFIG_SMP
2773 .set_affinity = ir_set_msi_irq_affinity,
2774 #endif
2775 .retrigger = ioapic_retrigger_irq,
2776 };
2777
2778 /*
2779 * Map the PCI dev to the corresponding remapping hardware unit
2780 * and allocate 'nvec' consecutive interrupt-remapping table entries
2781 * in it.
2782 */
2783 static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2784 {
2785 struct intel_iommu *iommu;
2786 int index;
2787
2788 iommu = map_dev_to_ir(dev);
2789 if (!iommu) {
2790 printk(KERN_ERR
2791 "Unable to map PCI %s to iommu\n", pci_name(dev));
2792 return -ENOENT;
2793 }
2794
2795 index = alloc_irte(iommu, irq, nvec);
2796 if (index < 0) {
2797 printk(KERN_ERR
2798 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2799 pci_name(dev));
2800 return -ENOSPC;
2801 }
2802 return index;
2803 }
2804 #endif
2805
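/*
 * Compose and write the MSI message for one msi_desc and install the
 * matching irq_chip: the remapped variant migrates in process context
 * (IRQ_MOVE_PCNTXT), the plain one acks through the local APIC.
 */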
2806 static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2807 {
2808 int ret;
2809 struct msi_msg msg;
2810
2811 ret = msi_compose_msg(dev, irq, &msg);
2812 if (ret < 0)
2813 return ret;
2814
2815 set_irq_msi(irq, desc);
2816 write_msi_msg(irq, &msg);
2817
2818 #ifdef CONFIG_INTR_REMAP
2819 if (irq_remapped(irq)) {
2820 struct irq_desc *desc = irq_to_desc(irq);
2821 /*
2822 * irq migration in process context
2823 */
2824 desc->status |= IRQ_MOVE_PCNTXT;
2825 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2826 } else
2827 #endif
2828 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2829
2830 return 0;
2831 }
2832
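/*
 * Derive a per-device starting point for dynamic irq numbers from the
 * PCI location: effectively (bus << 20) | (devfn << 12), leaving the
 * low 12 bits free, e.g. bus 0x02, devfn 0x18 -> 0x218000.
 */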
2833 static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
2834 {
2835 unsigned int irq;
2836
2837 irq = dev->bus->number;
2838 irq <<= 8;
2839 irq |= dev->devfn;
2840 irq <<= 12;
2841
2842 return irq;
2843 }
2844
2845 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2846 {
2847 unsigned int irq;
2848 int ret;
2849 unsigned int irq_want;
2850
2851 irq_want = build_irq_for_pci_dev(dev) + 0x100;
2852
2853 irq = create_irq_nr(irq_want);
2854 if (irq == 0)
2855 return -1;
2856
2857 #ifdef CONFIG_INTR_REMAP
2858 if (!intr_remapping_enabled)
2859 goto no_ir;
2860
2861 ret = msi_alloc_irte(dev, irq, 1);
2862 if (ret < 0)
2863 goto error;
2864 no_ir:
2865 #endif
2866 ret = setup_msi_irq(dev, desc, irq);
2867 if (ret < 0) {
2868 destroy_irq(irq);
2869 return ret;
2870 }
2871 return 0;
2872
2873 #ifdef CONFIG_INTR_REMAP
2874 error:
2875 destroy_irq(irq);
2876 return ret;
2877 #endif
2878 }
2879
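/*
 * Multi-MSI/MSI-X path: allocate one irq per descriptor.  With
 * interrupt remapping the first descriptor allocates a consecutive
 * block of @nvec IRTEs and the later ones are mapped onto that block
 * through their sub_handle.
 */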
2880 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2881 {
2882 unsigned int irq;
2883 int ret, sub_handle;
2884 struct msi_desc *desc;
2885 unsigned int irq_want;
2886
2887 #ifdef CONFIG_INTR_REMAP
2888 	struct intel_iommu *iommu = NULL;
2889 int index = 0;
2890 #endif
2891
2892 irq_want = build_irq_for_pci_dev(dev) + 0x100;
2893 sub_handle = 0;
2894 list_for_each_entry(desc, &dev->msi_list, list) {
2895 irq = create_irq_nr(irq_want--);
2896 if (irq == 0)
2897 return -1;
2898 #ifdef CONFIG_INTR_REMAP
2899 if (!intr_remapping_enabled)
2900 goto no_ir;
2901
2902 if (!sub_handle) {
2903 /*
2904 			 * allocate the consecutive block of IRTEs
2905 * for 'nvec'
2906 */
2907 index = msi_alloc_irte(dev, irq, nvec);
2908 if (index < 0) {
2909 ret = index;
2910 goto error;
2911 }
2912 } else {
2913 iommu = map_dev_to_ir(dev);
2914 if (!iommu) {
2915 ret = -ENOENT;
2916 goto error;
2917 }
2918 /*
2919 			 * set up the mapping between the irq and the IRTE
2920 			 * base index plus sub_handle, which points to the
2921 			 * appropriate interrupt remap table entry.
2922 */
2923 set_irte_irq(irq, iommu, index, sub_handle);
2924 }
2925 no_ir:
2926 #endif
2927 ret = setup_msi_irq(dev, desc, irq);
2928 if (ret < 0)
2929 goto error;
2930 sub_handle++;
2931 }
2932 return 0;
2933
2934 error:
2935 destroy_irq(irq);
2936 return ret;
2937 }
2938
2939 void arch_teardown_msi_irq(unsigned int irq)
2940 {
2941 destroy_irq(irq);
2942 }
2943
2944 #ifdef CONFIG_DMAR
2945 #ifdef CONFIG_SMP
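/*
 * Same retargeting scheme as set_msi_irq_affinity(), but the message is
 * read and written through the DMAR unit's own registers.
 */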
2946 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2947 {
2948 struct irq_cfg *cfg;
2949 struct msi_msg msg;
2950 unsigned int dest;
2951 cpumask_t tmp;
2952 struct irq_desc *desc;
2953
2954 cpus_and(tmp, mask, cpu_online_map);
2955 if (cpus_empty(tmp))
2956 return;
2957
2958 if (assign_irq_vector(irq, mask))
2959 return;
2960
2961 cfg = irq_cfg(irq);
2962 cpus_and(tmp, cfg->domain, mask);
2963 dest = cpu_mask_to_apicid(tmp);
2964
2965 dmar_msi_read(irq, &msg);
2966
2967 msg.data &= ~MSI_DATA_VECTOR_MASK;
2968 msg.data |= MSI_DATA_VECTOR(cfg->vector);
2969 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
2970 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2971
2972 dmar_msi_write(irq, &msg);
2973 desc = irq_to_desc(irq);
2974 desc->affinity = mask;
2975 }
2976 #endif /* CONFIG_SMP */
2977
2978 struct irq_chip dmar_msi_type = {
2979 .name = "DMAR_MSI",
2980 .unmask = dmar_msi_unmask,
2981 .mask = dmar_msi_mask,
2982 .ack = ack_apic_edge,
2983 #ifdef CONFIG_SMP
2984 .set_affinity = dmar_msi_set_affinity,
2985 #endif
2986 .retrigger = ioapic_retrigger_irq,
2987 };
2988
2989 int arch_setup_dmar_msi(unsigned int irq)
2990 {
2991 int ret;
2992 struct msi_msg msg;
2993
2994 ret = msi_compose_msg(NULL, irq, &msg);
2995 if (ret < 0)
2996 return ret;
2997 dmar_msi_write(irq, &msg);
2998 set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
2999 "edge");
3000 return 0;
3001 }
3002 #endif
3003
3004 #endif /* CONFIG_PCI_MSI */
3005 /*
3006 * Hypertransport interrupt support
3007 */
3008 #ifdef CONFIG_HT_IRQ
3009
3010 #ifdef CONFIG_SMP
3011
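/*
 * Rewrite only the vector and destination-ID fields of a HyperTransport
 * irq message, leaving everything else untouched.
 */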
3012 static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3013 {
3014 struct ht_irq_msg msg;
3015 fetch_ht_irq_msg(irq, &msg);
3016
3017 msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
3018 msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
3019
3020 msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
3021 msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
3022
3023 write_ht_irq_msg(irq, &msg);
3024 }
3025
3026 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
3027 {
3028 struct irq_cfg *cfg;
3029 unsigned int dest;
3030 cpumask_t tmp;
3031 struct irq_desc *desc;
3032
3033 cpus_and(tmp, mask, cpu_online_map);
3034 if (cpus_empty(tmp))
3035 return;
3036
3037 if (assign_irq_vector(irq, mask))
3038 return;
3039
3040 cfg = irq_cfg(irq);
3041 cpus_and(tmp, cfg->domain, mask);
3042 dest = cpu_mask_to_apicid(tmp);
3043
3044 target_ht_irq(irq, dest, cfg->vector);
3045 desc = irq_to_desc(irq);
3046 desc->affinity = mask;
3047 }
3048 #endif
3049
3050 static struct irq_chip ht_irq_chip = {
3051 .name = "PCI-HT",
3052 .mask = mask_ht_irq,
3053 .unmask = unmask_ht_irq,
3054 .ack = ack_apic_edge,
3055 #ifdef CONFIG_SMP
3056 .set_affinity = set_ht_irq_affinity,
3057 #endif
3058 .retrigger = ioapic_retrigger_irq,
3059 };
3060
3061 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3062 {
3063 struct irq_cfg *cfg;
3064 int err;
3065 cpumask_t tmp;
3066
3067 tmp = TARGET_CPUS;
3068 err = assign_irq_vector(irq, tmp);
3069 if (!err) {
3070 struct ht_irq_msg msg;
3071 unsigned dest;
3072
3073 cfg = irq_cfg(irq);
3074 cpus_and(tmp, cfg->domain, tmp);
3075 dest = cpu_mask_to_apicid(tmp);
3076
3077 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3078
3079 msg.address_lo =
3080 HT_IRQ_LOW_BASE |
3081 HT_IRQ_LOW_DEST_ID(dest) |
3082 HT_IRQ_LOW_VECTOR(cfg->vector) |
3083 ((INT_DEST_MODE == 0) ?
3084 HT_IRQ_LOW_DM_PHYSICAL :
3085 HT_IRQ_LOW_DM_LOGICAL) |
3086 HT_IRQ_LOW_RQEOI_EDGE |
3087 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
3088 HT_IRQ_LOW_MT_FIXED :
3089 HT_IRQ_LOW_MT_ARBITRATED) |
3090 HT_IRQ_LOW_IRQ_MASKED;
3091
3092 write_ht_irq_msg(irq, &msg);
3093
3094 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
3095 handle_edge_irq, "edge");
3096 }
3097 return err;
3098 }
3099 #endif /* CONFIG_HT_IRQ */
3100
3101 /* --------------------------------------------------------------------------
3102 ACPI-based IOAPIC Configuration
3103 -------------------------------------------------------------------------- */
3104
3105 #ifdef CONFIG_ACPI
3106
3107 #define IO_APIC_MAX_ID 0xFE
3108
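/*
 * Read IOAPIC register 1 (the version register) and return its
 * "maximum redirection entry" field, i.e. the number of RTE pins
 * minus one.
 */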
3109 int __init io_apic_get_redir_entries(int ioapic)
3110 {
3111 union IO_APIC_reg_01 reg_01;
3112 unsigned long flags;
3113
3114 spin_lock_irqsave(&ioapic_lock, flags);
3115 reg_01.raw = io_apic_read(ioapic, 1);
3116 spin_unlock_irqrestore(&ioapic_lock, flags);
3117
3118 return reg_01.bits.entries;
3119 }
3120
3121
3122 int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
3123 {
3124 if (!IO_APIC_IRQ(irq)) {
3125 		apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3126 ioapic);
3127 return -EINVAL;
3128 }
3129
3130 /*
3131 * IRQs < 16 are already in the irq_2_pin[] map
3132 */
3133 if (irq >= 16)
3134 add_pin_to_irq(irq, ioapic, pin);
3135
3136 setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
3137
3138 return 0;
3139 }
3140
3141
3142 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
3143 {
3144 int i;
3145
3146 if (skip_ioapic_setup)
3147 return -1;
3148
3149 for (i = 0; i < mp_irq_entries; i++)
3150 if (mp_irqs[i].mp_irqtype == mp_INT &&
3151 mp_irqs[i].mp_srcbusirq == bus_irq)
3152 break;
3153 if (i >= mp_irq_entries)
3154 return -1;
3155
3156 *trigger = irq_trigger(i);
3157 *polarity = irq_polarity(i);
3158 return 0;
3159 }
3160
3161 #endif /* CONFIG_ACPI */
3162
3163 /*
3164  * This function is currently only a helper for the smp boot process, where we
3165  * need to reprogram the ioredtbls to cater for the cpus which have come online,
3166  * so the mask in all cases should simply be TARGET_CPUS.
3167 */
3168 #ifdef CONFIG_SMP
3169 void __init setup_ioapic_dest(void)
3170 {
3171 int pin, ioapic, irq, irq_entry;
3172 struct irq_cfg *cfg;
3173
3174 if (skip_ioapic_setup == 1)
3175 return;
3176
3177 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
3178 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
3179 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
3180 if (irq_entry == -1)
3181 continue;
3182 irq = pin_2_irq(irq_entry, ioapic, pin);
3183
3184 			/* setup_IO_APIC_irqs() could fail to get a vector for some
3185 			 * devices when there are too many devices, because at that
3186 			 * time only the boot cpu is online.
3187 */
3188 cfg = irq_cfg(irq);
3189 if (!cfg->vector)
3190 setup_IO_APIC_irq(ioapic, pin, irq,
3191 irq_trigger(irq_entry),
3192 irq_polarity(irq_entry));
3193 #ifdef CONFIG_INTR_REMAP
3194 else if (intr_remapping_enabled)
3195 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
3196 #endif
3197 else
3198 set_ioapic_affinity_irq(irq, TARGET_CPUS);
3199 }
3200
3201 }
3202 }
3203 #endif
3204
3205 #define IOAPIC_RESOURCE_NAME_SIZE 11
3206
3207 static struct resource *ioapic_resources;
3208
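/*
 * Bootmem-allocate one struct resource plus a name buffer per IOAPIC;
 * the resources are inserted into the iomem tree later by
 * ioapic_insert_resources().
 */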
3209 static struct resource * __init ioapic_setup_resources(void)
3210 {
3211 unsigned long n;
3212 struct resource *res;
3213 char *mem;
3214 int i;
3215
3216 if (nr_ioapics <= 0)
3217 return NULL;
3218
3219 n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
3220 n *= nr_ioapics;
3221
3222 mem = alloc_bootmem(n);
3223 res = (void *)mem;
3224
3225 if (mem != NULL) {
3226 mem += sizeof(struct resource) * nr_ioapics;
3227
3228 for (i = 0; i < nr_ioapics; i++) {
3229 res[i].name = mem;
3230 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
3231 sprintf(mem, "IOAPIC %u", i);
3232 mem += IOAPIC_RESOURCE_NAME_SIZE;
3233 }
3234 }
3235
3236 ioapic_resources = res;
3237
3238 return res;
3239 }
3240
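/*
 * Map each IOAPIC's register window through the fixmap.  Without MP
 * table information a spare page is allocated instead so that the
 * fixmap still points at harmless memory.
 */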
3241 void __init ioapic_init_mappings(void)
3242 {
3243 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3244 struct resource *ioapic_res;
3245 int i;
3246
3247 ioapic_res = ioapic_setup_resources();
3248 for (i = 0; i < nr_ioapics; i++) {
3249 if (smp_found_config) {
3250 ioapic_phys = mp_ioapics[i].mp_apicaddr;
3251 } else {
3252 ioapic_phys = (unsigned long)
3253 alloc_bootmem_pages(PAGE_SIZE);
3254 ioapic_phys = __pa(ioapic_phys);
3255 }
3256 set_fixmap_nocache(idx, ioapic_phys);
3257 apic_printk(APIC_VERBOSE,
3258 "mapped IOAPIC to %016lx (%016lx)\n",
3259 __fix_to_virt(idx), ioapic_phys);
3260 idx++;
3261
3262 if (ioapic_res != NULL) {
3263 ioapic_res->start = ioapic_phys;
3264 ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
3265 ioapic_res++;
3266 }
3267 }
3268 }
3269
3270 static int __init ioapic_insert_resources(void)
3271 {
3272 int i;
3273 struct resource *r = ioapic_resources;
3274
3275 if (!r) {
3276 printk(KERN_ERR
3277 		       "IO APIC resources could not be allocated.\n");
3278 return -1;
3279 }
3280
3281 for (i = 0; i < nr_ioapics; i++) {
3282 insert_resource(&iomem_resource, r);
3283 r++;
3284 }
3285
3286 return 0;
3287 }
3288
3289 /* Insert the IO APIC resources after PCI initialization has occurred to handle
3290 * IO APICS that are mapped in on a BAR in PCI space. */
3291 late_initcall(ioapic_insert_resources);
3292