x86: add irq_cfg in io_apic_64.c
[deliverable/linux.git] / arch / x86 / kernel / io_apic_64.c
index 61a83b70c18fcc65ce60b965ee3a6e0456622dc4..858c37a31a2f416beb612cf5d6343bc31a0f37a8 100644 (file)
@@ -37,6 +37,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
+#include <linux/dmar.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
 
+#define __apicdebuginit(type) static type __init
+
+struct irq_cfg;
+
 struct irq_cfg {
+       unsigned int irq;
+       struct irq_cfg *next;
        cpumask_t domain;
        cpumask_t old_domain;
        unsigned move_cleanup_count;
@@ -62,33 +70,140 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
-       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+/*
+ * Boot-time (__initdata) template for the 16 legacy IRQs 0-15: every entry
+ * has a domain of CPU_MASK_ALL and its fixed IRQn_VECTOR.  init_work()
+ * copies this table into the start of the dynamically sized irq_cfg array.
+ */
+static struct irq_cfg irq_cfg_legacy[] __initdata = {
+       [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+       [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+       [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+       [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+       [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+       [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+       [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+       [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+       [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+       [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+       [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+       [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+       [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+       [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+       [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+       [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
+static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
+/* must be larger than the number of entries in irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+/*
+ * Handler for the "nr_irq_cfg=" boot parameter.  Stores the requested
+ * number of irq_cfg entries in nr_irq_cfg, raising anything below 32 to 32.
+ * A missing argument leaves the default untouched.  Always returns 0.
+ */
+static int __init parse_nr_irq_cfg(char *arg)
+{
+       if (!arg)
+               return 0;
+
+       nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+       if (nr_irq_cfg < 32)
+               nr_irq_cfg = 32;
+
+       return 0;
+}
+
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+/* Reset *cfg to the "unused" template irq_cfg_init (irq == -1U). */
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+       memcpy(cfg, &irq_cfg_init, sizeof(*cfg));
+}
+
+/*
+ * Initialisation callback registered for the irq_cfgx dynamic array.
+ * Seeds the leading slots from irq_cfg_legacy, marks every remaining slot
+ * unused, and finally chains each slot to its successor through ->next.
+ *
+ * @data: the struct dyn_array describing the array (table via *da->name,
+ *        element count via *da->nr).
+ */
+static void __init init_work(void *data)
+{
+       struct dyn_array *da = data;
+       struct irq_cfg *table = *da->name;
+       int slot;
+
+       memcpy(table, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+
+       /* everything past the legacy entries starts out unused */
+       slot = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+       while (slot < *da->nr) {
+               init_one_irq_cfg(&table[slot]);
+               slot++;
+       }
+
+       /* link after initialising: init_one_irq_cfg() overwrites ->next */
+       for (slot = 0; slot + 1 < *da->nr; slot++)
+               table[slot].next = &table[slot + 1];
+}
+
+static struct irq_cfg *irq_cfgx;
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
+
+/*
+ * Look up the irq_cfg entry for @irq by walking the list that starts at
+ * irq_cfgx[0].  Returns the matching entry, or NULL when @irq has no entry.
+ * Passing -1U (the "unused" marker) is a bug.
+ */
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+       struct irq_cfg *pos;
+
+       BUG_ON(irq == -1U);
+
+       for (pos = &irq_cfgx[0]; pos; pos = pos->next) {
+               if (pos->irq == irq)
+                       return pos;
+
+               /*
+                * irq_cfg_alloc() always hands out the first unused slot,
+                * so reaching one means @irq is not present.
+                */
+               if (pos->irq == -1U)
+                       break;
+       }
+
+       return NULL;
+}
+
+/*
+ * Return the irq_cfg entry for @irq, claiming one if none exists yet.
+ *
+ * The list starting at irq_cfgx[0] is searched first: an entry already
+ * bound to @irq is returned as is, otherwise the first unused entry
+ * (irq == -1U) is bound to @irq.  When the list is exhausted, a further
+ * batch of nr_irq_cfg entries is allocated (kzalloc once after_bootmem is
+ * set, bootmem before that), appended to the list, and its first entry is
+ * used.  Panics if that allocation fails.
+ */
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+       struct irq_cfg *pos, *tail, *batch;
+       int walked = 0;
+       int i;
+
+       BUG_ON(irq == -1U);
+
+       tail = &irq_cfgx[0];
+       for (pos = tail; pos; pos = pos->next) {
+               if (pos->irq == irq)
+                       return pos;
+
+               if (pos->irq == -1U) {
+                       pos->irq = irq;
+                       return pos;
+               }
+               tail = pos;
+               walked++;
+       }
+
+       /*
+        * All pre-allocated entries are in use; allocate another batch.
+        */
+       printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+
+       if (after_bootmem)
+               batch = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC);
+       else
+               batch = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0);
+
+       if (!batch)
+               panic("please boot with nr_irq_cfg= %d\n", walked * 2);
+
+       /* mark each new entry unused and hook it behind its predecessor */
+       for (i = 0; i < nr_irq_cfg; i++) {
+               init_one_irq_cfg(&batch[i]);
+               if (i)
+                       batch[i-1].next = &batch[i];
+       }
+
+       batch->irq = irq;
+       tail->next = batch;
+
+       return batch;
+}
+
 static int assign_irq_vector(int irq, cpumask_t mask);
 
 int first_system_vector = 0xfe;
 
 char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
 
-#define __apicdebuginit  __init
-
 int sis_apic_bug; /* not actually supported, dummy for compile */
 
 static int no_timer_check;
@@ -108,6 +223,9 @@ static DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /* I/O APIC entries */
 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
@@ -124,8 +242,8 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+int pin_map_size;
 
 /*
  * This is performance-critical, we want to do it O(1)
@@ -135,8 +253,12 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
  */
 
 static struct irq_pin_list {
-       short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+       short apic, pin;
+       int next;
+} *irq_2_pin;
+
+DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, sizeof(struct irq_pin_list), NULL);
+
 
 struct io_apic {
        unsigned int index;
@@ -219,7 +341,7 @@ static inline void io_apic_sync(unsigned int apic)
        int pin;                                                        \
        struct irq_pin_list *entry = irq_2_pin + irq;                   \
                                                                        \
-       BUG_ON(irq >= NR_IRQS);                                         \
+       BUG_ON(irq >= nr_irqs);                                         \
        for (;;) {                                                      \
                unsigned int reg;                                       \
                pin = entry->pin;                                       \
@@ -296,14 +418,19 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
        int apic, pin;
        struct irq_pin_list *entry = irq_2_pin + irq;
 
-       BUG_ON(irq >= NR_IRQS);
+       BUG_ON(irq >= nr_irqs);
        for (;;) {
                unsigned int reg;
                apic = entry->apic;
                pin = entry->pin;
                if (pin == -1)
                        break;
-               io_apic_write(apic, 0x11 + pin*2, dest);
+               /*
+                * With interrupt-remapping, destination information comes
+                * from interrupt-remapping table entry.
+                */
+               if (!irq_remapped(irq))
+                       io_apic_write(apic, 0x11 + pin*2, dest);
                reg = io_apic_read(apic, 0x10 + pin*2);
                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                reg |= vector;
@@ -316,10 +443,11 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
        unsigned long flags;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -336,9 +464,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
         */
        dest = SET_APIC_LOGICAL_ID(dest);
 
+       desc = irq_to_desc(irq);
        spin_lock_irqsave(&ioapic_lock, flags);
        __target_IO_APIC_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc->affinity = mask;
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 #endif
@@ -348,19 +477,21 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
+int first_free_entry;
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-       static int first_free_entry = NR_IRQS;
        struct irq_pin_list *entry = irq_2_pin + irq;
 
-       BUG_ON(irq >= NR_IRQS);
+       BUG_ON(irq >= nr_irqs);
+       irq_cfg_alloc(irq);
+
        while (entry->next)
                entry = irq_2_pin + entry->next;
 
        if (entry->pin != -1) {
                entry->next = first_free_entry;
                entry = irq_2_pin + entry->next;
-               if (++first_free_entry >= PIN_MAP_SIZE)
+               if (++first_free_entry >= pin_map_size)
                        panic("io_apic.c: ran out of irq_2_pin entries!");
        }
        entry->apic = apic;
@@ -440,6 +571,69 @@ static void clear_IO_APIC (void)
                        clear_IO_APIC_pin(apic, pin);
 }
 
+/*
+ * Snapshot every IO-APIC routing entry into early_ioapic_entries[] and
+ * mask each entry that was unmasked.
+ *
+ * Returns 0 on success, or -ENOMEM if a snapshot buffer could not be
+ * allocated.  On failure every buffer allocated by this call is freed and
+ * its early_ioapic_entries[] slot is reset to NULL, so nothing is leaked
+ * and no stale pointer is left behind; no routing entry has been masked
+ * at that point.
+ */
+int save_mask_IO_APIC_setup(void)
+{
+       union IO_APIC_reg_01 reg_01;
+       unsigned long flags;
+       int apic, pin;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_01.raw = io_apic_read(apic, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+
+       /* allocate all snapshot buffers before touching any routing entry */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               early_ioapic_entries[apic] =
+                       kzalloc(sizeof(struct IO_APIC_route_entry) *
+                               nr_ioapic_registers[apic], GFP_KERNEL);
+               if (!early_ioapic_entries[apic])
+                       goto nomem;
+       }
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+
+                       entry = early_ioapic_entries[apic][pin] =
+                               ioapic_read_entry(apic, pin);
+                       if (!entry.mask) {
+                               entry.mask = 1;
+                               ioapic_write_entry(apic, pin, entry);
+                       }
+               }
+       return 0;
+
+nomem:
+       /* apic indexes the failed slot; release everything before it */
+       while (--apic >= 0) {
+               kfree(early_ioapic_entries[apic]);
+               early_ioapic_entries[apic] = NULL;
+       }
+       return -ENOMEM;
+}
+
+/*
+ * Write the routing entries saved in early_ioapic_entries[] back to the
+ * IO-APICs.  An IO-APIC without a saved table (the save failed or was
+ * never performed) is skipped instead of dereferencing a NULL pointer.
+ */
+void restore_IO_APIC_setup(void)
+{
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               if (!early_ioapic_entries[apic])
+                       continue;
+
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       ioapic_write_entry(apic, pin,
+                                          early_ioapic_entries[apic][pin]);
+       }
+}
+
+/*
+ * Re-initialise the IO-APICs around an interrupt-remapping mode change.
+ * @intr_remapping is currently unused by this function.
+ */
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+       /*
+        * For now this is a plain restore of the previously saved settings.
+        * TBD: when the OS enables interrupt-remapping, the IO-APIC RTEs
+        * need to be set up to point to interrupt-remapping table entries.
+        * Until then, restore the saved entries and rely on
+        * setup_IO_APIC_irqs() to do the proper initialization.
+        */
+       restore_IO_APIC_setup();
+}
+
 int skip_ioapic_setup;
 int ioapic_force;
 
@@ -561,7 +755,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
                                best_guess = irq;
                }
        }
-       BUG_ON(best_guess >= NR_IRQS);
+       BUG_ON(best_guess >= nr_irqs);
        return best_guess;
 }
 
@@ -693,7 +887,7 @@ static int pin_2_irq(int idx, int apic, int pin)
                        irq += nr_ioapic_registers[i++];
                irq += pin;
        }
-       BUG_ON(irq >= NR_IRQS);
+       BUG_ON(irq >= nr_irqs);
        return irq;
 }
 
@@ -728,8 +922,8 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
        int cpu;
        struct irq_cfg *cfg;
 
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       BUG_ON((unsigned)irq >= nr_irqs);
+       cfg = irq_cfg(irq);
 
        /* Only try and allocate irqs on cpus that are present */
        cpus_and(mask, mask, cpu_online_map);
@@ -802,8 +996,8 @@ static void __clear_irq_vector(int irq)
        cpumask_t mask;
        int cpu, vector;
 
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       BUG_ON((unsigned)irq >= nr_irqs);
+       cfg = irq_cfg(irq);
        BUG_ON(!cfg->vector);
 
        vector = cfg->vector;
@@ -822,47 +1016,138 @@ void __setup_vector_irq(int cpu)
        int irq, vector;
 
        /* Mark the inuse vectors */
-       for (irq = 0; irq < NR_IRQS; ++irq) {
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+       for (irq = 0; irq < nr_irqs; ++irq) {
+               struct irq_cfg *cfg = irq_cfg(irq);
+
+               if (!cpu_isset(cpu, cfg->domain))
                        continue;
-               vector = irq_cfg[irq].vector;
+               vector = cfg->vector;
                per_cpu(vector_irq, cpu)[vector] = irq;
        }
        /* Mark the free vectors */
        for (vector = 0; vector < NR_VECTORS; ++vector) {
+               struct irq_cfg *cfg;
+
                irq = per_cpu(vector_irq, cpu)[vector];
                if (irq < 0)
                        continue;
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+
+               cfg = irq_cfg(irq);
+               if (!cpu_isset(cpu, cfg->domain))
                        per_cpu(vector_irq, cpu)[vector] = -1;
        }
 }
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-       if (trigger) {
-               irq_desc[irq].status |= IRQ_LEVEL;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+       if (trigger)
+               desc->status |= IRQ_LEVEL;
+       else
+               desc->status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               desc->status |= IRQ_MOVE_PCNTXT;
+               if (trigger)
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_fasteoi_irq,
+                                                    "fasteoi");
+               else
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_edge_irq, "edge");
+               return;
+       }
+#endif
+       if (trigger)
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
-                                             handle_fasteoi_irq, "fasteoi");
-       } else {
-               irq_desc[irq].status &= ~IRQ_LEVEL;
+                                             handle_fasteoi_irq,
+                                             "fasteoi");
+       else
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                              handle_edge_irq, "edge");
+}
+
+/*
+ * Build the routing-table entry for @irq in *@entry (nothing is written
+ * to the IO-APIC here).
+ *
+ * With interrupt remapping enabled, an IRTE is allocated and programmed
+ * with @vector and @destination, and *@entry is filled in the remappable
+ * format that carries the IRTE index.  Otherwise *@entry carries the
+ * delivery mode, destination mode and @destination directly.  In both
+ * cases trigger, polarity and vector are stored, and level-triggered
+ * entries (@trigger != 0) are left masked.
+ *
+ * Always returns 0; a missing iommu or a failed IRTE allocation panics.
+ * NOTE(review): the result of modify_irte() is not checked - confirm
+ * whether it can fail.
+ */
+static int setup_ioapic_entry(int apic, int irq,
+                             struct IO_APIC_route_entry *entry,
+                             unsigned int destination, int trigger,
+                             int polarity, int vector)
+{
+       /*
+        * start from an all-zero entry
+        */
+       memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+       if (intr_remapping_enabled) {
+               struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+               struct irte irte;
+               /* same storage, viewed through the remappable-format layout */
+               struct IR_IO_APIC_route_entry *ir_entry =
+                       (struct IR_IO_APIC_route_entry *) entry;
+               int index;
+
+               if (!iommu)
+                       panic("No mapping iommu for ioapic %d\n", apic);
+
+               index = alloc_irte(iommu, irq, 1);
+               if (index < 0)
+                       panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+               memset(&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = INT_DEST_MODE;
+               irte.trigger_mode = trigger;
+               irte.dlvry_mode = INT_DELIVERY_MODE;
+               irte.vector = vector;
+               irte.dest_id = IRTE_DEST(destination);
+
+               modify_irte(irq, &irte);
+
+               /* bit 15 of the index goes in index2, the low 15 bits in index */
+               ir_entry->index2 = (index >> 15) & 0x1;
+               ir_entry->zero = 0;
+               ir_entry->format = 1;
+               ir_entry->index = (index & 0x7fff);
+       } else
+#endif
+       {
+               entry->delivery_mode = INT_DELIVERY_MODE;
+               entry->dest_mode = INT_DEST_MODE;
+               entry->dest = destination;
         }
+
+       entry->mask = 0;                                /* enable IRQ */
+       entry->trigger = trigger;
+       entry->polarity = polarity;
+       entry->vector = vector;
+
+       /* Mask level triggered irqs.
+        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+        */
+       if (trigger)
+               entry->mask = 1;
+       return 0;
 }
 
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                              int trigger, int polarity)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct IO_APIC_route_entry entry;
        cpumask_t mask;
 
        if (!IO_APIC_IRQ(irq))
                return;
 
+       cfg = irq_cfg(irq);
+
        mask = TARGET_CPUS;
        if (assign_irq_vector(irq, mask))
                return;
@@ -875,24 +1160,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
                    irq, trigger, polarity);
 
-       /*
-        * add it to the IO-APIC irq-routing table:
-        */
-       memset(&entry,0,sizeof(entry));
 
-       entry.delivery_mode = INT_DELIVERY_MODE;
-       entry.dest_mode = INT_DEST_MODE;
-       entry.dest = cpu_mask_to_apicid(mask);
-       entry.mask = 0;                         /* enable IRQ */
-       entry.trigger = trigger;
-       entry.polarity = polarity;
-       entry.vector = cfg->vector;
-
-       /* Mask level triggered irqs.
-        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-        */
-       if (trigger)
-               entry.mask = 1;
+       if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+                              cpu_mask_to_apicid(mask), trigger, polarity,
+                              cfg->vector)) {
+               printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+                      mp_ioapics[apic].mp_apicid, pin);
+               __clear_irq_vector(irq);
+               return;
+       }
 
        ioapic_register_intr(irq, trigger);
        if (irq < 16)
@@ -944,6 +1220,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
        struct IO_APIC_route_entry entry;
 
+       if (intr_remapping_enabled)
+               return;
+
        memset(&entry, 0, sizeof(entry));
 
        /*
@@ -970,7 +1249,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
        ioapic_write_entry(apic, pin, entry);
 }
 
-void __apicdebuginit print_IO_APIC(void)
+
+__apicdebuginit(void) print_IO_APIC(void)
 {
        int apic, i;
        union IO_APIC_reg_00 reg_00;
@@ -1045,7 +1325,7 @@ void __apicdebuginit print_IO_APIC(void)
        }
        }
        printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for (i = 0; i < NR_IRQS; i++) {
+       for (i = 0; i < nr_irqs; i++) {
                struct irq_pin_list *entry = irq_2_pin + i;
                if (entry->pin < 0)
                        continue;
@@ -1064,9 +1344,7 @@ void __apicdebuginit print_IO_APIC(void)
        return;
 }
 
-#if 0
-
-static __apicdebuginit void print_APIC_bitfield (int base)
+__apicdebuginit(void) print_APIC_bitfield(int base)
 {
        unsigned int v;
        int i, j;
@@ -1087,9 +1365,10 @@ static __apicdebuginit void print_APIC_bitfield (int base)
        }
 }
 
-void __apicdebuginit print_local_APIC(void * dummy)
+__apicdebuginit(void) print_local_APIC(void *dummy)
 {
        unsigned int v, ver, maxlvt;
+       unsigned long icr;
 
        if (apic_verbosity == APIC_QUIET)
                return;
@@ -1097,7 +1376,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
-       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
@@ -1133,10 +1412,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
        v = apic_read(APIC_ESR);
        printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
-       v = apic_read(APIC_ICR);
-       printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
-       v = apic_read(APIC_ICR2);
-       printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+       icr = apic_icr_read();
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
 
        v = apic_read(APIC_LVTT);
        printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1164,12 +1442,12 @@ void __apicdebuginit print_local_APIC(void * dummy)
        printk("\n");
 }
 
-void print_all_local_APICs (void)
+__apicdebuginit(void) print_all_local_APICs(void)
 {
        on_each_cpu(print_local_APIC, NULL, 1);
 }
 
-void __apicdebuginit print_PIC(void)
+__apicdebuginit(void) print_PIC(void)
 {
        unsigned int v;
        unsigned long flags;
@@ -1201,7 +1479,17 @@ void __apicdebuginit print_PIC(void)
        printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 }
 
-#endif  /*  0  */
+/*
+ * Dump the state of the PIC, of all local APICs and of the IO-APICs, in
+ * that order.  Registered as an fs_initcall; always returns 0.
+ */
+__apicdebuginit(int) print_all_ICs(void)
+{
+       print_PIC();
+       print_all_local_APICs();
+       print_IO_APIC();
+
+       return 0;
+}
+
+fs_initcall(print_all_ICs);
+
 
 void __init enable_IO_APIC(void)
 {
@@ -1210,7 +1498,7 @@ void __init enable_IO_APIC(void)
        int i, apic;
        unsigned long flags;
 
-       for (i = 0; i < PIN_MAP_SIZE; i++) {
+       for (i = 0; i < pin_map_size; i++) {
                irq_2_pin[i].pin = -1;
                irq_2_pin[i].next = 0;
        }
@@ -1291,7 +1579,7 @@ void disable_IO_APIC(void)
                entry.dest_mode       = 0; /* Physical */
                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
-               entry.dest          = GET_APIC_ID(read_apic_id());
+               entry.dest            = read_apic_id();
 
                /*
                 * Add it to the IO-APIC irq-routing table:
@@ -1377,7 +1665,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-       struct irq_cfg *cfg = &irq_cfg[irq];
+       struct irq_cfg *cfg = irq_cfg(irq);
        unsigned long flags;
 
        spin_lock_irqsave(&vector_lock, flags);
@@ -1397,6 +1685,152 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update (of vector
+ * and cpu destination) of the IRTE, followed by a flush of the hardware
+ * cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE as well with the
+ * updated vector information, along with modifying the IRTE with vector and
+ * destination.  So irq migration for level triggered is a little bit more
+ * complex compared to edge triggered migration. But the good news is, we use
+ * the same algorithm for level triggered migration as we have today, the
+ * only difference being that we now initiate the irq migration from process
+ * context instead of the interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ *
+ * Returns silently, without changing anything, when @mask contains no
+ * online cpu, when the IRTE cannot be read, or when no vector can be
+ * assigned.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       cpumask_t tmp, cleanup_mask;
+       struct irte irte;
+       int modify_ioapic_rte;
+       unsigned int dest;
+       unsigned long flags;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;
+
+       if (get_irte(irq, &irte))
+               return;
+
+       if (assign_irq_vector(irq, mask))
+               return;
+
+       cfg = irq_cfg(irq);
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       desc = irq_to_desc(irq);
+       /* only level-triggered irqs need the IO-APIC RTE itself rewritten */
+       modify_ioapic_rte = desc->status & IRQ_LEVEL;
+       if (modify_ioapic_rte) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               __target_IO_APIC_irq(irq, dest, cfg->vector);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * Modify the IRTE and flush the interrupt entry cache.
+        */
+       modify_irte(irq, &irte);
+
+       /* ask the cpus of the old domain to clean up the previous vector */
+       if (cfg->move_in_progress) {
+               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+
+       desc->affinity = mask;
+}
+
+/*
+ * Try to migrate a level-triggered, remapped @irq to desc->pending_mask.
+ * The irq is masked for the duration of the attempt and unmasked again
+ * before returning.
+ *
+ * Returns 0 when the migration was carried out, or -1 when an interrupt
+ * was still in progress and the migration was deferred to the
+ * ir_migration_work delayed work.
+ */
+static int migrate_irq_remapped_level(int irq)
+{
+       int ret = -1;
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       mask_IO_APIC_irq(irq);
+
+       if (io_apic_level_ack_pending(irq)) {
+               /*
+                * Interrupt in progress. Migrating irq now will change the
+                * vector information in the IO-APIC RTE and that will confuse
+                * the EOI broadcast performed by cpu.
+                * So, delay the irq migration to the next instance.
+                */
+               schedule_delayed_work(&ir_migration_work, 1);
+               goto unmask;
+       }
+
+       /* everything is clear. we have right of way */
+       migrate_ioapic_irq(irq, desc->pending_mask);
+
+       ret = 0;
+       desc->status &= ~IRQ_MOVE_PENDING;
+       cpus_clear(desc->pending_mask);
+
+unmask:
+       unmask_IO_APIC_irq(irq);
+       return ret;
+}
+
+/*
+ * Delayed-work handler: retry the affinity change of every irq that still
+ * has IRQ_MOVE_PENDING set.  An irq whose chip has no set_affinity method
+ * just gets the pending flag cleared.
+ */
+static void ir_irq_migration(struct work_struct *work)
+{
+       int irq;
+
+       for (irq = 0; irq < nr_irqs; irq++) {
+               struct irq_desc *desc = irq_to_desc(irq);
+               unsigned long flags;
+
+               /* unlocked peek; the flag is checked again under desc->lock */
+               if (!(desc->status & IRQ_MOVE_PENDING))
+                       continue;
+
+               spin_lock_irqsave(&desc->lock, flags);
+               if (desc->chip->set_affinity &&
+                   (desc->status & IRQ_MOVE_PENDING))
+                       desc->chip->set_affinity(irq, desc->pending_mask);
+               else
+                       desc->status &= ~IRQ_MOVE_PENDING;
+               spin_unlock_irqrestore(&desc->lock, flags);
+       }
+}
+
+/*
+ * set_affinity method of the interrupt-remapped IO-APIC chip; performs the
+ * migration of @irq to @mask from process context.
+ *
+ * Edge-triggered irqs are migrated immediately.  Level-triggered irqs are
+ * first marked IRQ_MOVE_PENDING with @mask recorded as the pending mask,
+ * then handed to migrate_irq_remapped_level().
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (!(desc->status & IRQ_LEVEL)) {
+               migrate_ioapic_irq(irq, mask);
+               return;
+       }
+
+       desc->status |= IRQ_MOVE_PENDING;
+       desc->pending_mask = mask;
+       migrate_irq_remapped_level(irq);
+}
+#endif
+
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
        unsigned vector, me;
@@ -1410,11 +1844,11 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                struct irq_desc *desc;
                struct irq_cfg *cfg;
                irq = __get_cpu_var(vector_irq)[vector];
-               if (irq >= NR_IRQS)
+               if (irq >= nr_irqs)
                        continue;
 
-               desc = irq_desc + irq;
-               cfg = irq_cfg + irq;
+               desc = irq_to_desc(irq);
+               cfg = irq_cfg(irq);
                spin_lock(&desc->lock);
                if (!cfg->move_cleanup_count)
                        goto unlock;
@@ -1433,7 +1867,7 @@ unlock:
 
 static void irq_complete_move(unsigned int irq)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
        unsigned vector, me;
 
        if (likely(!cfg->move_in_progress))
@@ -1453,6 +1887,17 @@ static void irq_complete_move(unsigned int irq)
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Installed as the .eoi method of ir_ioapic_chip: acknowledges the
+ * interrupt via ack_x2APIC_irq().  @irq itself is not used.
+ */
+static void ack_x2apic_level(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+
+/*
+ * Installed as the .ack method of ir_ioapic_chip: acknowledges the
+ * interrupt via ack_x2APIC_irq().  @irq itself is not used.
+ */
+static void ack_x2apic_edge(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -1468,7 +1913,7 @@ static void ack_apic_level(unsigned int irq)
        irq_complete_move(irq);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
        /* If we are moving the irq we need to mask it */
-       if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+       if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
                do_unmask_irq = 1;
                mask_IO_APIC_irq(irq);
        }
@@ -1527,9 +1972,25 @@ static struct irq_chip ioapic_chip __read_mostly = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = { /* CONFIG_INTR_REMAP only */
+       .name           = "IR-IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_x2apic_edge,
+       .eoi            = ack_x2apic_level,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ir_ioapic_affinity_irq,   /* SMP builds only */
+#endif
+       .retrigger      = ioapic_retrigger_irq, /* same hook as ioapic_chip */
+};
+#endif
+
 static inline void init_IO_APIC_traps(void)
 {
        int irq;
+       struct irq_desc *desc;
 
        /*
         * NOTE! The local APIC isn't very good at handling
@@ -1542,8 +2003,11 @@ static inline void init_IO_APIC_traps(void)
         * Also, we've got to be careful not to trash gate
         * 0x80, because int 0x80 is hm, kind of importantish. ;)
         */
-       for (irq = 0; irq < NR_IRQS ; irq++) {
-               if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+       for (irq = 0; irq < nr_irqs ; irq++) {
+               struct irq_cfg *cfg;
+
+               cfg = irq_cfg(irq);
+               if (IO_APIC_IRQ(irq) && !cfg->vector) {
                        /*
                         * Hmm.. We don't have an entry for this,
                         * so default to an old-fashioned 8259
@@ -1551,9 +2015,11 @@ static inline void init_IO_APIC_traps(void)
                         */
                        if (irq < 16)
                                make_8259A_irq(irq);
-                       else
+                       else {
+                               desc = irq_to_desc(irq);
                                /* Strange. Oh, well.. */
-                               irq_desc[irq].chip = &no_irq_chip;
+                               desc->chip = &no_irq_chip;
+                       }
                }
        }
 }
@@ -1588,7 +2054,10 @@ static struct irq_chip lapic_chip __read_mostly = {
 
 static void lapic_register_intr(int irq)
 {
-       irq_desc[irq].status &= ~IRQ_LEVEL;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);        /* replaces direct irq_desc[irq] indexing */
+       desc->status &= ~IRQ_LEVEL;     /* drop level flag; edge handler set below */
        set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
                                      "edge");
 }
@@ -1675,7 +2144,7 @@ static inline void __init unlock_ExtINT_logic(void)
  */
 static inline void __init check_timer(void)
 {
-       struct irq_cfg *cfg = irq_cfg + 0;
+       struct irq_cfg *cfg = irq_cfg(0);
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
        int no_pin1 = 0;
@@ -1712,6 +2181,8 @@ static inline void __init check_timer(void)
         * 8259A.
         */
        if (pin1 == -1) {
+               if (intr_remapping_enabled)
+                       panic("BIOS bug: timer not connected to IO-APIC");
                pin1 = pin2;
                apic1 = apic2;
                no_pin1 = 1;
@@ -1738,6 +2209,8 @@ static inline void __init check_timer(void)
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
                }
+               if (intr_remapping_enabled)
+                       panic("timer doesn't work through Interrupt-remapped IO-APIC");
                clear_IO_APIC_pin(apic1, pin1);
                if (!no_pin1)
                        apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1854,8 +2327,6 @@ void __init setup_IO_APIC(void)
        setup_IO_APIC_irqs();
        init_IO_APIC_traps();
        check_timer();
-       if (!acpi_ioapic)
-               print_IO_APIC();
 }
 
 struct sysfs_ioapic_data {
@@ -1951,14 +2422,19 @@ int create_irq(void)
        int irq;
        int new;
        unsigned long flags;
+       struct irq_cfg *cfg_new;
 
        irq = -ENOSPC;
        spin_lock_irqsave(&vector_lock, flags);
-       for (new = (NR_IRQS - 1); new >= 0; new--) {
+       for (new = (nr_irqs - 1); new >= 0; new--) {
                if (platform_legacy_irq(new))
                        continue;
-               if (irq_cfg[new].vector != 0)
+               cfg_new = irq_cfg(new);
+               if (cfg_new && cfg_new->vector != 0)
                        continue;
+               /* check if need to create one */
+               if (!cfg_new)
+                       cfg_new = irq_cfg_alloc(new);
                if (__assign_irq_vector(new, TARGET_CPUS) == 0)
                        irq = new;
                break;
@@ -1977,6 +2453,9 @@ void destroy_irq(unsigned int irq)
 
        dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+       free_irte(irq);
+#endif
        spin_lock_irqsave(&vector_lock, flags);
        __clear_irq_vector(irq);
        spin_unlock_irqrestore(&vector_lock, flags);
@@ -1988,17 +2467,49 @@ void destroy_irq(unsigned int irq)
 #ifdef CONFIG_PCI_MSI
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        int err;
        unsigned dest;
        cpumask_t tmp;
 
        tmp = TARGET_CPUS;
        err = assign_irq_vector(irq, tmp);
-       if (!err) {
-               cpus_and(tmp, cfg->domain, tmp);
-               dest = cpu_mask_to_apicid(tmp);
+       if (err)
+               return err;
+
+       cfg = irq_cfg(irq);
+       cpus_and(tmp, cfg->domain, tmp);
+       dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irte irte;
+               int ir_index;
+               u16 sub_handle;
+
+               ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+               BUG_ON(ir_index == -1);
+
+               memset (&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = INT_DEST_MODE;
+               irte.trigger_mode = 0; /* edge */
+               irte.dlvry_mode = INT_DELIVERY_MODE;
+               irte.vector = cfg->vector;
+               irte.dest_id = IRTE_DEST(dest);
+
+               modify_irte(irq, &irte);
 
+               msg->address_hi = MSI_ADDR_BASE_HI;
+               msg->data = sub_handle;
+               msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+                                 MSI_ADDR_IR_SHV |
+                                 MSI_ADDR_IR_INDEX1(ir_index) |
+                                 MSI_ADDR_IR_INDEX2(ir_index);
+       } else
+#endif
+       {
                msg->address_hi = MSI_ADDR_BASE_HI;
                msg->address_lo =
                        MSI_ADDR_BASE_LO |
@@ -2024,10 +2535,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2036,6 +2548,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2047,8 +2560,61 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask by rewriting its IRTE. This
+ * migration is done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg;
+       unsigned int dest;
+       cpumask_t tmp, cleanup_mask;
+       struct irte irte;
+       struct irq_desc *desc;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;         /* no online CPU in the requested mask */
+
+       if (get_irte(irq, &irte))
+               return;         /* get_irte() reported failure (non-zero) */
+
+       if (assign_irq_vector(irq, mask))
+               return;         /* no vector could be assigned for mask */
+
+       cfg = irq_cfg(irq);
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       irte.vector = cfg->vector;      /* other IRTE fields stay as read above */
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * atomically update the IRTE with the new destination and vector.
+        */
+       modify_irte(irq, &irte);
+
+       /*
+        * After this point, all the interrupts will start arriving
+        * at the new destination. So, time to cleanup the previous
+        * vector allocation.
+        */
+       if (cfg->move_in_progress) {
+               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;  /* record the requested mask, not the reduced tmp */
+}
+#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -2066,26 +2632,157 @@ static struct irq_chip msi_chip = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {  /* set by setup_msi_irq() if irq_remapped() */
+       .name           = "IR-PCI-MSI",
+       .unmask         = unmask_msi_irq,
+       .mask           = mask_msi_irq,
+       .ack            = ack_x2apic_edge,      /* also used by ir_ioapic_chip */
+#ifdef CONFIG_SMP
+       .set_affinity   = ir_set_msi_irq_affinity,      /* SMP builds only */
+#endif
+       .retrigger      = ioapic_retrigger_irq, /* same hook as msi_chip */
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit and
+ * allocate 'nvec' consecutive interrupt-remapping table entries in it.
+ * Returns the index from alloc_irte(), or -ENOENT / -ENOSPC on failure.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+       struct intel_iommu *iommu;
+       int index;
+
+       iommu = map_dev_to_ir(dev);
+       if (!iommu) {
+               printk(KERN_ERR
+                      "Unable to map PCI %s to iommu\n", pci_name(dev));
+               return -ENOENT;
+       }
+
+       index = alloc_irte(iommu, irq, nvec);
+       if (index < 0) {
+               printk(KERN_ERR
+                      "Unable to allocate %d IRTE for PCI %s\n", nvec,
+                       pci_name(dev));
+               return -ENOSPC; /* alloc_irte()'s own error code is not passed on */
+       }
+       return index;
+}
+#endif
+/* Set up one MSI irq: compose/write its message and install its irq_chip. */
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 {
+       int ret;
        struct msi_msg msg;
+
+       ret = msi_compose_msg(dev, irq, &msg);
+       if (ret < 0)
+               return ret;     /* compose error is returned unchanged */
+
+       set_irq_msi(irq, desc);
+       write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irq_desc *irq_d = irq_to_desc(irq);
+               /*
+                * irq migration in process context
+                */
+               irq_d->status |= IRQ_MOVE_PCNTXT;
+               set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+       } else
+#endif
+               set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+       return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
        int irq, ret;
+
        irq = create_irq();
        if (irq < 0)
                return irq;
 
-       ret = msi_compose_msg(dev, irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+       if (!intr_remapping_enabled)
+               goto no_ir;     /* remapping off: skip the IRTE allocation */
+
+       ret = msi_alloc_irte(dev, irq, 1);      /* single IRTE for this irq */
+       if (ret < 0)
+               goto error;
+no_ir:
+#endif
+       ret = setup_msi_irq(dev, desc, irq);
        if (ret < 0) {
                destroy_irq(irq);
                return ret;
        }
+       return 0;
 
-       set_irq_msi(irq, desc);
-       write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+       destroy_irq(irq);       /* undo create_irq() before reporting ret */
+       return ret;
+#endif
+}
+/* Create and set up an irq for every msi_desc on dev->msi_list. */
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+       int irq, ret, sub_handle;
+       struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+       struct intel_iommu *iommu = NULL;
+       int index = 0;
+#endif
 
-       set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+       sub_handle = 0;
+       list_for_each_entry(desc, &dev->msi_list, list) {
+               irq = create_irq();
+               if (irq < 0)
+                       return irq;
+#ifdef CONFIG_INTR_REMAP
+               if (!intr_remapping_enabled)
+                       goto no_ir;
 
+               if (!sub_handle) {
+                       /*
+                        * allocate the consecutive block of IRTE's
+                        * for 'nvec'
+                        */
+                       index = msi_alloc_irte(dev, irq, nvec);
+                       if (index < 0) {
+                               ret = index;
+                               goto error;
+                       }
+               } else {
+                       iommu = map_dev_to_ir(dev);
+                       if (!iommu) {
+                               ret = -ENOENT;
+                               goto error;
+                       }
+                       /*
+                        * setup the mapping between the irq and the IRTE
+                        * base index, the sub_handle pointing to the
+                        * appropriate interrupt remap table entry.
+                        */
+                       set_irte_irq(irq, iommu, index, sub_handle);
+               }
+no_ir:
+#endif
+               ret = setup_msi_irq(dev, desc, irq);
+               if (ret < 0)
+                       goto error;
+               sub_handle++;
+       }
        return 0;
+       /* NOTE(review): irqs from earlier iterations are not destroyed below. */
+error:
+       destroy_irq(irq);
+       return ret;
 }
 
 void arch_teardown_msi_irq(unsigned int irq)
@@ -2097,10 +2794,11 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2109,6 +2807,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2120,7 +2819,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        dmar_msi_write(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2174,9 +2874,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2185,11 +2886,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
        target_ht_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif
 
@@ -2206,7 +2909,7 @@ static struct irq_chip ht_irq_chip = {
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        int err;
        cpumask_t tmp;
 
@@ -2216,6 +2919,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
                struct ht_irq_msg msg;
                unsigned dest;
 
+               cfg = irq_cfg(irq);
                cpus_and(tmp, cfg->domain, tmp);
                dest = cpu_mask_to_apicid(tmp);
 
@@ -2314,6 +3018,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 void __init setup_ioapic_dest(void)
 {
        int pin, ioapic, irq, irq_entry;
+       struct irq_cfg *cfg;
 
        if (skip_ioapic_setup == 1)
                return;
@@ -2329,10 +3034,15 @@ void __init setup_ioapic_dest(void)
                         * when you have too many devices, because at that time only boot
                         * cpu is online.
                         */
-                       if (!irq_cfg[irq].vector)
+                       cfg = irq_cfg(irq);
+                       if (!cfg->vector)
                                setup_IO_APIC_irq(ioapic, pin, irq,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+                       else if (intr_remapping_enabled)
+                               set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
                        else
                                set_ioapic_affinity_irq(irq, TARGET_CPUS);
                }
This page took 0.044304 seconds and 5 git commands to generate.