From: Paul Mackerras Date: Mon, 10 Oct 2005 12:03:41 +0000 (+1000) Subject: powerpc: move pSeries files to arch/powerpc/platforms/pseries X-Git-Url: http://drtracing.org/?a=commitdiff_plain;h=69a80d3f69d0b2d7fae5a73c6e034d402d434d8a;p=deliverable%2Flinux.git powerpc: move pSeries files to arch/powerpc/platforms/pseries Signed-off-by: Paul Mackerras --- diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 7637ff3642c3..181ae612b2d3 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -4,4 +4,5 @@ endif obj-$(CONFIG_4xx) += 4xx/ obj-$(CONFIG_83xx) += 83xx/ obj-$(CONFIG_85xx) += 85xx/ +obj-$(CONFIG_PPC_PSERIES) += pseries/ obj-$(CONFIG_PPC_ISERIES) += iseries/ diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile new file mode 100644 index 000000000000..9ebb34180a10 --- /dev/null +++ b/arch/powerpc/platforms/pseries/Makefile @@ -0,0 +1,4 @@ +obj-y := pci.o lpar.o hvCall.o nvram.o reconfig.o \ + setup.o iommu.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_IBMVIO) += vio.o diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S new file mode 100644 index 000000000000..176e8da76466 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -0,0 +1,131 @@ +/* + * arch/ppc64/kernel/pSeries_hvCall.S + * + * This file contains the generic code to perform a call to the + * pSeries LPAR hypervisor. + * NOTE: this file will go away when we move to inline this work. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +#define STK_PARM(i) (48 + ((i)-3)*8) + + .text + +/* long plpar_hcall(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long *out1, R8 + unsigned long *out2, R9 + unsigned long *out3); R10 + */ +_GLOBAL(plpar_hcall) + HMT_MEDIUM + + mfcr r0 + + std r8,STK_PARM(r8)(r1) /* Save out ptrs */ + std r9,STK_PARM(r9)(r1) + std r10,STK_PARM(r10)(r1) + + stw r0,8(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + + ld r8,STK_PARM(r8)(r1) /* Fetch r4-r6 ret args */ + ld r9,STK_PARM(r9)(r1) + ld r10,STK_PARM(r10)(r1) + std r4,0(r8) + std r5,0(r9) + std r6,0(r10) + + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* Simple interface with no output values (other than status) */ +_GLOBAL(plpar_hcall_norets) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* long plpar_hcall_8arg_2ret(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long arg5, R8 + unsigned long arg6, R9 + unsigned long arg7, R10 + unsigned long arg8, 112(R1) + unsigned long *out1); 120(R1) + */ +_GLOBAL(plpar_hcall_8arg_2ret) + HMT_MEDIUM + + mfcr r0 + ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */ + stw r0,8(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + ld r10,STK_PARM(r12)(r1) /* Fetch r4 ret arg */ + std r4,0(r10) + mtcrf 0xff,r0 + blr /* return r3 = status */ + + +/* long plpar_hcall_4out(unsigned long opcode, R3 + unsigned long arg1, R4 + unsigned long arg2, R5 + unsigned long arg3, R6 + unsigned long arg4, R7 + unsigned long *out1, R8 + unsigned long *out2, R9 + unsigned long *out3, R10 + unsigned long *out4); 112(R1) + */ +_GLOBAL(plpar_hcall_4out) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + std r8,STK_PARM(r8)(r1) /* Save out ptrs */ + std r9,STK_PARM(r9)(r1) + std r10,STK_PARM(r10)(r1) + + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + + ld r8,STK_PARM(r8)(r1) /* Fetch r4-r7 ret args */ + ld r9,STK_PARM(r9)(r1) + ld r10,STK_PARM(r10)(r1) + ld r11,STK_PARM(r11)(r1) + std r4,0(r8) + std r5,0(r9) + std r6,0(r10) + std r7,0(r11) + + mtcrf 0xff,r0 + blr /* return r3 = status */ diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c new file mode 100644 index 000000000000..9e90d41131d8 --- /dev/null +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -0,0 +1,606 @@ +/* + * arch/ppc64/kernel/pSeries_iommu.c + * + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup: + * + * Copyright (C) 2004 Olof Johansson , IBM Corporation + * + * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DBG(fmt...) + +extern int is_python(struct device_node *); + +static void tce_build_pSeries(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + union tce_entry t; + union tce_entry *tp; + + index <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + + t.te_word = 0; + t.te_rdwr = 1; // Read allowed + + if (direction != DMA_TO_DEVICE) + t.te_pciwr = 1; + + tp = ((union tce_entry *)tbl->it_base) + index; + + while (npages--) { + /* can't move this out since we might cross LMB boundary */ + t.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + + tp->te_word = t.te_word; + + uaddr += TCE_PAGE_SIZE; + tp++; + } +} + + +static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) +{ + union tce_entry t; + union tce_entry *tp; + + npages <<= TCE_PAGE_FACTOR; + index <<= TCE_PAGE_FACTOR; + + t.te_word = 0; + tp = ((union tce_entry *)tbl->it_base) + index; + + while (npages--) { + tp->te_word = t.te_word; + + tp++; + } +} + + +static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce; + + tce.te_word = 0; + tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + tce.te_rdwr = 1; + if (direction != DMA_TO_DEVICE) + tce.te_pciwr = 1; + + while (npages--) { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word ); + + if (rc && printk_ratelimit()) { + printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } + + tcenum++; + tce.te_rpn++; + } +} + +static DEFINE_PER_CPU(void *, tce_page) = NULL; + +static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce, *tcep; + long l, limit; + + tcenum <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + + if (npages == 1) + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + direction); + + tcep = __get_cpu_var(tce_page); + + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() + */ + if (!tcep) { + tcep = (void *)__get_free_page(GFP_ATOMIC); + /* If allocation fails, fall back to the loop implementation */ + if (!tcep) + return tce_build_pSeriesLP(tbl, tcenum, npages, + uaddr, direction); + __get_cpu_var(tce_page) = tcep; + } + + tce.te_word = 0; + tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + tce.te_rdwr = 1; + if (direction != DMA_TO_DEVICE) + tce.te_pciwr = 1; + + /* We can map max one pageful of TCEs at a time */ + do { + /* + * Set up the page with TCE data, looping through and setting + * the values. + */ + limit = min_t(long, npages, 4096/sizeof(union tce_entry)); + + for (l = 0; l < limit; l++) { + tcep[l] = tce; + tce.te_rpn++; + } + + rc = plpar_tce_put_indirect((u64)tbl->it_index, + (u64)tcenum << 12, + (u64)virt_to_abs(tcep), + limit); + + npages -= limit; + tcenum += limit; + } while (npages > 0 && !rc); + + if (rc && printk_ratelimit()) { + printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%lx\n", (u64)npages); + printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word); + show_stack(current, (unsigned long *)__get_SP()); + } +} + +static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + union tce_entry tce; + + tcenum <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + + tce.te_word = 0; + + while (npages--) { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word); + + if (rc && printk_ratelimit()) { + printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } + + tcenum++; + } +} + + +static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + union tce_entry tce; + + tcenum <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + + tce.te_word = 0; + + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word, + npages); + + if (rc && printk_ratelimit()) { + printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); + printk("\trc = %ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%lx\n", (u64)npages); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } +} + +static void iommu_table_setparms(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl) +{ + struct device_node *node; + unsigned long *basep; + unsigned int *sizep; + + node = (struct device_node *)phb->arch_data; + + basep = (unsigned long *)get_property(node, "linux,tce-base", NULL); + sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL); + if (basep == NULL || sizep == NULL) { + printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has " + "missing tce entries !\n", dn->full_name); + return; + } + + tbl->it_base = (unsigned long)__va(*basep); + memset((void *)tbl->it_base, 0, *sizep); + + tbl->it_busno = phb->bus->number; + + /* Units of tce entries */ + tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT; + + /* Test if we are going over 2GB of DMA space */ + if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { + udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + } + + phb->dma_window_base_cur += phb->dma_window_size; + + /* Set the tce table size - measured in entries */ + tbl->it_size = phb->dma_window_size >> PAGE_SHIFT; + + tbl->it_index = 0; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; +} + +/* + * iommu_table_setparms_lpar + * + * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. + * + * ToDo: properly interpret the ibm,dma-window property. The definition is: + * logical-bus-number (1 word) + * phys-address (#address-cells words) + * size (#cell-size words) + * + * Currently we hard code these sizes (more or less). + */ +static void iommu_table_setparms_lpar(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl, + unsigned int *dma_window) +{ + tbl->it_busno = PCI_DN(dn)->bussubno; + + /* TODO: Parse field size properties properly. */ + tbl->it_size = (((unsigned long)dma_window[4] << 32) | + (unsigned long)dma_window[5]) >> PAGE_SHIFT; + tbl->it_offset = (((unsigned long)dma_window[2] << 32) | + (unsigned long)dma_window[3]) >> PAGE_SHIFT; + tbl->it_base = 0; + tbl->it_index = dma_window[0]; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; +} + +static void iommu_bus_setup_pSeries(struct pci_bus *bus) +{ + struct device_node *dn; + struct iommu_table *tbl; + struct device_node *isa_dn, *isa_dn_orig; + struct device_node *tmp; + struct pci_dn *pci; + int children; + + DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self); + + dn = pci_bus_to_OF_node(bus); + pci = PCI_DN(dn); + + if (bus->self) { + /* This is not a root bus, any setup will be done for the + * device-side of the bridge in iommu_dev_setup_pSeries(). + */ + return; + } + + /* Check if the ISA bus on the system is under + * this PHB. + */ + isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); + + while (isa_dn && isa_dn != dn) + isa_dn = isa_dn->parent; + + if (isa_dn_orig) + of_node_put(isa_dn_orig); + + /* Count number of direct PCI children of the PHB. + * All PCI device nodes have class-code property, so it's + * an easy way to find them. + */ + for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) + if (get_property(tmp, "class-code", NULL)) + children++; + + DBG("Children: %d\n", children); + + /* Calculate amount of DMA window per slot. Each window must be + * a power of two (due to pci_alloc_consistent requirements). + * + * Keep 256MB aside for PHBs with ISA. + */ + + if (!isa_dn) { + /* No ISA/IDE - just set window size and return */ + pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ + + while (pci->phb->dma_window_size * children > 0x80000000ul) + pci->phb->dma_window_size >>= 1; + DBG("No ISA/IDE, window size is 0x%lx\n", + pci->phb->dma_window_size); + pci->phb->dma_window_base_cur = 0; + + return; + } + + /* If we have ISA, then we probably have an IDE + * controller too. Allocate a 128MB table but + * skip the first 128MB to avoid stepping on ISA + * space. + */ + pci->phb->dma_window_size = 0x8000000ul; + pci->phb->dma_window_base_cur = 0x8000000ul; + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(pci->phb, dn, tbl); + pci->iommu_table = iommu_init_table(tbl); + + /* Divide the rest (1.75GB) among the children */ + pci->phb->dma_window_size = 0x80000000ul; + while (pci->phb->dma_window_size * children > 0x70000000ul) + pci->phb->dma_window_size >>= 1; + + DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size); + +} + + +static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) +{ + struct iommu_table *tbl; + struct device_node *dn, *pdn; + struct pci_dn *ppci; + unsigned int *dma_window = NULL; + + DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self); + + dn = pci_bus_to_OF_node(bus); + + /* Find nearest ibm,dma-window, walking up the device tree */ + for (pdn = dn; pdn != NULL; pdn = pdn->parent) { + dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL); + if (dma_window != NULL) + break; + } + + if (dma_window == NULL) { + DBG("iommu_bus_setup_pSeriesLP: bus %s seems to have no ibm,dma-window property\n", dn->full_name); + return; + } + + ppci = pdn->data; + if (!ppci->iommu_table) { + /* Bussubno hasn't been copied yet. + * Do it now because iommu_table_setparms_lpar needs it. + */ + + ppci->bussubno = bus->number; + + tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), + GFP_KERNEL); + + iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); + + ppci->iommu_table = iommu_init_table(tbl); + } + + if (pdn != dn) + PCI_DN(dn)->iommu_table = ppci->iommu_table; +} + + +static void iommu_dev_setup_pSeries(struct pci_dev *dev) +{ + struct device_node *dn, *mydn; + struct iommu_table *tbl; + + DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, pci_name(dev)); + + mydn = dn = pci_device_to_OF_node(dev); + + /* If we're the direct child of a root bus, then we need to allocate + * an iommu table ourselves. The bus setup code should have setup + * the window sizes already. + */ + if (!dev->bus->self) { + DBG(" --> first child, no bridge. Allocating iommu table.\n"); + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + iommu_table_setparms(PCI_DN(dn)->phb, dn, tbl); + PCI_DN(mydn)->iommu_table = iommu_init_table(tbl); + + return; + } + + /* If this device is further down the bus tree, search upwards until + * an already allocated iommu table is found and use that. + */ + + while (dn && dn->data && PCI_DN(dn)->iommu_table == NULL) + dn = dn->parent; + + if (dn && dn->data) { + PCI_DN(mydn)->iommu_table = PCI_DN(dn)->iommu_table; + } else { + DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, pci_name(dev)); + } +} + +static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +{ + int err = NOTIFY_OK; + struct device_node *np = node; + struct pci_dn *pci = np->data; + + switch (action) { + case PSERIES_RECONFIG_REMOVE: + if (pci->iommu_table && + get_property(np, "ibm,dma-window", NULL)) + iommu_free_table(np); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block iommu_reconfig_nb = { + .notifier_call = iommu_reconfig_notifier, +}; + +static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) +{ + struct device_node *pdn, *dn; + struct iommu_table *tbl; + int *dma_window = NULL; + struct pci_dn *pci; + + DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev)); + + /* dev setup for LPAR is a little tricky, since the device tree might + * contain the dma-window properties per-device and not neccesarily + * for the bus. So we need to search upwards in the tree until we + * either hit a dma-window property, OR find a parent with a table + * already allocated. + */ + dn = pci_device_to_OF_node(dev); + + for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table; + pdn = pdn->parent) { + dma_window = (unsigned int *) + get_property(pdn, "ibm,dma-window", NULL); + if (dma_window) + break; + } + + /* Check for parent == NULL so we don't try to setup the empty EADS + * slots on POWER4 machines. + */ + if (dma_window == NULL || pdn->parent == NULL) { + DBG("No dma window for device, linking to parent\n"); + PCI_DN(dn)->iommu_table = PCI_DN(pdn)->iommu_table; + return; + } else { + DBG("Found DMA window, allocating table\n"); + } + + pci = pdn->data; + if (!pci->iommu_table) { + /* iommu_table_setparms_lpar needs bussubno. */ + pci->bussubno = pci->phb->bus->number; + + tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), + GFP_KERNEL); + + iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); + + pci->iommu_table = iommu_init_table(tbl); + } + + if (pdn != dn) + PCI_DN(dn)->iommu_table = pci->iommu_table; +} + +static void iommu_bus_setup_null(struct pci_bus *b) { } +static void iommu_dev_setup_null(struct pci_dev *d) { } + +/* These are called very early. */ +void iommu_init_early_pSeries(void) +{ + if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) { + /* Direct I/O, IOMMU off */ + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + ppc_md.iommu_bus_setup = iommu_bus_setup_null; + pci_direct_iommu_init(); + + return; + } + + if (systemcfg->platform & PLATFORM_LPAR) { + if (firmware_has_feature(FW_FEATURE_MULTITCE)) { + ppc_md.tce_build = tce_buildmulti_pSeriesLP; + ppc_md.tce_free = tce_freemulti_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeriesLP; + ppc_md.tce_free = tce_free_pSeriesLP; + } + ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP; + ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeries; + ppc_md.tce_free = tce_free_pSeries; + ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries; + ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries; + } + + + pSeries_reconfig_notifier_register(&iommu_reconfig_nb); + + pci_iommu_init(); +} + diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c new file mode 100644 index 000000000000..268d8362dde7 --- /dev/null +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -0,0 +1,517 @@ +/* + * pSeries_lpar.c + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * pSeries LPAR support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* in pSeries_hvCall.S */ +EXPORT_SYMBOL(plpar_hcall); +EXPORT_SYMBOL(plpar_hcall_4out); +EXPORT_SYMBOL(plpar_hcall_norets); +EXPORT_SYMBOL(plpar_hcall_8arg_2ret); + +extern void pSeries_find_serial_port(void); + + +int vtermno; /* virtual terminal# for udbg */ + +#define __ALIGNED__ __attribute__((__aligned__(sizeof(long)))) +static void udbg_hvsi_putc(unsigned char c) +{ + /* packet's seqno isn't used anyways */ + uint8_t packet[] __ALIGNED__ = { 0xff, 5, 0, 0, c }; + int rc; + + if (c == '\n') + udbg_hvsi_putc('\r'); + + do { + rc = plpar_put_term_char(vtermno, sizeof(packet), packet); + } while (rc == H_Busy); +} + +static long hvsi_udbg_buf_len; +static uint8_t hvsi_udbg_buf[256]; + +static int udbg_hvsi_getc_poll(void) +{ + unsigned char ch; + int rc, i; + + if (hvsi_udbg_buf_len == 0) { + rc = plpar_get_term_char(vtermno, &hvsi_udbg_buf_len, hvsi_udbg_buf); + if (rc != H_Success || hvsi_udbg_buf[0] != 0xff) { + /* bad read or non-data packet */ + hvsi_udbg_buf_len = 0; + } else { + /* remove the packet header */ + for (i = 4; i < hvsi_udbg_buf_len; i++) + hvsi_udbg_buf[i-4] = hvsi_udbg_buf[i]; + hvsi_udbg_buf_len -= 4; + } + } + + if (hvsi_udbg_buf_len <= 0 || hvsi_udbg_buf_len > 256) { + /* no data ready */ + hvsi_udbg_buf_len = 0; + return -1; + } + + ch = hvsi_udbg_buf[0]; + /* shift remaining data down */ + for (i = 1; i < hvsi_udbg_buf_len; i++) { + hvsi_udbg_buf[i-1] = hvsi_udbg_buf[i]; + } + hvsi_udbg_buf_len--; + + return ch; +} + +static unsigned char udbg_hvsi_getc(void) +{ + int ch; + for (;;) { + ch = udbg_hvsi_getc_poll(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay=0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + +static void udbg_putcLP(unsigned char c) +{ + char buf[16]; + unsigned long rc; + + if (c == '\n') + udbg_putcLP('\r'); + + buf[0] = c; + do { + rc = plpar_put_term_char(vtermno, 1, buf); + } while(rc == H_Busy); +} + +/* Buffered chars getc */ +static long inbuflen; +static long inbuf[2]; /* must be 2 longs */ + +static int udbg_getc_pollLP(void) +{ + /* The interface is tricky because it may return up to 16 chars. + * We save them statically for future calls to udbg_getc(). + */ + char ch, *buf = (char *)inbuf; + int i; + long rc; + if (inbuflen == 0) { + /* get some more chars. */ + inbuflen = 0; + rc = plpar_get_term_char(vtermno, &inbuflen, buf); + if (rc != H_Success) + inbuflen = 0; /* otherwise inbuflen is garbage */ + } + if (inbuflen <= 0 || inbuflen > 16) { + /* Catch error case as well as other oddities (corruption) */ + inbuflen = 0; + return -1; + } + ch = buf[0]; + for (i = 1; i < inbuflen; i++) /* shuffle them down. */ + buf[i-1] = buf[i]; + inbuflen--; + return ch; +} + +static unsigned char udbg_getcLP(void) +{ + int ch; + for (;;) { + ch = udbg_getc_pollLP(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay=0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + +/* call this from early_init() for a working debug console on + * vterm capable LPAR machines + */ +void udbg_init_debug_lpar(void) +{ + vtermno = 0; + udbg_putc = udbg_putcLP; + udbg_getc = udbg_getcLP; + udbg_getc_poll = udbg_getc_pollLP; +} + +/* returns 0 if couldn't find or use /chosen/stdout as console */ +int find_udbg_vterm(void) +{ + struct device_node *stdout_node; + u32 *termno; + char *name; + int found = 0; + + /* find the boot console from /chosen/stdout */ + if (!of_chosen) + return 0; + name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + if (name == NULL) + return 0; + stdout_node = of_find_node_by_path(name); + if (!stdout_node) + return 0; + + /* now we have the stdout node; figure out what type of device it is. */ + name = (char *)get_property(stdout_node, "name", NULL); + if (!name) { + printk(KERN_WARNING "stdout node missing 'name' property!\n"); + goto out; + } + + if (strncmp(name, "vty", 3) == 0) { + if (device_is_compatible(stdout_node, "hvterm1")) { + termno = (u32 *)get_property(stdout_node, "reg", NULL); + if (termno) { + vtermno = termno[0]; + udbg_putc = udbg_putcLP; + udbg_getc = udbg_getcLP; + udbg_getc_poll = udbg_getc_pollLP; + found = 1; + } + } else if (device_is_compatible(stdout_node, "hvterm-protocol")) { + termno = (u32 *)get_property(stdout_node, "reg", NULL); + if (termno) { + vtermno = termno[0]; + udbg_putc = udbg_hvsi_putc; + udbg_getc = udbg_hvsi_getc; + udbg_getc_poll = udbg_hvsi_getc_poll; + found = 1; + } + } + } else if (strncmp(name, "serial", 6)) { + /* XXX fix ISA serial console */ + printk(KERN_WARNING "serial stdout on LPAR ('%s')! " + "can't print udbg messages\n", + stdout_node->full_name); + } else { + printk(KERN_WARNING "don't know how to print to stdout '%s'\n", + stdout_node->full_name); + } + +out: + of_node_put(stdout_node); + return found; +} + +void vpa_init(int cpu) +{ + int hwcpu = get_hard_smp_processor_id(cpu); + unsigned long vpa = (unsigned long)&(paca[cpu].lppaca); + long ret; + unsigned long flags; + + /* Register the Virtual Processor Area (VPA) */ + flags = 1UL << (63 - 18); + + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + paca[cpu].lppaca.vmxregs_in_use = 1; + + ret = register_vpa(flags, hwcpu, __pa(vpa)); + + if (ret) + printk(KERN_ERR "WARNING: vpa_init: VPA registration for " + "cpu %d (hw %d) of area %lx returns %ld\n", + cpu, hwcpu, __pa(vpa), ret); +} + +long pSeries_lpar_hpte_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, + unsigned long vflags, unsigned long rflags) +{ + unsigned long lpar_rc; + unsigned long flags; + unsigned long slot; + unsigned long hpte_v, hpte_r; + unsigned long dummy0, dummy1; + + hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID; + if (vflags & HPTE_V_LARGE) + hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); + + hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; + + /* Now fill in the actual HPTE */ + /* Set CEC cookie to 0 */ + /* Zero page = 0 */ + /* I-cache Invalidate = 0 */ + /* I-cache synchronize = 0 */ + /* Exact = 0 */ + flags = 0; + + /* XXX why is this here? - Anton */ + if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) + hpte_r &= ~_PAGE_COHERENT; + + lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v, + hpte_r, &slot, &dummy0, &dummy1); + + if (unlikely(lpar_rc == H_PTEG_Full)) + return -1; + + /* + * Since we try and ioremap PHBs we don't own, the pte insert + * will fail. However we must catch the failure in hash_page + * or we will loop forever, so return -2 in this case. + */ + if (unlikely(lpar_rc != H_Success)) + return -2; + + /* Because of iSeries, we have to pass down the secondary + * bucket bit here as well + */ + return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); +} + +static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); + +static long pSeries_lpar_hpte_remove(unsigned long hpte_group) +{ + unsigned long slot_offset; + unsigned long lpar_rc; + int i; + unsigned long dummy1, dummy2; + + /* pick a random slot to start at */ + slot_offset = mftb() & 0x7; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + + /* don't remove a bolted entry */ + lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, + (0x1UL << 4), &dummy1, &dummy2); + + if (lpar_rc == H_Success) + return i; + + BUG_ON(lpar_rc != H_Not_Found); + + slot_offset++; + slot_offset &= 0x7; + } + + return -1; +} + +static void pSeries_lpar_hptab_clear(void) +{ + unsigned long size_bytes = 1UL << ppc64_pft_size; + unsigned long hpte_count = size_bytes >> 4; + unsigned long dummy1, dummy2; + int i; + + /* TODO: Use bulk call */ + for (i = 0; i < hpte_count; i++) + plpar_pte_remove(0, i, 0, &dummy1, &dummy2); +} + +/* + * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and + * the low 3 bits of flags happen to line up. So no transform is needed. + * We can probably optimize here and assume the high bits of newpp are + * already zero. For now I am paranoid. + */ +static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large, int local) +{ + unsigned long lpar_rc; + unsigned long flags = (newpp & 7) | H_AVPN; + unsigned long avpn = va >> 23; + + if (large) + avpn &= ~0x1UL; + + lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7)); + + if (lpar_rc == H_Not_Found) + return -1; + + BUG_ON(lpar_rc != H_Success); + + return 0; +} + +static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) +{ + unsigned long dword0; + unsigned long lpar_rc; + unsigned long dummy_word1; + unsigned long flags; + + /* Read 1 pte at a time */ + /* Do not need RPN to logical page translation */ + /* No cross CEC PFT access */ + flags = 0; + + lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1); + + BUG_ON(lpar_rc != H_Success); + + return dword0; +} + +static long pSeries_lpar_hpte_find(unsigned long vpn) +{ + unsigned long hash; + unsigned long i, j; + long slot; + unsigned long hpte_v; + + hash = hpt_hash(vpn, 0); + + for (j = 0; j < 2; j++) { + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hpte_v = pSeries_lpar_hpte_getword0(slot); + + if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) + && (hpte_v & HPTE_V_VALID) + && (!!(hpte_v & HPTE_V_SECONDARY) == j)) { + /* HPTE matches */ + if (j) + slot = -slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + + return -1; +} + +static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, + unsigned long ea) +{ + unsigned long lpar_rc; + unsigned long vsid, va, vpn, flags; + long slot; + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + + slot = pSeries_lpar_hpte_find(vpn); + BUG_ON(slot == -1); + + flags = newpp & 7; + lpar_rc = plpar_pte_protect(flags, slot, 0); + + BUG_ON(lpar_rc != H_Success); +} + +static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + unsigned long avpn = va >> 23; + unsigned long lpar_rc; + unsigned long dummy1, dummy2; + + if (large) + avpn &= ~0x1UL; + + lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1, + &dummy2); + + if (lpar_rc == H_Not_Found) + return; + + BUG_ON(lpar_rc != H_Success); +} + +/* + * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie + * lock. + */ +void pSeries_lpar_flush_hash_range(unsigned long number, int local) +{ + int i; + unsigned long flags = 0; + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + + for (i = 0; i < number; i++) + flush_hash_page(batch->vaddr[i], batch->pte[i], local); + + if (lock_tlbie) + spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); +} + +void hpte_init_lpar(void) +{ + ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; + ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; + ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; + ppc_md.hpte_insert = pSeries_lpar_hpte_insert; + ppc_md.hpte_remove = pSeries_lpar_hpte_remove; + ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; + ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; + + htab_finish_init(); +} diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c new file mode 100644 index 000000000000..18abfb1f4e24 --- /dev/null +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -0,0 +1,148 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /dev/nvram driver for PPC64 + * + * This perhaps should live in drivers/char + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nvram_size; +static int nvram_fetch, nvram_store; +static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ +static DEFINE_SPINLOCK(nvram_lock); + + +static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + char *p = buf; + + + if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + memcpy(p, nvram_buf, len); + + p += len; + i += len; + } + + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + const char *p = buf; + + if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + memcpy(nvram_buf, p, len); + + if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + p += len; + i += len; + } + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_get_size(void) +{ + return nvram_size ? nvram_size : -ENODEV; +} + +int __init pSeries_nvram_init(void) +{ + struct device_node *nvram; + unsigned int *nbytes_p, proplen; + + nvram = of_find_node_by_type(NULL, "nvram"); + if (nvram == NULL) + return -ENODEV; + + nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) + return -EIO; + + nvram_size = *nbytes_p; + + nvram_fetch = rtas_token("nvram-fetch"); + nvram_store = rtas_token("nvram-store"); + printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); + of_node_put(nvram); + + ppc_md.nvram_read = pSeries_nvram_read; + ppc_md.nvram_write = pSeries_nvram_write; + ppc_md.nvram_size = pSeries_nvram_get_size; + + return 0; +} diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c new file mode 100644 index 000000000000..2dd477eb1c53 --- /dev/null +++ b/arch/powerpc/platforms/pseries/pci.c @@ -0,0 +1,142 @@ +/* + * arch/ppc64/kernel/pSeries_pci.c + * + * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard , IBM + * + * pSeries specific routines for PCI. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +static int __initdata s7a_workaround = -1; + +#if 0 +void pcibios_name_device(struct pci_dev *dev) +{ + struct device_node *dn; + + /* + * Add IBM loc code (slot) as a prefix to the device names for service + */ + dn = pci_device_to_OF_node(dev); + if (dn) { + char *loc_code = get_property(dn, "ibm,loc-code", 0); + if (loc_code) { + int loc_len = strlen(loc_code); + if (loc_len < sizeof(dev->dev.name)) { + memmove(dev->dev.name+loc_len+1, dev->dev.name, + sizeof(dev->dev.name)-loc_len-1); + memcpy(dev->dev.name, loc_code, loc_len); + dev->dev.name[loc_len] = ' '; + dev->dev.name[sizeof(dev->dev.name)-1] = '\0'; + } + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); +#endif + +static void __init check_s7a(void) +{ + struct device_node *root; + char *model; + + s7a_workaround = 0; + root = of_find_node_by_path("/"); + if (root) { + model = get_property(root, "model", NULL); + if (model && !strcmp(model, "IBM,7013-S7A")) + s7a_workaround = 1; + of_node_put(root); + } +} + +void __devinit pSeries_irq_bus_setup(struct pci_bus *bus) +{ + struct pci_dev *dev; + + if (s7a_workaround < 0) + check_s7a(); + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_read_irq_line(dev); + if (s7a_workaround) { + if (dev->irq > 16) { + dev->irq -= 3; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, + dev->irq); + } + } + } +} + +static void __init pSeries_request_regions(void) +{ + if (!isa_io_base) + return; + + request_region(0x20,0x20,"pic1"); + request_region(0xa0,0x20,"pic2"); + request_region(0x00,0x20,"dma1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xc0,0x20,"dma2"); +} + +void __init pSeries_final_fixup(void) +{ + phbs_remap_io(); + pSeries_request_regions(); + + pci_addr_cache_build(); +} + +/* + * Assume the winbond 82c105 is the IDE controller on a + * p610. We should probably be more careful in case + * someone tries to plug in a similar adapter. + */ +static void fixup_winbond_82c105(struct pci_dev* dev) +{ + int i; + unsigned int reg; + + if (!(systemcfg->platform & PLATFORM_PSERIES)) + return; + + printk("Using INTC for W82c105 IDE controller.\n"); + pci_read_config_dword(dev, 0x40, ®); + /* Enable LEGIRQ to use INTC instead of ISA interrupts */ + pci_write_config_dword(dev, 0x40, reg | (1<<11)); + + for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) { + /* zap the 2nd function of the winbond chip */ + if (dev->resource[i].flags & IORESOURCE_IO + && dev->bus->number == 0 && dev->devfn == 0x81) + dev->resource[i].flags &= ~IORESOURCE_IO; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, + fixup_winbond_82c105); diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c new file mode 100644 index 000000000000..58c61219d08e --- /dev/null +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -0,0 +1,426 @@ +/* + * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI + * Hotplug and Dynamic Logical Partitioning on RPA platforms). + * + * Copyright (C) 2005 Nathan Lynch + * Copyright (C) 2005 IBM Corporation + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include + + + +/* + * Routines for "runtime" addition and removal of device tree nodes. + */ +#ifdef CONFIG_PROC_DEVICETREE +/* + * Add a node to /proc/device-tree. + */ +static void add_node_proc_entries(struct device_node *np) +{ + struct proc_dir_entry *ent; + + ent = proc_mkdir(strrchr(np->full_name, '/') + 1, np->parent->pde); + if (ent) + proc_device_tree_add_node(np, ent); +} + +static void remove_node_proc_entries(struct device_node *np) +{ + struct property *pp = np->properties; + struct device_node *parent = np->parent; + + while (pp) { + remove_proc_entry(pp->name, np->pde); + pp = pp->next; + } + if (np->pde) + remove_proc_entry(np->pde->name, parent->pde); +} +#else /* !CONFIG_PROC_DEVICETREE */ +static void add_node_proc_entries(struct device_node *np) +{ + return; +} + +static void remove_node_proc_entries(struct device_node *np) +{ + return; +} +#endif /* CONFIG_PROC_DEVICETREE */ + +/** + * derive_parent - basically like dirname(1) + * @path: the full_name of a node to be added to the tree + * + * Returns the node which should be the parent of the node + * described by path. E.g., for path = "/foo/bar", returns + * the node with full_name = "/foo". + */ +static struct device_node *derive_parent(const char *path) +{ + struct device_node *parent = NULL; + char *parent_path = "/"; + size_t parent_path_len = strrchr(path, '/') - path + 1; + + /* reject if path is "/" */ + if (!strcmp(path, "/")) + return ERR_PTR(-EINVAL); + + if (strrchr(path, '/') != path) { + parent_path = kmalloc(parent_path_len, GFP_KERNEL); + if (!parent_path) + return ERR_PTR(-ENOMEM); + strlcpy(parent_path, path, parent_path_len); + } + parent = of_find_node_by_path(parent_path); + if (!parent) + return ERR_PTR(-EINVAL); + if (strcmp(parent_path, "/")) + kfree(parent_path); + return parent; +} + +static struct notifier_block *pSeries_reconfig_chain; + +int pSeries_reconfig_notifier_register(struct notifier_block *nb) +{ + return notifier_chain_register(&pSeries_reconfig_chain, nb); +} + +void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) +{ + notifier_chain_unregister(&pSeries_reconfig_chain, nb); +} + +static int pSeries_reconfig_add_node(const char *path, struct property *proplist) +{ + struct device_node *np; + int err = -ENOMEM; + + np = kzalloc(sizeof(*np), GFP_KERNEL); + if (!np) + goto out_err; + + np->full_name = kmalloc(strlen(path) + 1, GFP_KERNEL); + if (!np->full_name) + goto out_err; + + strcpy(np->full_name, path); + + np->properties = proplist; + OF_MARK_DYNAMIC(np); + kref_init(&np->kref); + + np->parent = derive_parent(path); + if (IS_ERR(np->parent)) { + err = PTR_ERR(np->parent); + goto out_err; + } + + err = notifier_call_chain(&pSeries_reconfig_chain, + PSERIES_RECONFIG_ADD, np); + if (err == NOTIFY_BAD) { + printk(KERN_ERR "Failed to add device node %s\n", path); + err = -ENOMEM; /* For now, safe to assume kmalloc failure */ + goto out_err; + } + + of_attach_node(np); + + add_node_proc_entries(np); + + of_node_put(np->parent); + + return 0; + +out_err: + if (np) { + of_node_put(np->parent); + kfree(np->full_name); + kfree(np); + } + return err; +} + +static int pSeries_reconfig_remove_node(struct device_node *np) +{ + struct device_node *parent, *child; + + parent = of_get_parent(np); + if (!parent) + return -EINVAL; + + if ((child = of_get_next_child(np, NULL))) { + of_node_put(child); + return -EBUSY; + } + + remove_node_proc_entries(np); + + notifier_call_chain(&pSeries_reconfig_chain, + PSERIES_RECONFIG_REMOVE, np); + of_detach_node(np); + + of_node_put(parent); + of_node_put(np); /* Must decrement the refcount */ + return 0; +} + +/* + * /proc/ppc64/ofdt - yucky binary interface for adding and removing + * OF device nodes. Should be deprecated as soon as we get an + * in-kernel wrapper for the RTAS ibm,configure-connector call. + */ + +static void release_prop_list(const struct property *prop) +{ + struct property *next; + for (; prop; prop = next) { + next = prop->next; + kfree(prop->name); + kfree(prop->value); + kfree(prop); + } + +} + +/** + * parse_next_property - process the next property from raw input buffer + * @buf: input buffer, must be nul-terminated + * @end: end of the input buffer + 1, for validation + * @name: return value; set to property name in buf + * @length: return value; set to length of value + * @value: return value; set to the property value in buf + * + * Note that the caller must make copies of the name and value returned, + * this function does no allocation or copying of the data. Return value + * is set to the next name in buf, or NULL on error. + */ +static char * parse_next_property(char *buf, char *end, char **name, int *length, + unsigned char **value) +{ + char *tmp; + + *name = buf; + + tmp = strchr(buf, ' '); + if (!tmp) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + *tmp = '\0'; + + if (++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + + /* now we're on the length */ + *length = -1; + *length = simple_strtoul(tmp, &tmp, 10); + if (*length == -1) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + if (*tmp != ' ' || ++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + + /* now we're on the value */ + *value = tmp; + tmp += *length; + if (tmp > end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + else if (tmp < end && *tmp != ' ' && *tmp != '\0') { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __FUNCTION__, __LINE__); + return NULL; + } + tmp++; + + /* and now we should be on the next name, or the end */ + return tmp; +} + +static struct property *new_property(const char *name, const int length, + const unsigned char *value, struct property *last) +{ + struct property *new = kmalloc(sizeof(*new), GFP_KERNEL); + + if (!new) + return NULL; + memset(new, 0, sizeof(*new)); + + if (!(new->name = kmalloc(strlen(name) + 1, GFP_KERNEL))) + goto cleanup; + if (!(new->value = kmalloc(length + 1, GFP_KERNEL))) + goto cleanup; + + strcpy(new->name, name); + memcpy(new->value, value, length); + *(((char *)new->value) + length) = 0; + new->length = length; + new->next = last; + return new; + +cleanup: + if (new->name) + kfree(new->name); + if (new->value) + kfree(new->value); + kfree(new); + return NULL; +} + +static int do_add_node(char *buf, size_t bufsize) +{ + char *path, *end, *name; + struct device_node *np; + struct property *prop = NULL; + unsigned char* value; + int length, rv = 0; + + end = buf + bufsize; + path = buf; + buf = strchr(buf, ' '); + if (!buf) + return -EINVAL; + *buf = '\0'; + buf++; + + if ((np = of_find_node_by_path(path))) { + of_node_put(np); + return -EINVAL; + } + + /* rv = build_prop_list(tmp, bufsize - (tmp - buf), &proplist); */ + while (buf < end && + (buf = parse_next_property(buf, end, &name, &length, &value))) { + struct property *last = prop; + + prop = new_property(name, length, value, last); + if (!prop) { + rv = -ENOMEM; + prop = last; + goto out; + } + } + if (!buf) { + rv = -EINVAL; + goto out; + } + + rv = pSeries_reconfig_add_node(path, prop); + +out: + if (rv) + release_prop_list(prop); + return rv; +} + +static int do_remove_node(char *buf) +{ + struct device_node *node; + int rv = -ENODEV; + + if ((node = of_find_node_by_path(buf))) + rv = pSeries_reconfig_remove_node(node); + + of_node_put(node); + return rv; +} + +/** + * ofdt_write - perform operations on the Open Firmware device tree + * + * @file: not used + * @buf: command and arguments + * @count: size of the command buffer + * @off: not used + * + * Operations supported at this time are addition and removal of + * whole nodes along with their properties. Operations on individual + * properties are not implemented (yet). + */ +static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count, + loff_t *off) +{ + int rv = 0; + char *kbuf; + char *tmp; + + if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) { + rv = -ENOMEM; + goto out; + } + if (copy_from_user(kbuf, buf, count)) { + rv = -EFAULT; + goto out; + } + + kbuf[count] = '\0'; + + tmp = strchr(kbuf, ' '); + if (!tmp) { + rv = -EINVAL; + goto out; + } + *tmp = '\0'; + tmp++; + + if (!strcmp(kbuf, "add_node")) + rv = do_add_node(tmp, count - (tmp - kbuf)); + else if (!strcmp(kbuf, "remove_node")) + rv = do_remove_node(tmp); + else + rv = -EINVAL; +out: + kfree(kbuf); + return rv ? rv : count; +} + +static struct file_operations ofdt_fops = { + .write = ofdt_write +}; + +/* create /proc/ppc64/ofdt write-only by root */ +static int proc_ppc64_create_ofdt(void) +{ + struct proc_dir_entry *ent; + + if (!(systemcfg->platform & PLATFORM_PSERIES)) + return 0; + + ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL); + if (ent) { + ent->nlink = 1; + ent->data = NULL; + ent->size = 0; + ent->proc_fops = &ofdt_fops; + } + + return 0; +} +__initcall(proc_ppc64_create_ofdt); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c new file mode 100644 index 000000000000..eb25ee2eead8 --- /dev/null +++ b/arch/powerpc/platforms/pseries/setup.c @@ -0,0 +1,622 @@ +/* + * linux/arch/ppc/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * bootup setup stuff.. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +extern void find_udbg_vterm(void); +extern void system_reset_fwnmi(void); /* from head.S */ +extern void machine_check_fwnmi(void); /* from head.S */ +extern void generic_find_legacy_serial_ports(u64 *physport, + unsigned int *default_speed); + +int fwnmi_active; /* TRUE if an FWNMI handler is present */ + +extern void pSeries_system_reset_exception(struct pt_regs *regs); +extern int pSeries_machine_check_exception(struct pt_regs *regs); + +static int pseries_shared_idle(void); +static int pseries_dedicated_idle(void); + +static volatile void __iomem * chrp_int_ack_special; +struct mpic *pSeries_mpic; + +void pSeries_get_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +/* Initialize firmware assisted non-maskable interrupts if + * the firmware supports this feature. + * + */ +static void __init fwnmi_init(void) +{ + int ret; + int ibm_nmi_register = rtas_token("ibm,nmi-register"); + if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) + return; + ret = rtas_call(ibm_nmi_register, 2, 1, NULL, + __pa((unsigned long)system_reset_fwnmi), + __pa((unsigned long)machine_check_fwnmi)); + if (ret == 0) + fwnmi_active = 1; +} + +static int pSeries_irq_cascade(struct pt_regs *regs, void *data) +{ + if (chrp_int_ack_special) + return readb(chrp_int_ack_special); + else + return i8259_irq(regs); +} + +static void __init pSeries_init_mpic(void) +{ + unsigned int *addrp; + struct device_node *np; + int i; + + /* All ISUs are setup, complete initialization */ + mpic_init(pSeries_mpic); + + /* Check what kind of cascade ACK we have */ + if (!(np = of_find_node_by_name(NULL, "pci")) + || !(addrp = (unsigned int *) + get_property(np, "8259-interrupt-acknowledge", NULL))) + printk(KERN_ERR "Cannot find pci to get ack address\n"); + else + chrp_int_ack_special = ioremap(addrp[prom_n_addr_cells(np)-1], 1); + of_node_put(np); + + /* Setup the legacy interrupts & controller */ + for (i = 0; i < NUM_ISA_INTERRUPTS; i++) + irq_desc[i].handler = &i8259_pic; + i8259_init(0); + + /* Hook cascade to mpic */ + mpic_setup_cascade(NUM_ISA_INTERRUPTS, pSeries_irq_cascade, NULL); +} + +static void __init pSeries_setup_mpic(void) +{ + unsigned int *opprop; + unsigned long openpic_addr = 0; + unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS]; + struct device_node *root; + int irq_count; + + /* Find the Open PIC if present */ + root = of_find_node_by_path("/"); + opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL); + if (opprop != 0) { + int n = prom_n_addr_cells(root); + + for (openpic_addr = 0; n > 0; --n) + openpic_addr = (openpic_addr << 32) + *opprop++; + printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); + } + of_node_put(root); + + BUG_ON(openpic_addr == 0); + + /* Get the sense values from OF */ + prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS); + + /* Setup the openpic driver */ + irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */ + pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY, + 16, 16, irq_count, /* isu size, irq offset, irq count */ + NR_IRQS - 4, /* ipi offset */ + senses, irq_count, /* sense & sense size */ + " MPIC "); +} + +static void pseries_lpar_enable_pmcs(void) +{ + unsigned long set, reset; + + power4_enable_pmcs(); + + set = 1UL << 63; + reset = 0; + plpar_hcall_norets(H_PERFMON, set, reset); + + /* instruct hypervisor to maintain PMCs */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + get_paca()->lppaca.pmcregs_in_use = 1; +} + +static void __init pSeries_setup_arch(void) +{ + /* Fixup ppc_md depending on the type of interrupt controller */ + if (ppc64_interrupt_controller == IC_OPEN_PIC) { + ppc_md.init_IRQ = pSeries_init_mpic; + ppc_md.get_irq = mpic_get_irq; + ppc_md.cpu_irq_down = mpic_teardown_this_cpu; + /* Allocate the mpic now, so that find_and_init_phbs() can + * fill the ISUs */ + pSeries_setup_mpic(); + } else { + ppc_md.init_IRQ = xics_init_IRQ; + ppc_md.get_irq = xics_get_irq; + ppc_md.cpu_irq_down = xics_teardown_cpu; + } + +#ifdef CONFIG_SMP + smp_init_pSeries(); +#endif + /* openpic global configuration register (64-bit format). */ + /* openpic Interrupt Source Unit pointer (64-bit format). */ + /* python0 facility area (mmio) (64-bit format) REAL address. */ + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + if (ROOT_DEV == 0) { + printk("No ramdisk, default root is /dev/sda2\n"); + ROOT_DEV = Root_SDA2; + } + + fwnmi_init(); + + /* Find and initialize PCI host bridges */ + init_pci_config_tokens(); + find_and_init_phbs(); + eeh_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + pSeries_nvram_init(); + + /* Choose an idle loop */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + vpa_init(boot_cpuid); + if (get_paca()->lppaca.shared_proc) { + printk(KERN_INFO "Using shared processor idle loop\n"); + ppc_md.idle_loop = pseries_shared_idle; + } else { + printk(KERN_INFO "Using dedicated idle loop\n"); + ppc_md.idle_loop = pseries_dedicated_idle; + } + } else { + printk(KERN_INFO "Using default idle loop\n"); + ppc_md.idle_loop = default_idle; + } + + if (systemcfg->platform & PLATFORM_LPAR) + ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; + else + ppc_md.enable_pmcs = power4_enable_pmcs; +} + +static int __init pSeries_init_panel(void) +{ + /* Manually leave the kernel version on the panel. */ + ppc_md.progress("Linux ppc64\n", 0); + ppc_md.progress(system_utsname.version, 0); + + return 0; +} +arch_initcall(pSeries_init_panel); + + +/* Build up the ppc64_firmware_features bitmask field + * using contents of device-tree/ibm,hypertas-functions. + * Ultimately this functionality may be moved into prom.c prom_init(). + */ +static void __init fw_feature_init(void) +{ + struct device_node * dn; + char * hypertas; + unsigned int len; + + DBG(" -> fw_feature_init()\n"); + + ppc64_firmware_features = 0; + dn = of_find_node_by_path("/rtas"); + if (dn == NULL) { + printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n"); + goto no_rtas; + } + + hypertas = get_property(dn, "ibm,hypertas-functions", &len); + if (hypertas) { + while (len > 0){ + int i, hypertas_len; + /* check value against table of strings */ + for(i=0; i < FIRMWARE_MAX_FEATURES ;i++) { + if ((firmware_features_table[i].name) && + (strcmp(firmware_features_table[i].name,hypertas))==0) { + /* we have a match */ + ppc64_firmware_features |= + (firmware_features_table[i].val); + break; + } + } + hypertas_len = strlen(hypertas); + len -= hypertas_len +1; + hypertas+= hypertas_len +1; + } + } + + of_node_put(dn); + no_rtas: + printk(KERN_INFO "firmware_features = 0x%lx\n", + ppc64_firmware_features); + + DBG(" <- fw_feature_init()\n"); +} + + +static void __init pSeries_discover_pic(void) +{ + struct device_node *np; + char *typep; + + /* + * Setup interrupt mapping options that are needed for finish_device_tree + * to properly parse the OF interrupt tree & do the virtual irq mapping + */ + __irq_offset_value = NUM_ISA_INTERRUPTS; + ppc64_interrupt_controller = IC_INVALID; + for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) { + typep = (char *)get_property(np, "compatible", NULL); + if (strstr(typep, "open-pic")) + ppc64_interrupt_controller = IC_OPEN_PIC; + else if (strstr(typep, "ppc-xicp")) + ppc64_interrupt_controller = IC_PPC_XIC; + else + printk("pSeries_discover_pic: failed to recognize" + " interrupt-controller\n"); + break; + } +} + +static void pSeries_mach_cpu_die(void) +{ + local_irq_disable(); + idle_task_exit(); + /* Some hardware requires clearing the CPPR, while other hardware does not + * it is safe either way + */ + pSeriesLP_cppr_info(0, 0); + rtas_stop_self(); + /* Should never get here... */ + BUG(); + for(;;); +} + + +/* + * Early initialization. Relocation is on but do not reference unbolted pages + */ +static void __init pSeries_init_early(void) +{ + void *comport; + int iommu_off = 0; + unsigned int default_speed; + u64 physport; + + DBG(" -> pSeries_init_early()\n"); + + fw_feature_init(); + + if (systemcfg->platform & PLATFORM_LPAR) + hpte_init_lpar(); + else { + hpte_init_native(); + iommu_off = (of_chosen && + get_property(of_chosen, "linux,iommu-off", NULL)); + } + + generic_find_legacy_serial_ports(&physport, &default_speed); + + if (systemcfg->platform & PLATFORM_LPAR) + find_udbg_vterm(); + else if (physport) { + /* Map the uart for udbg. */ + comport = (void *)ioremap(physport, 16); + udbg_init_uart(comport, default_speed); + + DBG("Hello World !\n"); + } + + + iommu_init_early_pSeries(); + + pSeries_discover_pic(); + + DBG(" <- pSeries_init_early()\n"); +} + + +static int pSeries_check_legacy_ioport(unsigned int baseport) +{ + struct device_node *np; + +#define I8042_DATA_REG 0x60 +#define FDC_BASE 0x3f0 + + + switch(baseport) { + case I8042_DATA_REG: + np = of_find_node_by_type(NULL, "8042"); + if (np == NULL) + return -ENODEV; + of_node_put(np); + break; + case FDC_BASE: + np = of_find_node_by_type(NULL, "fdc"); + if (np == NULL) + return -ENODEV; + of_node_put(np); + break; + } + return 0; +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +extern struct machdep_calls pSeries_md; + +static int __init pSeries_probe(int platform) +{ + if (platform != PLATFORM_PSERIES && + platform != PLATFORM_PSERIES_LPAR) + return 0; + + /* if we have some ppc_md fixups for LPAR to do, do + * it here ... + */ + + return 1; +} + +DECLARE_PER_CPU(unsigned long, smt_snooze_delay); + +static inline void dedicated_idle_sleep(unsigned int cpu) +{ + struct paca_struct *ppaca = &paca[cpu ^ 1]; + + /* Only sleep if the other thread is not idle */ + if (!(ppaca->lppaca.idle)) { + local_irq_disable(); + + /* + * We are about to sleep the thread and so wont be polling any + * more. + */ + clear_thread_flag(TIF_POLLING_NRFLAG); + + /* + * SMT dynamic mode. Cede will result in this thread going + * dormant, if the partner thread is still doing work. Thread + * wakes up if partner goes idle, an interrupt is presented, or + * a prod occurs. Returning from the cede enables external + * interrupts. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + } else { + /* + * Give the HV an opportunity at the processor, since we are + * not doing any work. + */ + poll_pending(); + } +} + +static int pseries_dedicated_idle(void) +{ + long oldval; + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + unsigned long start_snooze; + unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + start_snooze = __get_tb() + + *smt_snooze_delay * tb_ticks_per_usec; + + while (!need_resched() && !cpu_is_offline(cpu)) { + ppc64_runlatch_off(); + + /* + * Go into low thread priority and possibly + * low power mode. + */ + HMT_low(); + HMT_very_low(); + + if (*smt_snooze_delay != 0 && + __get_tb() > start_snooze) { + HMT_medium(); + dedicated_idle_sleep(cpu); + } + + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } +} + +static int pseries_shared_idle(void) +{ + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + while (!need_resched() && !cpu_is_offline(cpu)) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + * + * Check need_resched() again with interrupts disabled + * to avoid a race. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + + HMT_medium(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } + + return 0; +} + +static int pSeries_pci_probe_mode(struct pci_bus *bus) +{ + if (systemcfg->platform & PLATFORM_LPAR) + return PCI_PROBE_DEVTREE; + return PCI_PROBE_NORMAL; +} + +struct machdep_calls __initdata pSeries_md = { + .probe = pSeries_probe, + .setup_arch = pSeries_setup_arch, + .init_early = pSeries_init_early, + .get_cpuinfo = pSeries_get_cpuinfo, + .log_error = pSeries_log_error, + .pcibios_fixup = pSeries_final_fixup, + .pci_probe_mode = pSeries_pci_probe_mode, + .irq_bus_setup = pSeries_irq_bus_setup, + .restart = rtas_restart, + .power_off = rtas_power_off, + .halt = rtas_halt, + .panic = rtas_os_term, + .cpu_die = pSeries_mach_cpu_die, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = rtas_progress, + .check_legacy_ioport = pSeries_check_legacy_ioport, + .system_reset_exception = pSeries_system_reset_exception, + .machine_check_exception = pSeries_machine_check_exception, +}; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c new file mode 100644 index 000000000000..ae1bd270f308 --- /dev/null +++ b/arch/powerpc/platforms/pseries/smp.c @@ -0,0 +1,471 @@ +/* + * SMP support for pSeries machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * The primary thread of each non-boot processor is recorded here before + * smp init. + */ +static cpumask_t of_spin_map; + +extern void pSeries_secondary_smp_init(unsigned long); + +#ifdef CONFIG_HOTPLUG_CPU + +/* Get state of physical CPU. + * Return codes: + * 0 - The processor is in the RTAS stopped state + * 1 - stop-self is in progress + * 2 - The processor is not in the RTAS stopped state + * -1 - Hardware Error + * -2 - Hardware Busy, Try again later. + */ +static int query_cpu_stopped(unsigned int pcpu) +{ + int cpu_status; + int status, qcss_tok; + + qcss_tok = rtas_token("query-cpu-stopped-state"); + if (qcss_tok == RTAS_UNKNOWN_SERVICE) + return -1; + status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); + if (status != 0) { + printk(KERN_ERR + "RTAS query-cpu-stopped-state failed: %i\n", status); + return status; + } + + return cpu_status; +} + +int pSeries_cpu_disable(void) +{ + int cpu = smp_processor_id(); + + cpu_clear(cpu, cpu_online_map); + systemcfg->processorCount--; + + /*fix boot_cpuid here*/ + if (cpu == boot_cpuid) + boot_cpuid = any_online_cpu(cpu_online_map); + + /* FIXME: abstract this to not be platform specific later on */ + xics_migrate_irqs_away(); + return 0; +} + +void pSeries_cpu_die(unsigned int cpu) +{ + int tries; + int cpu_status; + unsigned int pcpu = get_hard_smp_processor_id(cpu); + + for (tries = 0; tries < 25; tries++) { + cpu_status = query_cpu_stopped(pcpu); + if (cpu_status == 0 || cpu_status == -1) + break; + msleep(200); + } + if (cpu_status != 0) { + printk("Querying DEAD? cpu %i (%i) shows %i\n", + cpu, pcpu, cpu_status); + } + + /* Isolation and deallocation are definatly done by + * drslot_chrp_cpu. If they were not they would be + * done here. Change isolate state to Isolate and + * change allocation-state to Unusable. + */ + paca[cpu].cpu_start = 0; +} + +/* + * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle + * here is that a cpu device node may represent up to two logical cpus + * in the SMT case. We must honor the assumption in other code that + * the logical ids for sibling SMT threads x and y are adjacent, such + * that x^1 == y and y^1 == x. + */ +static int pSeries_add_processor(struct device_node *np) +{ + unsigned int cpu; + cpumask_t candidate_map, tmp = CPU_MASK_NONE; + int err = -ENOSPC, len, nthreads, i; + u32 *intserv; + + intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return 0; + + nthreads = len / sizeof(u32); + for (i = 0; i < nthreads; i++) + cpu_set(i, tmp); + + lock_cpu_hotplug(); + + BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map)); + + /* Get a bitmap of unoccupied slots. */ + cpus_xor(candidate_map, cpu_possible_map, cpu_present_map); + if (cpus_empty(candidate_map)) { + /* If we get here, it most likely means that NR_CPUS is + * less than the partition's max processors setting. + */ + printk(KERN_ERR "Cannot add cpu %s; this system configuration" + " supports %d logical cpus.\n", np->full_name, + cpus_weight(cpu_possible_map)); + goto out_unlock; + } + + while (!cpus_empty(tmp)) + if (cpus_subset(tmp, candidate_map)) + /* Found a range where we can insert the new cpu(s) */ + break; + else + cpus_shift_left(tmp, tmp, nthreads); + + if (cpus_empty(tmp)) { + printk(KERN_ERR "Unable to find space in cpu_present_map for" + " processor %s with %d thread(s)\n", np->name, + nthreads); + goto out_unlock; + } + + for_each_cpu_mask(cpu, tmp) { + BUG_ON(cpu_isset(cpu, cpu_present_map)); + cpu_set(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, *intserv++); + } + err = 0; +out_unlock: + unlock_cpu_hotplug(); + return err; +} + +/* + * Update the present map for a cpu node which is going away, and set + * the hard id in the paca(s) to -1 to be consistent with boot time + * convention for non-present cpus. + */ +static void pSeries_remove_processor(struct device_node *np) +{ + unsigned int cpu; + int len, nthreads, i; + u32 *intserv; + + intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return; + + nthreads = len / sizeof(u32); + + lock_cpu_hotplug(); + for (i = 0; i < nthreads; i++) { + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != intserv[i]) + continue; + BUG_ON(cpu_online(cpu)); + cpu_clear(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, -1); + break; + } + if (cpu == NR_CPUS) + printk(KERN_WARNING "Could not find cpu to remove " + "with physical id 0x%x\n", intserv[i]); + } + unlock_cpu_hotplug(); +} + +static int pSeries_smp_notifier(struct notifier_block *nb, unsigned long action, void *node) +{ + int err = NOTIFY_OK; + + switch (action) { + case PSERIES_RECONFIG_ADD: + if (pSeries_add_processor(node)) + err = NOTIFY_BAD; + break; + case PSERIES_RECONFIG_REMOVE: + pSeries_remove_processor(node); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block pSeries_smp_nb = { + .notifier_call = pSeries_smp_notifier, +}; + +#endif /* CONFIG_HOTPLUG_CPU */ + +/** + * smp_startup_cpu() - start the given cpu + * + * At boot time, there is nothing to do for primary threads which were + * started from Open Firmware. For anything else, call RTAS with the + * appropriate start location. + * + * Returns: + * 0 - failure + * 1 - success + */ +static inline int __devinit smp_startup_cpu(unsigned int lcpu) +{ + int status; + unsigned long start_here = __pa((u32)*((unsigned long *) + pSeries_secondary_smp_init)); + unsigned int pcpu; + int start_cpu; + + if (cpu_isset(lcpu, of_spin_map)) + /* Already started by OF and sitting in spin loop */ + return 1; + + pcpu = get_hard_smp_processor_id(lcpu); + + /* Fixup atomic count: it exited inside IRQ handler. */ + paca[lcpu].__current->thread_info->preempt_count = 0; + + /* + * If the RTAS start-cpu token does not exist then presume the + * cpu is already spinning. + */ + start_cpu = rtas_token("start-cpu"); + if (start_cpu == RTAS_UNKNOWN_SERVICE) + return 1; + + status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu); + if (status != 0) { + printk(KERN_ERR "start-cpu failed: %i\n", status); + return 0; + } + + return 1; +} + +#ifdef CONFIG_XICS +static inline void smp_xics_do_message(int cpu, int msg) +{ + set_bit(msg, &xics_ipi_message[cpu].value); + mb(); + xics_cause_IPI(cpu); +} + +static void smp_xics_message_pass(int target, int msg) +{ + unsigned int i; + + if (target < NR_CPUS) { + smp_xics_do_message(target, msg); + } else { + for_each_online_cpu(i) { + if (target == MSG_ALL_BUT_SELF + && i == smp_processor_id()) + continue; + smp_xics_do_message(i, msg); + } + } +} + +static int __init smp_xics_probe(void) +{ + xics_request_IPIs(); + + return cpus_weight(cpu_possible_map); +} + +static void __devinit smp_xics_setup_cpu(int cpu) +{ + if (cpu != boot_cpuid) + xics_setup_cpu(); + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + vpa_init(cpu); + + cpu_clear(cpu, of_spin_map); + +} +#endif /* CONFIG_XICS */ + +static DEFINE_SPINLOCK(timebase_lock); +static unsigned long timebase = 0; + +static void __devinit pSeries_give_timebase(void) +{ + spin_lock(&timebase_lock); + rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + timebase = get_tb(); + spin_unlock(&timebase_lock); + + while (timebase) + barrier(); + rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); +} + +static void __devinit pSeries_take_timebase(void) +{ + while (!timebase) + barrier(); + spin_lock(&timebase_lock); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + spin_unlock(&timebase_lock); +} + +static void __devinit smp_pSeries_kick_cpu(int nr) +{ + BUG_ON(nr < 0 || nr >= NR_CPUS); + + if (!smp_startup_cpu(nr)) + return; + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca[nr].cpu_start = 1; +} + +static int smp_pSeries_cpu_bootable(unsigned int nr) +{ + /* Special case - we inhibit secondary thread startup + * during boot if the user requests it. Odd-numbered + * cpus are assumed to be secondary threads. + */ + if (system_state < SYSTEM_RUNNING && + cpu_has_feature(CPU_FTR_SMT) && + !smt_enabled_at_boot && nr % 2 != 0) + return 0; + + return 1; +} +#ifdef CONFIG_MPIC +static struct smp_ops_t pSeries_mpic_smp_ops = { + .message_pass = smp_mpic_message_pass, + .probe = smp_mpic_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_mpic_setup_cpu, +}; +#endif +#ifdef CONFIG_XICS +static struct smp_ops_t pSeries_xics_smp_ops = { + .message_pass = smp_xics_message_pass, + .probe = smp_xics_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_xics_setup_cpu, + .cpu_bootable = smp_pSeries_cpu_bootable, +}; +#endif + +/* This is called very early */ +void __init smp_init_pSeries(void) +{ + int i; + + DBG(" -> smp_init_pSeries()\n"); + + switch (ppc64_interrupt_controller) { +#ifdef CONFIG_MPIC + case IC_OPEN_PIC: + smp_ops = &pSeries_mpic_smp_ops; + break; +#endif +#ifdef CONFIG_XICS + case IC_PPC_XIC: + smp_ops = &pSeries_xics_smp_ops; + break; +#endif + default: + panic("Invalid interrupt controller"); + } + +#ifdef CONFIG_HOTPLUG_CPU + smp_ops->cpu_disable = pSeries_cpu_disable; + smp_ops->cpu_die = pSeries_cpu_die; + + /* Processors can be added/removed only on LPAR */ + if (systemcfg->platform == PLATFORM_PSERIES_LPAR) + pSeries_reconfig_notifier_register(&pSeries_smp_nb); +#endif + + /* Mark threads which are still spinning in hold loops. */ + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (i % 2 == 0) + /* + * Even-numbered logical cpus correspond to + * primary threads. + */ + cpu_set(i, of_spin_map); + } + } else { + of_spin_map = cpu_present_map; + } + + cpu_clear(boot_cpuid, of_spin_map); + + /* Non-lpar has additional take/give timebase */ + if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { + smp_ops->give_timebase = pSeries_give_timebase; + smp_ops->take_timebase = pSeries_take_timebase; + } + + DBG(" <- smp_init_pSeries()\n"); +} + diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c new file mode 100644 index 000000000000..866379b80c09 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vio.c @@ -0,0 +1,274 @@ +/* + * IBM PowerPC pSeries Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003-2005 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Santiago Leon santil@us.ibm.com + * Hollis Blanchard + * Stephen Rothwell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct subsystem devices_subsys; /* needed for vio_find_name() */ + +static void probe_bus_pseries(void) +{ + struct device_node *node_vroot, *of_node; + + node_vroot = find_devices("vdevice"); + if ((node_vroot == NULL) || (node_vroot->child == NULL)) + /* this machine doesn't do virtual IO, and that's ok */ + return; + + /* + * Create struct vio_devices for each virtual device in the device tree. + * Drivers will associate with them later. + */ + for (of_node = node_vroot->child; of_node != NULL; + of_node = of_node->sibling) { + printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); + vio_register_device_node(of_node); + } +} + +/** + * vio_match_device_pseries: - Tell if a pSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_pseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && + device_is_compatible(dev->dev.platform_data, id->compat); +} + +static void vio_release_device_pseries(struct device *dev) +{ + /* XXX free TCE table */ + of_node_put(dev->platform_data); +} + +static ssize_t viodev_show_devspec(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct device_node *of_node = dev->platform_data; + + return sprintf(buf, "%s\n", of_node->full_name); +} +DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); + +static void vio_unregister_device_pseries(struct vio_dev *viodev) +{ + device_remove_file(&viodev->dev, &dev_attr_devspec); +} + +static struct vio_bus_ops vio_bus_ops_pseries = { + .match = vio_match_device_pseries, + .unregister_device = vio_unregister_device_pseries, + .release_device = vio_release_device_pseries, +}; + +/** + * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus + */ +static int __init vio_bus_init_pseries(void) +{ + int err; + + err = vio_bus_init(&vio_bus_ops_pseries); + if (err == 0) + probe_bus_pseries(); + return err; +} + +__initcall(vio_bus_init_pseries); + +/** + * vio_build_iommu_table: - gets the dma information from OF and + * builds the TCE tree. + * @dev: the virtual device. + * + * Returns a pointer to the built tce tree, or NULL if it can't + * find property. +*/ +static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) +{ + unsigned int *dma_window; + struct iommu_table *newTceTable; + unsigned long offset; + int dma_window_property_size; + + dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); + if(!dma_window) { + return NULL; + } + + newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + /* There should be some code to extract the phys-encoded offset + using prom_n_addr_cells(). However, according to a comment + on earlier versions, it's always zero, so we don't bother */ + offset = dma_window[1] >> PAGE_SHIFT; + + /* TCE table size - measured in tce entries */ + newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; + /* offset for VIO should always be 0 */ + newTceTable->it_offset = offset; + newTceTable->it_busno = 0; + newTceTable->it_index = (unsigned long)dma_window[0]; + newTceTable->it_type = TCE_VB; + + return iommu_init_table(newTceTable); +} + +/** + * vio_register_device_node: - Register a new vio device. + * @of_node: The OF node for this device. + * + * Creates and initializes a vio_dev structure from the data in + * of_node (dev.platform_data) and adds it to the list of virtual devices. + * Returns a pointer to the created vio_dev or NULL if node has + * NULL device_type or compatible fields. + */ +struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) +{ + struct vio_dev *viodev; + unsigned int *unit_address; + unsigned int *irq_p; + + /* we need the 'device_type' property, in order to match with drivers */ + if ((NULL == of_node->type)) { + printk(KERN_WARNING + "%s: node %s missing 'device_type'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + unit_address = (unsigned int *)get_property(of_node, "reg", NULL); + if (!unit_address) { + printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) { + return NULL; + } + memset(viodev, 0, sizeof(struct vio_dev)); + + viodev->dev.platform_data = of_node_get(of_node); + + viodev->irq = NO_IRQ; + irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); + if (irq_p) { + int virq = virt_irq_create_mapping(*irq_p); + if (virq == NO_IRQ) { + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", of_node->full_name); + } else + viodev->irq = irq_offset_up(virq); + } + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); + viodev->name = of_node->name; + viodev->type = of_node->type; + viodev->unit_address = *unit_address; + viodev->iommu_table = vio_build_iommu_table(viodev); + + /* register with generic device framework */ + if (vio_register_device(viodev) == NULL) { + /* XXX free TCE table */ + kfree(viodev); + return NULL; + } + device_create_file(&viodev->dev, &dev_attr_devspec); + + return viodev; +} +EXPORT_SYMBOL(vio_register_device_node); + +/** + * vio_get_attribute: - get attribute for virtual device + * @vdev: The vio device to get property. + * @which: The property/attribute to be extracted. + * @length: Pointer to length of returned data size (unused if NULL). + * + * Calls prom.c's get_property() to return the value of the + * attribute specified by the preprocessor constant @which +*/ +const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) +{ + return get_property(vdev->dev.platform_data, (char*)which, length); +} +EXPORT_SYMBOL(vio_get_attribute); + +/* vio_find_name() - internal because only vio.c knows how we formatted the + * kobject name + * XXX once vio_bus_type.devices is actually used as a kset in + * drivers/base/bus.c, this function should be removed in favor of + * "device_find(kobj_name, &vio_bus_type)" + */ +static struct vio_dev *vio_find_name(const char *kobj_name) +{ + struct kobject *found; + + found = kset_find_obj(&devices_subsys.kset, kobj_name); + if (!found) + return NULL; + + return to_vio_dev(container_of(found, struct device, kobj)); +} + +/** + * vio_find_node - find an already-registered vio_dev + * @vnode: device_node of the virtual device we're looking for + */ +struct vio_dev *vio_find_node(struct device_node *vnode) +{ + uint32_t *unit_address; + char kobj_name[BUS_ID_SIZE]; + + /* construct the kobject name from the device node */ + unit_address = (uint32_t *)get_property(vnode, "reg", NULL); + if (!unit_address) + return NULL; + snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); + + return vio_find_name(kobj_name); +} +EXPORT_SYMBOL(vio_find_node); + +int vio_enable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); + if (rc != H_Success) + printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_enable_interrupts); + +int vio_disable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); + if (rc != H_Success) + printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_disable_interrupts); diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index 32fd3f1c7935..6a0fea272386 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -24,9 +24,7 @@ obj-$(CONFIG_PCI) += pci.o pci_iommu.o iomap.o $(pci-obj-y) obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o prom_init.o -obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \ - pSeries_nvram.o rtasd.o ras.o pSeries_reconfig.o \ - pSeries_setup.o pSeries_iommu.o udbg_16550.o +obj-$(CONFIG_PPC_PSERIES) += rtasd.o ras.o udbg_16550.o obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \ bpa_iic.o spider-pic.o diff --git a/arch/ppc64/kernel/pSeries_hvCall.S b/arch/ppc64/kernel/pSeries_hvCall.S deleted file mode 100644 index 176e8da76466..000000000000 --- a/arch/ppc64/kernel/pSeries_hvCall.S +++ /dev/null @@ -1,131 +0,0 @@ -/* - * arch/ppc64/kernel/pSeries_hvCall.S - * - * This file contains the generic code to perform a call to the - * pSeries LPAR hypervisor. - * NOTE: this file will go away when we move to inline this work. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include - -#define STK_PARM(i) (48 + ((i)-3)*8) - - .text - -/* long plpar_hcall(unsigned long opcode, R3 - unsigned long arg1, R4 - unsigned long arg2, R5 - unsigned long arg3, R6 - unsigned long arg4, R7 - unsigned long *out1, R8 - unsigned long *out2, R9 - unsigned long *out3); R10 - */ -_GLOBAL(plpar_hcall) - HMT_MEDIUM - - mfcr r0 - - std r8,STK_PARM(r8)(r1) /* Save out ptrs */ - std r9,STK_PARM(r9)(r1) - std r10,STK_PARM(r10)(r1) - - stw r0,8(r1) - - HVSC /* invoke the hypervisor */ - - lwz r0,8(r1) - - ld r8,STK_PARM(r8)(r1) /* Fetch r4-r6 ret args */ - ld r9,STK_PARM(r9)(r1) - ld r10,STK_PARM(r10)(r1) - std r4,0(r8) - std r5,0(r9) - std r6,0(r10) - - mtcrf 0xff,r0 - blr /* return r3 = status */ - - -/* Simple interface with no output values (other than status) */ -_GLOBAL(plpar_hcall_norets) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - HVSC /* invoke the hypervisor */ - - lwz r0,8(r1) - mtcrf 0xff,r0 - blr /* return r3 = status */ - - -/* long plpar_hcall_8arg_2ret(unsigned long opcode, R3 - unsigned long arg1, R4 - unsigned long arg2, R5 - unsigned long arg3, R6 - unsigned long arg4, R7 - unsigned long arg5, R8 - unsigned long arg6, R9 - unsigned long arg7, R10 - unsigned long arg8, 112(R1) - unsigned long *out1); 120(R1) - */ -_GLOBAL(plpar_hcall_8arg_2ret) - HMT_MEDIUM - - mfcr r0 - ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */ - stw r0,8(r1) - - HVSC /* invoke the hypervisor */ - - lwz r0,8(r1) - ld r10,STK_PARM(r12)(r1) /* Fetch r4 ret arg */ - std r4,0(r10) - mtcrf 0xff,r0 - blr /* return r3 = status */ - - -/* long plpar_hcall_4out(unsigned long opcode, R3 - unsigned long arg1, R4 - unsigned long arg2, R5 - unsigned long arg3, R6 - unsigned long arg4, R7 - unsigned long *out1, R8 - unsigned long *out2, R9 - unsigned long *out3, R10 - unsigned long *out4); 112(R1) - */ -_GLOBAL(plpar_hcall_4out) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - std r8,STK_PARM(r8)(r1) /* Save out ptrs */ - std r9,STK_PARM(r9)(r1) - std r10,STK_PARM(r10)(r1) - - HVSC /* invoke the hypervisor */ - - lwz r0,8(r1) - - ld r8,STK_PARM(r8)(r1) /* Fetch r4-r7 ret args */ - ld r9,STK_PARM(r9)(r1) - ld r10,STK_PARM(r10)(r1) - ld r11,STK_PARM(r11)(r1) - std r4,0(r8) - std r5,0(r9) - std r6,0(r10) - std r7,0(r11) - - mtcrf 0xff,r0 - blr /* return r3 = status */ diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c deleted file mode 100644 index 9e90d41131d8..000000000000 --- a/arch/ppc64/kernel/pSeries_iommu.c +++ /dev/null @@ -1,606 +0,0 @@ -/* - * arch/ppc64/kernel/pSeries_iommu.c - * - * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation - * - * Rewrite, cleanup: - * - * Copyright (C) 2004 Olof Johansson , IBM Corporation - * - * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DBG(fmt...) - -extern int is_python(struct device_node *); - -static void tce_build_pSeries(struct iommu_table *tbl, long index, - long npages, unsigned long uaddr, - enum dma_data_direction direction) -{ - union tce_entry t; - union tce_entry *tp; - - index <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - - t.te_word = 0; - t.te_rdwr = 1; // Read allowed - - if (direction != DMA_TO_DEVICE) - t.te_pciwr = 1; - - tp = ((union tce_entry *)tbl->it_base) + index; - - while (npages--) { - /* can't move this out since we might cross LMB boundary */ - t.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; - - tp->te_word = t.te_word; - - uaddr += TCE_PAGE_SIZE; - tp++; - } -} - - -static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) -{ - union tce_entry t; - union tce_entry *tp; - - npages <<= TCE_PAGE_FACTOR; - index <<= TCE_PAGE_FACTOR; - - t.te_word = 0; - tp = ((union tce_entry *)tbl->it_base) + index; - - while (npages--) { - tp->te_word = t.te_word; - - tp++; - } -} - - -static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, - long npages, unsigned long uaddr, - enum dma_data_direction direction) -{ - u64 rc; - union tce_entry tce; - - tce.te_word = 0; - tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; - tce.te_rdwr = 1; - if (direction != DMA_TO_DEVICE) - tce.te_pciwr = 1; - - while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, - (u64)tcenum << 12, - tce.te_word ); - - if (rc && printk_ratelimit()) { - printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); - printk("\tindex = 0x%lx\n", (u64)tbl->it_index); - printk("\ttcenum = 0x%lx\n", (u64)tcenum); - printk("\ttce val = 0x%lx\n", tce.te_word ); - show_stack(current, (unsigned long *)__get_SP()); - } - - tcenum++; - tce.te_rpn++; - } -} - -static DEFINE_PER_CPU(void *, tce_page) = NULL; - -static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, - long npages, unsigned long uaddr, - enum dma_data_direction direction) -{ - u64 rc; - union tce_entry tce, *tcep; - long l, limit; - - tcenum <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - - if (npages == 1) - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, - direction); - - tcep = __get_cpu_var(tce_page); - - /* This is safe to do since interrupts are off when we're called - * from iommu_alloc{,_sg}() - */ - if (!tcep) { - tcep = (void *)__get_free_page(GFP_ATOMIC); - /* If allocation fails, fall back to the loop implementation */ - if (!tcep) - return tce_build_pSeriesLP(tbl, tcenum, npages, - uaddr, direction); - __get_cpu_var(tce_page) = tcep; - } - - tce.te_word = 0; - tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; - tce.te_rdwr = 1; - if (direction != DMA_TO_DEVICE) - tce.te_pciwr = 1; - - /* We can map max one pageful of TCEs at a time */ - do { - /* - * Set up the page with TCE data, looping through and setting - * the values. - */ - limit = min_t(long, npages, 4096/sizeof(union tce_entry)); - - for (l = 0; l < limit; l++) { - tcep[l] = tce; - tce.te_rpn++; - } - - rc = plpar_tce_put_indirect((u64)tbl->it_index, - (u64)tcenum << 12, - (u64)virt_to_abs(tcep), - limit); - - npages -= limit; - tcenum += limit; - } while (npages > 0 && !rc); - - if (rc && printk_ratelimit()) { - printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); - printk("\tindex = 0x%lx\n", (u64)tbl->it_index); - printk("\tnpages = 0x%lx\n", (u64)npages); - printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word); - show_stack(current, (unsigned long *)__get_SP()); - } -} - -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) -{ - u64 rc; - union tce_entry tce; - - tcenum <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - - tce.te_word = 0; - - while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, - (u64)tcenum << 12, - tce.te_word); - - if (rc && printk_ratelimit()) { - printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); - printk("\tindex = 0x%lx\n", (u64)tbl->it_index); - printk("\ttcenum = 0x%lx\n", (u64)tcenum); - printk("\ttce val = 0x%lx\n", tce.te_word ); - show_stack(current, (unsigned long *)__get_SP()); - } - - tcenum++; - } -} - - -static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) -{ - u64 rc; - union tce_entry tce; - - tcenum <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - - tce.te_word = 0; - - rc = plpar_tce_stuff((u64)tbl->it_index, - (u64)tcenum << 12, - tce.te_word, - npages); - - if (rc && printk_ratelimit()) { - printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); - printk("\trc = %ld\n", rc); - printk("\tindex = 0x%lx\n", (u64)tbl->it_index); - printk("\tnpages = 0x%lx\n", (u64)npages); - printk("\ttce val = 0x%lx\n", tce.te_word ); - show_stack(current, (unsigned long *)__get_SP()); - } -} - -static void iommu_table_setparms(struct pci_controller *phb, - struct device_node *dn, - struct iommu_table *tbl) -{ - struct device_node *node; - unsigned long *basep; - unsigned int *sizep; - - node = (struct device_node *)phb->arch_data; - - basep = (unsigned long *)get_property(node, "linux,tce-base", NULL); - sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL); - if (basep == NULL || sizep == NULL) { - printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has " - "missing tce entries !\n", dn->full_name); - return; - } - - tbl->it_base = (unsigned long)__va(*basep); - memset((void *)tbl->it_base, 0, *sizep); - - tbl->it_busno = phb->bus->number; - - /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT; - - /* Test if we are going over 2GB of DMA space */ - if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { - udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); - panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); - } - - phb->dma_window_base_cur += phb->dma_window_size; - - /* Set the tce table size - measured in entries */ - tbl->it_size = phb->dma_window_size >> PAGE_SHIFT; - - tbl->it_index = 0; - tbl->it_blocksize = 16; - tbl->it_type = TCE_PCI; -} - -/* - * iommu_table_setparms_lpar - * - * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. - * - * ToDo: properly interpret the ibm,dma-window property. The definition is: - * logical-bus-number (1 word) - * phys-address (#address-cells words) - * size (#cell-size words) - * - * Currently we hard code these sizes (more or less). - */ -static void iommu_table_setparms_lpar(struct pci_controller *phb, - struct device_node *dn, - struct iommu_table *tbl, - unsigned int *dma_window) -{ - tbl->it_busno = PCI_DN(dn)->bussubno; - - /* TODO: Parse field size properties properly. */ - tbl->it_size = (((unsigned long)dma_window[4] << 32) | - (unsigned long)dma_window[5]) >> PAGE_SHIFT; - tbl->it_offset = (((unsigned long)dma_window[2] << 32) | - (unsigned long)dma_window[3]) >> PAGE_SHIFT; - tbl->it_base = 0; - tbl->it_index = dma_window[0]; - tbl->it_blocksize = 16; - tbl->it_type = TCE_PCI; -} - -static void iommu_bus_setup_pSeries(struct pci_bus *bus) -{ - struct device_node *dn; - struct iommu_table *tbl; - struct device_node *isa_dn, *isa_dn_orig; - struct device_node *tmp; - struct pci_dn *pci; - int children; - - DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self); - - dn = pci_bus_to_OF_node(bus); - pci = PCI_DN(dn); - - if (bus->self) { - /* This is not a root bus, any setup will be done for the - * device-side of the bridge in iommu_dev_setup_pSeries(). - */ - return; - } - - /* Check if the ISA bus on the system is under - * this PHB. - */ - isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); - - while (isa_dn && isa_dn != dn) - isa_dn = isa_dn->parent; - - if (isa_dn_orig) - of_node_put(isa_dn_orig); - - /* Count number of direct PCI children of the PHB. - * All PCI device nodes have class-code property, so it's - * an easy way to find them. - */ - for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) - if (get_property(tmp, "class-code", NULL)) - children++; - - DBG("Children: %d\n", children); - - /* Calculate amount of DMA window per slot. Each window must be - * a power of two (due to pci_alloc_consistent requirements). - * - * Keep 256MB aside for PHBs with ISA. - */ - - if (!isa_dn) { - /* No ISA/IDE - just set window size and return */ - pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ - - while (pci->phb->dma_window_size * children > 0x80000000ul) - pci->phb->dma_window_size >>= 1; - DBG("No ISA/IDE, window size is 0x%lx\n", - pci->phb->dma_window_size); - pci->phb->dma_window_base_cur = 0; - - return; - } - - /* If we have ISA, then we probably have an IDE - * controller too. Allocate a 128MB table but - * skip the first 128MB to avoid stepping on ISA - * space. - */ - pci->phb->dma_window_size = 0x8000000ul; - pci->phb->dma_window_base_cur = 0x8000000ul; - - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - iommu_table_setparms(pci->phb, dn, tbl); - pci->iommu_table = iommu_init_table(tbl); - - /* Divide the rest (1.75GB) among the children */ - pci->phb->dma_window_size = 0x80000000ul; - while (pci->phb->dma_window_size * children > 0x70000000ul) - pci->phb->dma_window_size >>= 1; - - DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size); - -} - - -static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) -{ - struct iommu_table *tbl; - struct device_node *dn, *pdn; - struct pci_dn *ppci; - unsigned int *dma_window = NULL; - - DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self); - - dn = pci_bus_to_OF_node(bus); - - /* Find nearest ibm,dma-window, walking up the device tree */ - for (pdn = dn; pdn != NULL; pdn = pdn->parent) { - dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL); - if (dma_window != NULL) - break; - } - - if (dma_window == NULL) { - DBG("iommu_bus_setup_pSeriesLP: bus %s seems to have no ibm,dma-window property\n", dn->full_name); - return; - } - - ppci = pdn->data; - if (!ppci->iommu_table) { - /* Bussubno hasn't been copied yet. - * Do it now because iommu_table_setparms_lpar needs it. - */ - - ppci->bussubno = bus->number; - - tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), - GFP_KERNEL); - - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); - - ppci->iommu_table = iommu_init_table(tbl); - } - - if (pdn != dn) - PCI_DN(dn)->iommu_table = ppci->iommu_table; -} - - -static void iommu_dev_setup_pSeries(struct pci_dev *dev) -{ - struct device_node *dn, *mydn; - struct iommu_table *tbl; - - DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, pci_name(dev)); - - mydn = dn = pci_device_to_OF_node(dev); - - /* If we're the direct child of a root bus, then we need to allocate - * an iommu table ourselves. The bus setup code should have setup - * the window sizes already. - */ - if (!dev->bus->self) { - DBG(" --> first child, no bridge. Allocating iommu table.\n"); - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - iommu_table_setparms(PCI_DN(dn)->phb, dn, tbl); - PCI_DN(mydn)->iommu_table = iommu_init_table(tbl); - - return; - } - - /* If this device is further down the bus tree, search upwards until - * an already allocated iommu table is found and use that. - */ - - while (dn && dn->data && PCI_DN(dn)->iommu_table == NULL) - dn = dn->parent; - - if (dn && dn->data) { - PCI_DN(mydn)->iommu_table = PCI_DN(dn)->iommu_table; - } else { - DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, pci_name(dev)); - } -} - -static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) -{ - int err = NOTIFY_OK; - struct device_node *np = node; - struct pci_dn *pci = np->data; - - switch (action) { - case PSERIES_RECONFIG_REMOVE: - if (pci->iommu_table && - get_property(np, "ibm,dma-window", NULL)) - iommu_free_table(np); - break; - default: - err = NOTIFY_DONE; - break; - } - return err; -} - -static struct notifier_block iommu_reconfig_nb = { - .notifier_call = iommu_reconfig_notifier, -}; - -static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) -{ - struct device_node *pdn, *dn; - struct iommu_table *tbl; - int *dma_window = NULL; - struct pci_dn *pci; - - DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev)); - - /* dev setup for LPAR is a little tricky, since the device tree might - * contain the dma-window properties per-device and not neccesarily - * for the bus. So we need to search upwards in the tree until we - * either hit a dma-window property, OR find a parent with a table - * already allocated. - */ - dn = pci_device_to_OF_node(dev); - - for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table; - pdn = pdn->parent) { - dma_window = (unsigned int *) - get_property(pdn, "ibm,dma-window", NULL); - if (dma_window) - break; - } - - /* Check for parent == NULL so we don't try to setup the empty EADS - * slots on POWER4 machines. - */ - if (dma_window == NULL || pdn->parent == NULL) { - DBG("No dma window for device, linking to parent\n"); - PCI_DN(dn)->iommu_table = PCI_DN(pdn)->iommu_table; - return; - } else { - DBG("Found DMA window, allocating table\n"); - } - - pci = pdn->data; - if (!pci->iommu_table) { - /* iommu_table_setparms_lpar needs bussubno. */ - pci->bussubno = pci->phb->bus->number; - - tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), - GFP_KERNEL); - - iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); - - pci->iommu_table = iommu_init_table(tbl); - } - - if (pdn != dn) - PCI_DN(dn)->iommu_table = pci->iommu_table; -} - -static void iommu_bus_setup_null(struct pci_bus *b) { } -static void iommu_dev_setup_null(struct pci_dev *d) { } - -/* These are called very early. */ -void iommu_init_early_pSeries(void) -{ - if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) { - /* Direct I/O, IOMMU off */ - ppc_md.iommu_dev_setup = iommu_dev_setup_null; - ppc_md.iommu_bus_setup = iommu_bus_setup_null; - pci_direct_iommu_init(); - - return; - } - - if (systemcfg->platform & PLATFORM_LPAR) { - if (firmware_has_feature(FW_FEATURE_MULTITCE)) { - ppc_md.tce_build = tce_buildmulti_pSeriesLP; - ppc_md.tce_free = tce_freemulti_pSeriesLP; - } else { - ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; - } - ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP; - ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP; - } else { - ppc_md.tce_build = tce_build_pSeries; - ppc_md.tce_free = tce_free_pSeries; - ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries; - ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries; - } - - - pSeries_reconfig_notifier_register(&iommu_reconfig_nb); - - pci_iommu_init(); -} - diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c deleted file mode 100644 index 268d8362dde7..000000000000 --- a/arch/ppc64/kernel/pSeries_lpar.c +++ /dev/null @@ -1,517 +0,0 @@ -/* - * pSeries_lpar.c - * Copyright (C) 2001 Todd Inglett, IBM Corporation - * - * pSeries LPAR support. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#define DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* in pSeries_hvCall.S */ -EXPORT_SYMBOL(plpar_hcall); -EXPORT_SYMBOL(plpar_hcall_4out); -EXPORT_SYMBOL(plpar_hcall_norets); -EXPORT_SYMBOL(plpar_hcall_8arg_2ret); - -extern void pSeries_find_serial_port(void); - - -int vtermno; /* virtual terminal# for udbg */ - -#define __ALIGNED__ __attribute__((__aligned__(sizeof(long)))) -static void udbg_hvsi_putc(unsigned char c) -{ - /* packet's seqno isn't used anyways */ - uint8_t packet[] __ALIGNED__ = { 0xff, 5, 0, 0, c }; - int rc; - - if (c == '\n') - udbg_hvsi_putc('\r'); - - do { - rc = plpar_put_term_char(vtermno, sizeof(packet), packet); - } while (rc == H_Busy); -} - -static long hvsi_udbg_buf_len; -static uint8_t hvsi_udbg_buf[256]; - -static int udbg_hvsi_getc_poll(void) -{ - unsigned char ch; - int rc, i; - - if (hvsi_udbg_buf_len == 0) { - rc = plpar_get_term_char(vtermno, &hvsi_udbg_buf_len, hvsi_udbg_buf); - if (rc != H_Success || hvsi_udbg_buf[0] != 0xff) { - /* bad read or non-data packet */ - hvsi_udbg_buf_len = 0; - } else { - /* remove the packet header */ - for (i = 4; i < hvsi_udbg_buf_len; i++) - hvsi_udbg_buf[i-4] = hvsi_udbg_buf[i]; - hvsi_udbg_buf_len -= 4; - } - } - - if (hvsi_udbg_buf_len <= 0 || hvsi_udbg_buf_len > 256) { - /* no data ready */ - hvsi_udbg_buf_len = 0; - return -1; - } - - ch = hvsi_udbg_buf[0]; - /* shift remaining data down */ - for (i = 1; i < hvsi_udbg_buf_len; i++) { - hvsi_udbg_buf[i-1] = hvsi_udbg_buf[i]; - } - hvsi_udbg_buf_len--; - - return ch; -} - -static unsigned char udbg_hvsi_getc(void) -{ - int ch; - for (;;) { - ch = udbg_hvsi_getc_poll(); - if (ch == -1) { - /* This shouldn't be needed...but... */ - volatile unsigned long delay; - for (delay=0; delay < 2000000; delay++) - ; - } else { - return ch; - } - } -} - -static void udbg_putcLP(unsigned char c) -{ - char buf[16]; - unsigned long rc; - - if (c == '\n') - udbg_putcLP('\r'); - - buf[0] = c; - do { - rc = plpar_put_term_char(vtermno, 1, buf); - } while(rc == H_Busy); -} - -/* Buffered chars getc */ -static long inbuflen; -static long inbuf[2]; /* must be 2 longs */ - -static int udbg_getc_pollLP(void) -{ - /* The interface is tricky because it may return up to 16 chars. - * We save them statically for future calls to udbg_getc(). - */ - char ch, *buf = (char *)inbuf; - int i; - long rc; - if (inbuflen == 0) { - /* get some more chars. */ - inbuflen = 0; - rc = plpar_get_term_char(vtermno, &inbuflen, buf); - if (rc != H_Success) - inbuflen = 0; /* otherwise inbuflen is garbage */ - } - if (inbuflen <= 0 || inbuflen > 16) { - /* Catch error case as well as other oddities (corruption) */ - inbuflen = 0; - return -1; - } - ch = buf[0]; - for (i = 1; i < inbuflen; i++) /* shuffle them down. */ - buf[i-1] = buf[i]; - inbuflen--; - return ch; -} - -static unsigned char udbg_getcLP(void) -{ - int ch; - for (;;) { - ch = udbg_getc_pollLP(); - if (ch == -1) { - /* This shouldn't be needed...but... */ - volatile unsigned long delay; - for (delay=0; delay < 2000000; delay++) - ; - } else { - return ch; - } - } -} - -/* call this from early_init() for a working debug console on - * vterm capable LPAR machines - */ -void udbg_init_debug_lpar(void) -{ - vtermno = 0; - udbg_putc = udbg_putcLP; - udbg_getc = udbg_getcLP; - udbg_getc_poll = udbg_getc_pollLP; -} - -/* returns 0 if couldn't find or use /chosen/stdout as console */ -int find_udbg_vterm(void) -{ - struct device_node *stdout_node; - u32 *termno; - char *name; - int found = 0; - - /* find the boot console from /chosen/stdout */ - if (!of_chosen) - return 0; - name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); - if (name == NULL) - return 0; - stdout_node = of_find_node_by_path(name); - if (!stdout_node) - return 0; - - /* now we have the stdout node; figure out what type of device it is. */ - name = (char *)get_property(stdout_node, "name", NULL); - if (!name) { - printk(KERN_WARNING "stdout node missing 'name' property!\n"); - goto out; - } - - if (strncmp(name, "vty", 3) == 0) { - if (device_is_compatible(stdout_node, "hvterm1")) { - termno = (u32 *)get_property(stdout_node, "reg", NULL); - if (termno) { - vtermno = termno[0]; - udbg_putc = udbg_putcLP; - udbg_getc = udbg_getcLP; - udbg_getc_poll = udbg_getc_pollLP; - found = 1; - } - } else if (device_is_compatible(stdout_node, "hvterm-protocol")) { - termno = (u32 *)get_property(stdout_node, "reg", NULL); - if (termno) { - vtermno = termno[0]; - udbg_putc = udbg_hvsi_putc; - udbg_getc = udbg_hvsi_getc; - udbg_getc_poll = udbg_hvsi_getc_poll; - found = 1; - } - } - } else if (strncmp(name, "serial", 6)) { - /* XXX fix ISA serial console */ - printk(KERN_WARNING "serial stdout on LPAR ('%s')! " - "can't print udbg messages\n", - stdout_node->full_name); - } else { - printk(KERN_WARNING "don't know how to print to stdout '%s'\n", - stdout_node->full_name); - } - -out: - of_node_put(stdout_node); - return found; -} - -void vpa_init(int cpu) -{ - int hwcpu = get_hard_smp_processor_id(cpu); - unsigned long vpa = (unsigned long)&(paca[cpu].lppaca); - long ret; - unsigned long flags; - - /* Register the Virtual Processor Area (VPA) */ - flags = 1UL << (63 - 18); - - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - paca[cpu].lppaca.vmxregs_in_use = 1; - - ret = register_vpa(flags, hwcpu, __pa(vpa)); - - if (ret) - printk(KERN_ERR "WARNING: vpa_init: VPA registration for " - "cpu %d (hw %d) of area %lx returns %ld\n", - cpu, hwcpu, __pa(vpa), ret); -} - -long pSeries_lpar_hpte_insert(unsigned long hpte_group, - unsigned long va, unsigned long prpn, - unsigned long vflags, unsigned long rflags) -{ - unsigned long lpar_rc; - unsigned long flags; - unsigned long slot; - unsigned long hpte_v, hpte_r; - unsigned long dummy0, dummy1; - - hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID; - if (vflags & HPTE_V_LARGE) - hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); - - hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; - - /* Now fill in the actual HPTE */ - /* Set CEC cookie to 0 */ - /* Zero page = 0 */ - /* I-cache Invalidate = 0 */ - /* I-cache synchronize = 0 */ - /* Exact = 0 */ - flags = 0; - - /* XXX why is this here? - Anton */ - if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) - hpte_r &= ~_PAGE_COHERENT; - - lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v, - hpte_r, &slot, &dummy0, &dummy1); - - if (unlikely(lpar_rc == H_PTEG_Full)) - return -1; - - /* - * Since we try and ioremap PHBs we don't own, the pte insert - * will fail. However we must catch the failure in hash_page - * or we will loop forever, so return -2 in this case. - */ - if (unlikely(lpar_rc != H_Success)) - return -2; - - /* Because of iSeries, we have to pass down the secondary - * bucket bit here as well - */ - return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); -} - -static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); - -static long pSeries_lpar_hpte_remove(unsigned long hpte_group) -{ - unsigned long slot_offset; - unsigned long lpar_rc; - int i; - unsigned long dummy1, dummy2; - - /* pick a random slot to start at */ - slot_offset = mftb() & 0x7; - - for (i = 0; i < HPTES_PER_GROUP; i++) { - - /* don't remove a bolted entry */ - lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, - (0x1UL << 4), &dummy1, &dummy2); - - if (lpar_rc == H_Success) - return i; - - BUG_ON(lpar_rc != H_Not_Found); - - slot_offset++; - slot_offset &= 0x7; - } - - return -1; -} - -static void pSeries_lpar_hptab_clear(void) -{ - unsigned long size_bytes = 1UL << ppc64_pft_size; - unsigned long hpte_count = size_bytes >> 4; - unsigned long dummy1, dummy2; - int i; - - /* TODO: Use bulk call */ - for (i = 0; i < hpte_count; i++) - plpar_pte_remove(0, i, 0, &dummy1, &dummy2); -} - -/* - * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and - * the low 3 bits of flags happen to line up. So no transform is needed. - * We can probably optimize here and assume the high bits of newpp are - * already zero. For now I am paranoid. - */ -static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, int large, int local) -{ - unsigned long lpar_rc; - unsigned long flags = (newpp & 7) | H_AVPN; - unsigned long avpn = va >> 23; - - if (large) - avpn &= ~0x1UL; - - lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7)); - - if (lpar_rc == H_Not_Found) - return -1; - - BUG_ON(lpar_rc != H_Success); - - return 0; -} - -static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) -{ - unsigned long dword0; - unsigned long lpar_rc; - unsigned long dummy_word1; - unsigned long flags; - - /* Read 1 pte at a time */ - /* Do not need RPN to logical page translation */ - /* No cross CEC PFT access */ - flags = 0; - - lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1); - - BUG_ON(lpar_rc != H_Success); - - return dword0; -} - -static long pSeries_lpar_hpte_find(unsigned long vpn) -{ - unsigned long hash; - unsigned long i, j; - long slot; - unsigned long hpte_v; - - hash = hpt_hash(vpn, 0); - - for (j = 0; j < 2; j++) { - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - for (i = 0; i < HPTES_PER_GROUP; i++) { - hpte_v = pSeries_lpar_hpte_getword0(slot); - - if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) - && (hpte_v & HPTE_V_VALID) - && (!!(hpte_v & HPTE_V_SECONDARY) == j)) { - /* HPTE matches */ - if (j) - slot = -slot; - return slot; - } - ++slot; - } - hash = ~hash; - } - - return -1; -} - -static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, - unsigned long ea) -{ - unsigned long lpar_rc; - unsigned long vsid, va, vpn, flags; - long slot; - - vsid = get_kernel_vsid(ea); - va = (vsid << 28) | (ea & 0x0fffffff); - vpn = va >> PAGE_SHIFT; - - slot = pSeries_lpar_hpte_find(vpn); - BUG_ON(slot == -1); - - flags = newpp & 7; - lpar_rc = plpar_pte_protect(flags, slot, 0); - - BUG_ON(lpar_rc != H_Success); -} - -static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, - int large, int local) -{ - unsigned long avpn = va >> 23; - unsigned long lpar_rc; - unsigned long dummy1, dummy2; - - if (large) - avpn &= ~0x1UL; - - lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1, - &dummy2); - - if (lpar_rc == H_Not_Found) - return; - - BUG_ON(lpar_rc != H_Success); -} - -/* - * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie - * lock. - */ -void pSeries_lpar_flush_hash_range(unsigned long number, int local) -{ - int i; - unsigned long flags = 0; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); - - for (i = 0; i < number; i++) - flush_hash_page(batch->vaddr[i], batch->pte[i], local); - - if (lock_tlbie) - spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); -} - -void hpte_init_lpar(void) -{ - ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; - ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; - ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; - ppc_md.hpte_insert = pSeries_lpar_hpte_insert; - ppc_md.hpte_remove = pSeries_lpar_hpte_remove; - ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; - ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; - - htab_finish_init(); -} diff --git a/arch/ppc64/kernel/pSeries_nvram.c b/arch/ppc64/kernel/pSeries_nvram.c deleted file mode 100644 index 18abfb1f4e24..000000000000 --- a/arch/ppc64/kernel/pSeries_nvram.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * /dev/nvram driver for PPC64 - * - * This perhaps should live in drivers/char - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int nvram_size; -static int nvram_fetch, nvram_store; -static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ -static DEFINE_SPINLOCK(nvram_lock); - - -static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) -{ - unsigned int i; - unsigned long len; - int done; - unsigned long flags; - char *p = buf; - - - if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE) - return -ENODEV; - - if (*index >= nvram_size) - return 0; - - i = *index; - if (i + count > nvram_size) - count = nvram_size - i; - - spin_lock_irqsave(&nvram_lock, flags); - - for (; count != 0; count -= len) { - len = count; - if (len > NVRW_CNT) - len = NVRW_CNT; - - if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), - len) != 0) || len != done) { - spin_unlock_irqrestore(&nvram_lock, flags); - return -EIO; - } - - memcpy(p, nvram_buf, len); - - p += len; - i += len; - } - - spin_unlock_irqrestore(&nvram_lock, flags); - - *index = i; - return p - buf; -} - -static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) -{ - unsigned int i; - unsigned long len; - int done; - unsigned long flags; - const char *p = buf; - - if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE) - return -ENODEV; - - if (*index >= nvram_size) - return 0; - - i = *index; - if (i + count > nvram_size) - count = nvram_size - i; - - spin_lock_irqsave(&nvram_lock, flags); - - for (; count != 0; count -= len) { - len = count; - if (len > NVRW_CNT) - len = NVRW_CNT; - - memcpy(nvram_buf, p, len); - - if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), - len) != 0) || len != done) { - spin_unlock_irqrestore(&nvram_lock, flags); - return -EIO; - } - - p += len; - i += len; - } - spin_unlock_irqrestore(&nvram_lock, flags); - - *index = i; - return p - buf; -} - -static ssize_t pSeries_nvram_get_size(void) -{ - return nvram_size ? nvram_size : -ENODEV; -} - -int __init pSeries_nvram_init(void) -{ - struct device_node *nvram; - unsigned int *nbytes_p, proplen; - - nvram = of_find_node_by_type(NULL, "nvram"); - if (nvram == NULL) - return -ENODEV; - - nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); - if (nbytes_p == NULL || proplen != sizeof(unsigned int)) - return -EIO; - - nvram_size = *nbytes_p; - - nvram_fetch = rtas_token("nvram-fetch"); - nvram_store = rtas_token("nvram-store"); - printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); - of_node_put(nvram); - - ppc_md.nvram_read = pSeries_nvram_read; - ppc_md.nvram_write = pSeries_nvram_write; - ppc_md.nvram_size = pSeries_nvram_get_size; - - return 0; -} diff --git a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c deleted file mode 100644 index 2dd477eb1c53..000000000000 --- a/arch/ppc64/kernel/pSeries_pci.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * arch/ppc64/kernel/pSeries_pci.c - * - * Copyright (C) 2001 Dave Engebretsen, IBM Corporation - * Copyright (C) 2003 Anton Blanchard , IBM - * - * pSeries specific routines for PCI. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -static int __initdata s7a_workaround = -1; - -#if 0 -void pcibios_name_device(struct pci_dev *dev) -{ - struct device_node *dn; - - /* - * Add IBM loc code (slot) as a prefix to the device names for service - */ - dn = pci_device_to_OF_node(dev); - if (dn) { - char *loc_code = get_property(dn, "ibm,loc-code", 0); - if (loc_code) { - int loc_len = strlen(loc_code); - if (loc_len < sizeof(dev->dev.name)) { - memmove(dev->dev.name+loc_len+1, dev->dev.name, - sizeof(dev->dev.name)-loc_len-1); - memcpy(dev->dev.name, loc_code, loc_len); - dev->dev.name[loc_len] = ' '; - dev->dev.name[sizeof(dev->dev.name)-1] = '\0'; - } - } - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); -#endif - -static void __init check_s7a(void) -{ - struct device_node *root; - char *model; - - s7a_workaround = 0; - root = of_find_node_by_path("/"); - if (root) { - model = get_property(root, "model", NULL); - if (model && !strcmp(model, "IBM,7013-S7A")) - s7a_workaround = 1; - of_node_put(root); - } -} - -void __devinit pSeries_irq_bus_setup(struct pci_bus *bus) -{ - struct pci_dev *dev; - - if (s7a_workaround < 0) - check_s7a(); - list_for_each_entry(dev, &bus->devices, bus_list) { - pci_read_irq_line(dev); - if (s7a_workaround) { - if (dev->irq > 16) { - dev->irq -= 3; - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, - dev->irq); - } - } - } -} - -static void __init pSeries_request_regions(void) -{ - if (!isa_io_base) - return; - - request_region(0x20,0x20,"pic1"); - request_region(0xa0,0x20,"pic2"); - request_region(0x00,0x20,"dma1"); - request_region(0x40,0x20,"timer"); - request_region(0x80,0x10,"dma page reg"); - request_region(0xc0,0x20,"dma2"); -} - -void __init pSeries_final_fixup(void) -{ - phbs_remap_io(); - pSeries_request_regions(); - - pci_addr_cache_build(); -} - -/* - * Assume the winbond 82c105 is the IDE controller on a - * p610. We should probably be more careful in case - * someone tries to plug in a similar adapter. - */ -static void fixup_winbond_82c105(struct pci_dev* dev) -{ - int i; - unsigned int reg; - - if (!(systemcfg->platform & PLATFORM_PSERIES)) - return; - - printk("Using INTC for W82c105 IDE controller.\n"); - pci_read_config_dword(dev, 0x40, ®); - /* Enable LEGIRQ to use INTC instead of ISA interrupts */ - pci_write_config_dword(dev, 0x40, reg | (1<<11)); - - for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) { - /* zap the 2nd function of the winbond chip */ - if (dev->resource[i].flags & IORESOURCE_IO - && dev->bus->number == 0 && dev->devfn == 0x81) - dev->resource[i].flags &= ~IORESOURCE_IO; - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, - fixup_winbond_82c105); diff --git a/arch/ppc64/kernel/pSeries_reconfig.c b/arch/ppc64/kernel/pSeries_reconfig.c deleted file mode 100644 index 58c61219d08e..000000000000 --- a/arch/ppc64/kernel/pSeries_reconfig.c +++ /dev/null @@ -1,426 +0,0 @@ -/* - * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI - * Hotplug and Dynamic Logical Partitioning on RPA platforms). - * - * Copyright (C) 2005 Nathan Lynch - * Copyright (C) 2005 IBM Corporation - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - */ - -#include -#include -#include -#include - -#include -#include -#include - - - -/* - * Routines for "runtime" addition and removal of device tree nodes. - */ -#ifdef CONFIG_PROC_DEVICETREE -/* - * Add a node to /proc/device-tree. - */ -static void add_node_proc_entries(struct device_node *np) -{ - struct proc_dir_entry *ent; - - ent = proc_mkdir(strrchr(np->full_name, '/') + 1, np->parent->pde); - if (ent) - proc_device_tree_add_node(np, ent); -} - -static void remove_node_proc_entries(struct device_node *np) -{ - struct property *pp = np->properties; - struct device_node *parent = np->parent; - - while (pp) { - remove_proc_entry(pp->name, np->pde); - pp = pp->next; - } - if (np->pde) - remove_proc_entry(np->pde->name, parent->pde); -} -#else /* !CONFIG_PROC_DEVICETREE */ -static void add_node_proc_entries(struct device_node *np) -{ - return; -} - -static void remove_node_proc_entries(struct device_node *np) -{ - return; -} -#endif /* CONFIG_PROC_DEVICETREE */ - -/** - * derive_parent - basically like dirname(1) - * @path: the full_name of a node to be added to the tree - * - * Returns the node which should be the parent of the node - * described by path. E.g., for path = "/foo/bar", returns - * the node with full_name = "/foo". - */ -static struct device_node *derive_parent(const char *path) -{ - struct device_node *parent = NULL; - char *parent_path = "/"; - size_t parent_path_len = strrchr(path, '/') - path + 1; - - /* reject if path is "/" */ - if (!strcmp(path, "/")) - return ERR_PTR(-EINVAL); - - if (strrchr(path, '/') != path) { - parent_path = kmalloc(parent_path_len, GFP_KERNEL); - if (!parent_path) - return ERR_PTR(-ENOMEM); - strlcpy(parent_path, path, parent_path_len); - } - parent = of_find_node_by_path(parent_path); - if (!parent) - return ERR_PTR(-EINVAL); - if (strcmp(parent_path, "/")) - kfree(parent_path); - return parent; -} - -static struct notifier_block *pSeries_reconfig_chain; - -int pSeries_reconfig_notifier_register(struct notifier_block *nb) -{ - return notifier_chain_register(&pSeries_reconfig_chain, nb); -} - -void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) -{ - notifier_chain_unregister(&pSeries_reconfig_chain, nb); -} - -static int pSeries_reconfig_add_node(const char *path, struct property *proplist) -{ - struct device_node *np; - int err = -ENOMEM; - - np = kzalloc(sizeof(*np), GFP_KERNEL); - if (!np) - goto out_err; - - np->full_name = kmalloc(strlen(path) + 1, GFP_KERNEL); - if (!np->full_name) - goto out_err; - - strcpy(np->full_name, path); - - np->properties = proplist; - OF_MARK_DYNAMIC(np); - kref_init(&np->kref); - - np->parent = derive_parent(path); - if (IS_ERR(np->parent)) { - err = PTR_ERR(np->parent); - goto out_err; - } - - err = notifier_call_chain(&pSeries_reconfig_chain, - PSERIES_RECONFIG_ADD, np); - if (err == NOTIFY_BAD) { - printk(KERN_ERR "Failed to add device node %s\n", path); - err = -ENOMEM; /* For now, safe to assume kmalloc failure */ - goto out_err; - } - - of_attach_node(np); - - add_node_proc_entries(np); - - of_node_put(np->parent); - - return 0; - -out_err: - if (np) { - of_node_put(np->parent); - kfree(np->full_name); - kfree(np); - } - return err; -} - -static int pSeries_reconfig_remove_node(struct device_node *np) -{ - struct device_node *parent, *child; - - parent = of_get_parent(np); - if (!parent) - return -EINVAL; - - if ((child = of_get_next_child(np, NULL))) { - of_node_put(child); - return -EBUSY; - } - - remove_node_proc_entries(np); - - notifier_call_chain(&pSeries_reconfig_chain, - PSERIES_RECONFIG_REMOVE, np); - of_detach_node(np); - - of_node_put(parent); - of_node_put(np); /* Must decrement the refcount */ - return 0; -} - -/* - * /proc/ppc64/ofdt - yucky binary interface for adding and removing - * OF device nodes. Should be deprecated as soon as we get an - * in-kernel wrapper for the RTAS ibm,configure-connector call. - */ - -static void release_prop_list(const struct property *prop) -{ - struct property *next; - for (; prop; prop = next) { - next = prop->next; - kfree(prop->name); - kfree(prop->value); - kfree(prop); - } - -} - -/** - * parse_next_property - process the next property from raw input buffer - * @buf: input buffer, must be nul-terminated - * @end: end of the input buffer + 1, for validation - * @name: return value; set to property name in buf - * @length: return value; set to length of value - * @value: return value; set to the property value in buf - * - * Note that the caller must make copies of the name and value returned, - * this function does no allocation or copying of the data. Return value - * is set to the next name in buf, or NULL on error. - */ -static char * parse_next_property(char *buf, char *end, char **name, int *length, - unsigned char **value) -{ - char *tmp; - - *name = buf; - - tmp = strchr(buf, ' '); - if (!tmp) { - printk(KERN_ERR "property parse failed in %s at line %d\n", - __FUNCTION__, __LINE__); - return NULL; - } - *tmp = '\0'; - - if (++tmp >= end) { - printk(KERN_ERR "property parse failed in %s at line %d\n", - __FUNCTION__, __LINE__); - return NULL; - } - - /* now we're on the length */ - *length = -1; - *length = simple_strtoul(tmp, &tmp, 10); - if (*length == -1) { - printk(KERN_ERR "property parse failed in %s at line %d\n", - __FUNCTION__, __LINE__); - return NULL; - } - if (*tmp != ' ' || ++tmp >= end) { - printk(KERN_ERR "property parse failed in %s at line %d\n", - __FUNCTION__, __LINE__); - return NULL; - } - - /* now we're on the value */ - *value = tmp; - tmp += *length; - if (tmp > end) { - printk(KERN_ERR "property parse failed in %s at line %d\n", - __FUNCTION__, __LINE__); - return NULL; - } - else if (tmp < end && *tmp != ' ' && *tmp != '\0') { - printk(KERN_ERR "property parse failed in %s at line %d\n", - __FUNCTION__, __LINE__); - return NULL; - } - tmp++; - - /* and now we should be on the next name, or the end */ - return tmp; -} - -static struct property *new_property(const char *name, const int length, - const unsigned char *value, struct property *last) -{ - struct property *new = kmalloc(sizeof(*new), GFP_KERNEL); - - if (!new) - return NULL; - memset(new, 0, sizeof(*new)); - - if (!(new->name = kmalloc(strlen(name) + 1, GFP_KERNEL))) - goto cleanup; - if (!(new->value = kmalloc(length + 1, GFP_KERNEL))) - goto cleanup; - - strcpy(new->name, name); - memcpy(new->value, value, length); - *(((char *)new->value) + length) = 0; - new->length = length; - new->next = last; - return new; - -cleanup: - if (new->name) - kfree(new->name); - if (new->value) - kfree(new->value); - kfree(new); - return NULL; -} - -static int do_add_node(char *buf, size_t bufsize) -{ - char *path, *end, *name; - struct device_node *np; - struct property *prop = NULL; - unsigned char* value; - int length, rv = 0; - - end = buf + bufsize; - path = buf; - buf = strchr(buf, ' '); - if (!buf) - return -EINVAL; - *buf = '\0'; - buf++; - - if ((np = of_find_node_by_path(path))) { - of_node_put(np); - return -EINVAL; - } - - /* rv = build_prop_list(tmp, bufsize - (tmp - buf), &proplist); */ - while (buf < end && - (buf = parse_next_property(buf, end, &name, &length, &value))) { - struct property *last = prop; - - prop = new_property(name, length, value, last); - if (!prop) { - rv = -ENOMEM; - prop = last; - goto out; - } - } - if (!buf) { - rv = -EINVAL; - goto out; - } - - rv = pSeries_reconfig_add_node(path, prop); - -out: - if (rv) - release_prop_list(prop); - return rv; -} - -static int do_remove_node(char *buf) -{ - struct device_node *node; - int rv = -ENODEV; - - if ((node = of_find_node_by_path(buf))) - rv = pSeries_reconfig_remove_node(node); - - of_node_put(node); - return rv; -} - -/** - * ofdt_write - perform operations on the Open Firmware device tree - * - * @file: not used - * @buf: command and arguments - * @count: size of the command buffer - * @off: not used - * - * Operations supported at this time are addition and removal of - * whole nodes along with their properties. Operations on individual - * properties are not implemented (yet). - */ -static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count, - loff_t *off) -{ - int rv = 0; - char *kbuf; - char *tmp; - - if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) { - rv = -ENOMEM; - goto out; - } - if (copy_from_user(kbuf, buf, count)) { - rv = -EFAULT; - goto out; - } - - kbuf[count] = '\0'; - - tmp = strchr(kbuf, ' '); - if (!tmp) { - rv = -EINVAL; - goto out; - } - *tmp = '\0'; - tmp++; - - if (!strcmp(kbuf, "add_node")) - rv = do_add_node(tmp, count - (tmp - kbuf)); - else if (!strcmp(kbuf, "remove_node")) - rv = do_remove_node(tmp); - else - rv = -EINVAL; -out: - kfree(kbuf); - return rv ? rv : count; -} - -static struct file_operations ofdt_fops = { - .write = ofdt_write -}; - -/* create /proc/ppc64/ofdt write-only by root */ -static int proc_ppc64_create_ofdt(void) -{ - struct proc_dir_entry *ent; - - if (!(systemcfg->platform & PLATFORM_PSERIES)) - return 0; - - ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL); - if (ent) { - ent->nlink = 1; - ent->data = NULL; - ent->size = 0; - ent->proc_fops = &ofdt_fops; - } - - return 0; -} -__initcall(proc_ppc64_create_ofdt); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c deleted file mode 100644 index 5a9fe96f9f67..000000000000 --- a/arch/ppc64/kernel/pSeries_setup.c +++ /dev/null @@ -1,622 +0,0 @@ -/* - * linux/arch/ppc/kernel/setup.c - * - * Copyright (C) 1995 Linus Torvalds - * Adapted from 'alpha' version by Gary Thomas - * Modified by Cort Dougan (cort@cs.nmt.edu) - * Modified by PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * bootup setup stuff.. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "i8259.h" - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -extern void find_udbg_vterm(void); -extern void system_reset_fwnmi(void); /* from head.S */ -extern void machine_check_fwnmi(void); /* from head.S */ -extern void generic_find_legacy_serial_ports(u64 *physport, - unsigned int *default_speed); - -int fwnmi_active; /* TRUE if an FWNMI handler is present */ - -extern void pSeries_system_reset_exception(struct pt_regs *regs); -extern int pSeries_machine_check_exception(struct pt_regs *regs); - -static int pseries_shared_idle(void); -static int pseries_dedicated_idle(void); - -static volatile void __iomem * chrp_int_ack_special; -struct mpic *pSeries_mpic; - -void pSeries_get_cpuinfo(struct seq_file *m) -{ - struct device_node *root; - const char *model = ""; - - root = of_find_node_by_path("/"); - if (root) - model = get_property(root, "model", NULL); - seq_printf(m, "machine\t\t: CHRP %s\n", model); - of_node_put(root); -} - -/* Initialize firmware assisted non-maskable interrupts if - * the firmware supports this feature. - * - */ -static void __init fwnmi_init(void) -{ - int ret; - int ibm_nmi_register = rtas_token("ibm,nmi-register"); - if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) - return; - ret = rtas_call(ibm_nmi_register, 2, 1, NULL, - __pa((unsigned long)system_reset_fwnmi), - __pa((unsigned long)machine_check_fwnmi)); - if (ret == 0) - fwnmi_active = 1; -} - -static int pSeries_irq_cascade(struct pt_regs *regs, void *data) -{ - if (chrp_int_ack_special) - return readb(chrp_int_ack_special); - else - return i8259_irq(smp_processor_id()); -} - -static void __init pSeries_init_mpic(void) -{ - unsigned int *addrp; - struct device_node *np; - int i; - - /* All ISUs are setup, complete initialization */ - mpic_init(pSeries_mpic); - - /* Check what kind of cascade ACK we have */ - if (!(np = of_find_node_by_name(NULL, "pci")) - || !(addrp = (unsigned int *) - get_property(np, "8259-interrupt-acknowledge", NULL))) - printk(KERN_ERR "Cannot find pci to get ack address\n"); - else - chrp_int_ack_special = ioremap(addrp[prom_n_addr_cells(np)-1], 1); - of_node_put(np); - - /* Setup the legacy interrupts & controller */ - for (i = 0; i < NUM_ISA_INTERRUPTS; i++) - irq_desc[i].handler = &i8259_pic; - i8259_init(0); - - /* Hook cascade to mpic */ - mpic_setup_cascade(NUM_ISA_INTERRUPTS, pSeries_irq_cascade, NULL); -} - -static void __init pSeries_setup_mpic(void) -{ - unsigned int *opprop; - unsigned long openpic_addr = 0; - unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS]; - struct device_node *root; - int irq_count; - - /* Find the Open PIC if present */ - root = of_find_node_by_path("/"); - opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL); - if (opprop != 0) { - int n = prom_n_addr_cells(root); - - for (openpic_addr = 0; n > 0; --n) - openpic_addr = (openpic_addr << 32) + *opprop++; - printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); - } - of_node_put(root); - - BUG_ON(openpic_addr == 0); - - /* Get the sense values from OF */ - prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS); - - /* Setup the openpic driver */ - irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */ - pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY, - 16, 16, irq_count, /* isu size, irq offset, irq count */ - NR_IRQS - 4, /* ipi offset */ - senses, irq_count, /* sense & sense size */ - " MPIC "); -} - -static void pseries_lpar_enable_pmcs(void) -{ - unsigned long set, reset; - - power4_enable_pmcs(); - - set = 1UL << 63; - reset = 0; - plpar_hcall_norets(H_PERFMON, set, reset); - - /* instruct hypervisor to maintain PMCs */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) - get_paca()->lppaca.pmcregs_in_use = 1; -} - -static void __init pSeries_setup_arch(void) -{ - /* Fixup ppc_md depending on the type of interrupt controller */ - if (ppc64_interrupt_controller == IC_OPEN_PIC) { - ppc_md.init_IRQ = pSeries_init_mpic; - ppc_md.get_irq = mpic_get_irq; - ppc_md.cpu_irq_down = mpic_teardown_this_cpu; - /* Allocate the mpic now, so that find_and_init_phbs() can - * fill the ISUs */ - pSeries_setup_mpic(); - } else { - ppc_md.init_IRQ = xics_init_IRQ; - ppc_md.get_irq = xics_get_irq; - ppc_md.cpu_irq_down = xics_teardown_cpu; - } - -#ifdef CONFIG_SMP - smp_init_pSeries(); -#endif - /* openpic global configuration register (64-bit format). */ - /* openpic Interrupt Source Unit pointer (64-bit format). */ - /* python0 facility area (mmio) (64-bit format) REAL address. */ - - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000; - - if (ROOT_DEV == 0) { - printk("No ramdisk, default root is /dev/sda2\n"); - ROOT_DEV = Root_SDA2; - } - - fwnmi_init(); - - /* Find and initialize PCI host bridges */ - init_pci_config_tokens(); - find_and_init_phbs(); - eeh_init(); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - pSeries_nvram_init(); - - /* Choose an idle loop */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - vpa_init(boot_cpuid); - if (get_paca()->lppaca.shared_proc) { - printk(KERN_INFO "Using shared processor idle loop\n"); - ppc_md.idle_loop = pseries_shared_idle; - } else { - printk(KERN_INFO "Using dedicated idle loop\n"); - ppc_md.idle_loop = pseries_dedicated_idle; - } - } else { - printk(KERN_INFO "Using default idle loop\n"); - ppc_md.idle_loop = default_idle; - } - - if (systemcfg->platform & PLATFORM_LPAR) - ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; - else - ppc_md.enable_pmcs = power4_enable_pmcs; -} - -static int __init pSeries_init_panel(void) -{ - /* Manually leave the kernel version on the panel. */ - ppc_md.progress("Linux ppc64\n", 0); - ppc_md.progress(system_utsname.version, 0); - - return 0; -} -arch_initcall(pSeries_init_panel); - - -/* Build up the ppc64_firmware_features bitmask field - * using contents of device-tree/ibm,hypertas-functions. - * Ultimately this functionality may be moved into prom.c prom_init(). - */ -static void __init fw_feature_init(void) -{ - struct device_node * dn; - char * hypertas; - unsigned int len; - - DBG(" -> fw_feature_init()\n"); - - ppc64_firmware_features = 0; - dn = of_find_node_by_path("/rtas"); - if (dn == NULL) { - printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n"); - goto no_rtas; - } - - hypertas = get_property(dn, "ibm,hypertas-functions", &len); - if (hypertas) { - while (len > 0){ - int i, hypertas_len; - /* check value against table of strings */ - for(i=0; i < FIRMWARE_MAX_FEATURES ;i++) { - if ((firmware_features_table[i].name) && - (strcmp(firmware_features_table[i].name,hypertas))==0) { - /* we have a match */ - ppc64_firmware_features |= - (firmware_features_table[i].val); - break; - } - } - hypertas_len = strlen(hypertas); - len -= hypertas_len +1; - hypertas+= hypertas_len +1; - } - } - - of_node_put(dn); - no_rtas: - printk(KERN_INFO "firmware_features = 0x%lx\n", - ppc64_firmware_features); - - DBG(" <- fw_feature_init()\n"); -} - - -static void __init pSeries_discover_pic(void) -{ - struct device_node *np; - char *typep; - - /* - * Setup interrupt mapping options that are needed for finish_device_tree - * to properly parse the OF interrupt tree & do the virtual irq mapping - */ - __irq_offset_value = NUM_ISA_INTERRUPTS; - ppc64_interrupt_controller = IC_INVALID; - for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) { - typep = (char *)get_property(np, "compatible", NULL); - if (strstr(typep, "open-pic")) - ppc64_interrupt_controller = IC_OPEN_PIC; - else if (strstr(typep, "ppc-xicp")) - ppc64_interrupt_controller = IC_PPC_XIC; - else - printk("pSeries_discover_pic: failed to recognize" - " interrupt-controller\n"); - break; - } -} - -static void pSeries_mach_cpu_die(void) -{ - local_irq_disable(); - idle_task_exit(); - /* Some hardware requires clearing the CPPR, while other hardware does not - * it is safe either way - */ - pSeriesLP_cppr_info(0, 0); - rtas_stop_self(); - /* Should never get here... */ - BUG(); - for(;;); -} - - -/* - * Early initialization. Relocation is on but do not reference unbolted pages - */ -static void __init pSeries_init_early(void) -{ - void *comport; - int iommu_off = 0; - unsigned int default_speed; - u64 physport; - - DBG(" -> pSeries_init_early()\n"); - - fw_feature_init(); - - if (systemcfg->platform & PLATFORM_LPAR) - hpte_init_lpar(); - else { - hpte_init_native(); - iommu_off = (of_chosen && - get_property(of_chosen, "linux,iommu-off", NULL)); - } - - generic_find_legacy_serial_ports(&physport, &default_speed); - - if (systemcfg->platform & PLATFORM_LPAR) - find_udbg_vterm(); - else if (physport) { - /* Map the uart for udbg. */ - comport = (void *)ioremap(physport, 16); - udbg_init_uart(comport, default_speed); - - DBG("Hello World !\n"); - } - - - iommu_init_early_pSeries(); - - pSeries_discover_pic(); - - DBG(" <- pSeries_init_early()\n"); -} - - -static int pSeries_check_legacy_ioport(unsigned int baseport) -{ - struct device_node *np; - -#define I8042_DATA_REG 0x60 -#define FDC_BASE 0x3f0 - - - switch(baseport) { - case I8042_DATA_REG: - np = of_find_node_by_type(NULL, "8042"); - if (np == NULL) - return -ENODEV; - of_node_put(np); - break; - case FDC_BASE: - np = of_find_node_by_type(NULL, "fdc"); - if (np == NULL) - return -ENODEV; - of_node_put(np); - break; - } - return 0; -} - -/* - * Called very early, MMU is off, device-tree isn't unflattened - */ -extern struct machdep_calls pSeries_md; - -static int __init pSeries_probe(int platform) -{ - if (platform != PLATFORM_PSERIES && - platform != PLATFORM_PSERIES_LPAR) - return 0; - - /* if we have some ppc_md fixups for LPAR to do, do - * it here ... - */ - - return 1; -} - -DECLARE_PER_CPU(unsigned long, smt_snooze_delay); - -static inline void dedicated_idle_sleep(unsigned int cpu) -{ - struct paca_struct *ppaca = &paca[cpu ^ 1]; - - /* Only sleep if the other thread is not idle */ - if (!(ppaca->lppaca.idle)) { - local_irq_disable(); - - /* - * We are about to sleep the thread and so wont be polling any - * more. - */ - clear_thread_flag(TIF_POLLING_NRFLAG); - - /* - * SMT dynamic mode. Cede will result in this thread going - * dormant, if the partner thread is still doing work. Thread - * wakes up if partner goes idle, an interrupt is presented, or - * a prod occurs. Returning from the cede enables external - * interrupts. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - } else { - /* - * Give the HV an opportunity at the processor, since we are - * not doing any work. - */ - poll_pending(); - } -} - -static int pseries_dedicated_idle(void) -{ - long oldval; - struct paca_struct *lpaca = get_paca(); - unsigned int cpu = smp_processor_id(); - unsigned long start_snooze; - unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; - - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - - start_snooze = __get_tb() + - *smt_snooze_delay * tb_ticks_per_usec; - - while (!need_resched() && !cpu_is_offline(cpu)) { - ppc64_runlatch_off(); - - /* - * Go into low thread priority and possibly - * low power mode. - */ - HMT_low(); - HMT_very_low(); - - if (*smt_snooze_delay != 0 && - __get_tb() > start_snooze) { - HMT_medium(); - dedicated_idle_sleep(cpu); - } - - } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } - - lpaca->lppaca.idle = 0; - ppc64_runlatch_on(); - - schedule(); - - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } -} - -static int pseries_shared_idle(void) -{ - struct paca_struct *lpaca = get_paca(); - unsigned int cpu = smp_processor_id(); - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; - - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); - ppc64_runlatch_off(); - - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - * - * Check need_resched() again with interrupts disabled - * to avoid a race. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - - HMT_medium(); - } - - lpaca->lppaca.idle = 0; - ppc64_runlatch_on(); - - schedule(); - - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } - - return 0; -} - -static int pSeries_pci_probe_mode(struct pci_bus *bus) -{ - if (systemcfg->platform & PLATFORM_LPAR) - return PCI_PROBE_DEVTREE; - return PCI_PROBE_NORMAL; -} - -struct machdep_calls __initdata pSeries_md = { - .probe = pSeries_probe, - .setup_arch = pSeries_setup_arch, - .init_early = pSeries_init_early, - .get_cpuinfo = pSeries_get_cpuinfo, - .log_error = pSeries_log_error, - .pcibios_fixup = pSeries_final_fixup, - .pci_probe_mode = pSeries_pci_probe_mode, - .irq_bus_setup = pSeries_irq_bus_setup, - .restart = rtas_restart, - .power_off = rtas_power_off, - .halt = rtas_halt, - .panic = rtas_os_term, - .cpu_die = pSeries_mach_cpu_die, - .get_boot_time = rtas_get_boot_time, - .get_rtc_time = rtas_get_rtc_time, - .set_rtc_time = rtas_set_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = rtas_progress, - .check_legacy_ioport = pSeries_check_legacy_ioport, - .system_reset_exception = pSeries_system_reset_exception, - .machine_check_exception = pSeries_machine_check_exception, -}; diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c deleted file mode 100644 index 5d1ed850f47b..000000000000 --- a/arch/ppc64/kernel/pSeries_smp.c +++ /dev/null @@ -1,517 +0,0 @@ -/* - * SMP support for pSeries and BPA machines. - * - * Dave Engebretsen, Peter Bergner, and - * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com - * - * Plus various changes from other IBM teams... - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bpa_iic.h" - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* - * The primary thread of each non-boot processor is recorded here before - * smp init. - */ -static cpumask_t of_spin_map; - -extern void pSeries_secondary_smp_init(unsigned long); - -#ifdef CONFIG_HOTPLUG_CPU - -/* Get state of physical CPU. - * Return codes: - * 0 - The processor is in the RTAS stopped state - * 1 - stop-self is in progress - * 2 - The processor is not in the RTAS stopped state - * -1 - Hardware Error - * -2 - Hardware Busy, Try again later. - */ -static int query_cpu_stopped(unsigned int pcpu) -{ - int cpu_status; - int status, qcss_tok; - - qcss_tok = rtas_token("query-cpu-stopped-state"); - if (qcss_tok == RTAS_UNKNOWN_SERVICE) - return -1; - status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); - if (status != 0) { - printk(KERN_ERR - "RTAS query-cpu-stopped-state failed: %i\n", status); - return status; - } - - return cpu_status; -} - -int pSeries_cpu_disable(void) -{ - int cpu = smp_processor_id(); - - cpu_clear(cpu, cpu_online_map); - systemcfg->processorCount--; - - /*fix boot_cpuid here*/ - if (cpu == boot_cpuid) - boot_cpuid = any_online_cpu(cpu_online_map); - - /* FIXME: abstract this to not be platform specific later on */ - xics_migrate_irqs_away(); - return 0; -} - -void pSeries_cpu_die(unsigned int cpu) -{ - int tries; - int cpu_status; - unsigned int pcpu = get_hard_smp_processor_id(cpu); - - for (tries = 0; tries < 25; tries++) { - cpu_status = query_cpu_stopped(pcpu); - if (cpu_status == 0 || cpu_status == -1) - break; - msleep(200); - } - if (cpu_status != 0) { - printk("Querying DEAD? cpu %i (%i) shows %i\n", - cpu, pcpu, cpu_status); - } - - /* Isolation and deallocation are definatly done by - * drslot_chrp_cpu. If they were not they would be - * done here. Change isolate state to Isolate and - * change allocation-state to Unusable. - */ - paca[cpu].cpu_start = 0; -} - -/* - * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle - * here is that a cpu device node may represent up to two logical cpus - * in the SMT case. We must honor the assumption in other code that - * the logical ids for sibling SMT threads x and y are adjacent, such - * that x^1 == y and y^1 == x. - */ -static int pSeries_add_processor(struct device_node *np) -{ - unsigned int cpu; - cpumask_t candidate_map, tmp = CPU_MASK_NONE; - int err = -ENOSPC, len, nthreads, i; - u32 *intserv; - - intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); - if (!intserv) - return 0; - - nthreads = len / sizeof(u32); - for (i = 0; i < nthreads; i++) - cpu_set(i, tmp); - - lock_cpu_hotplug(); - - BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map)); - - /* Get a bitmap of unoccupied slots. */ - cpus_xor(candidate_map, cpu_possible_map, cpu_present_map); - if (cpus_empty(candidate_map)) { - /* If we get here, it most likely means that NR_CPUS is - * less than the partition's max processors setting. - */ - printk(KERN_ERR "Cannot add cpu %s; this system configuration" - " supports %d logical cpus.\n", np->full_name, - cpus_weight(cpu_possible_map)); - goto out_unlock; - } - - while (!cpus_empty(tmp)) - if (cpus_subset(tmp, candidate_map)) - /* Found a range where we can insert the new cpu(s) */ - break; - else - cpus_shift_left(tmp, tmp, nthreads); - - if (cpus_empty(tmp)) { - printk(KERN_ERR "Unable to find space in cpu_present_map for" - " processor %s with %d thread(s)\n", np->name, - nthreads); - goto out_unlock; - } - - for_each_cpu_mask(cpu, tmp) { - BUG_ON(cpu_isset(cpu, cpu_present_map)); - cpu_set(cpu, cpu_present_map); - set_hard_smp_processor_id(cpu, *intserv++); - } - err = 0; -out_unlock: - unlock_cpu_hotplug(); - return err; -} - -/* - * Update the present map for a cpu node which is going away, and set - * the hard id in the paca(s) to -1 to be consistent with boot time - * convention for non-present cpus. - */ -static void pSeries_remove_processor(struct device_node *np) -{ - unsigned int cpu; - int len, nthreads, i; - u32 *intserv; - - intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); - if (!intserv) - return; - - nthreads = len / sizeof(u32); - - lock_cpu_hotplug(); - for (i = 0; i < nthreads; i++) { - for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) - continue; - BUG_ON(cpu_online(cpu)); - cpu_clear(cpu, cpu_present_map); - set_hard_smp_processor_id(cpu, -1); - break; - } - if (cpu == NR_CPUS) - printk(KERN_WARNING "Could not find cpu to remove " - "with physical id 0x%x\n", intserv[i]); - } - unlock_cpu_hotplug(); -} - -static int pSeries_smp_notifier(struct notifier_block *nb, unsigned long action, void *node) -{ - int err = NOTIFY_OK; - - switch (action) { - case PSERIES_RECONFIG_ADD: - if (pSeries_add_processor(node)) - err = NOTIFY_BAD; - break; - case PSERIES_RECONFIG_REMOVE: - pSeries_remove_processor(node); - break; - default: - err = NOTIFY_DONE; - break; - } - return err; -} - -static struct notifier_block pSeries_smp_nb = { - .notifier_call = pSeries_smp_notifier, -}; - -#endif /* CONFIG_HOTPLUG_CPU */ - -/** - * smp_startup_cpu() - start the given cpu - * - * At boot time, there is nothing to do for primary threads which were - * started from Open Firmware. For anything else, call RTAS with the - * appropriate start location. - * - * Returns: - * 0 - failure - * 1 - success - */ -static inline int __devinit smp_startup_cpu(unsigned int lcpu) -{ - int status; - unsigned long start_here = __pa((u32)*((unsigned long *) - pSeries_secondary_smp_init)); - unsigned int pcpu; - int start_cpu; - - if (cpu_isset(lcpu, of_spin_map)) - /* Already started by OF and sitting in spin loop */ - return 1; - - pcpu = get_hard_smp_processor_id(lcpu); - - /* Fixup atomic count: it exited inside IRQ handler. */ - paca[lcpu].__current->thread_info->preempt_count = 0; - - /* - * If the RTAS start-cpu token does not exist then presume the - * cpu is already spinning. - */ - start_cpu = rtas_token("start-cpu"); - if (start_cpu == RTAS_UNKNOWN_SERVICE) - return 1; - - status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu); - if (status != 0) { - printk(KERN_ERR "start-cpu failed: %i\n", status); - return 0; - } - - return 1; -} - -#ifdef CONFIG_XICS -static inline void smp_xics_do_message(int cpu, int msg) -{ - set_bit(msg, &xics_ipi_message[cpu].value); - mb(); - xics_cause_IPI(cpu); -} - -static void smp_xics_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - smp_xics_do_message(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - smp_xics_do_message(i, msg); - } - } -} - -static int __init smp_xics_probe(void) -{ - xics_request_IPIs(); - - return cpus_weight(cpu_possible_map); -} - -static void __devinit smp_xics_setup_cpu(int cpu) -{ - if (cpu != boot_cpuid) - xics_setup_cpu(); - - if (firmware_has_feature(FW_FEATURE_SPLPAR)) - vpa_init(cpu); - - cpu_clear(cpu, of_spin_map); - -} -#endif /* CONFIG_XICS */ -#ifdef CONFIG_BPA_IIC -static void smp_iic_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - iic_cause_IPI(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - iic_cause_IPI(i, msg); - } - } -} - -static int __init smp_iic_probe(void) -{ - iic_request_IPIs(); - - return cpus_weight(cpu_possible_map); -} - -static void __devinit smp_iic_setup_cpu(int cpu) -{ - if (cpu != boot_cpuid) - iic_setup_cpu(); -} -#endif /* CONFIG_BPA_IIC */ - -static DEFINE_SPINLOCK(timebase_lock); -static unsigned long timebase = 0; - -static void __devinit pSeries_give_timebase(void) -{ - spin_lock(&timebase_lock); - rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); - timebase = get_tb(); - spin_unlock(&timebase_lock); - - while (timebase) - barrier(); - rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); -} - -static void __devinit pSeries_take_timebase(void) -{ - while (!timebase) - barrier(); - spin_lock(&timebase_lock); - set_tb(timebase >> 32, timebase & 0xffffffff); - timebase = 0; - spin_unlock(&timebase_lock); -} - -static void __devinit smp_pSeries_kick_cpu(int nr) -{ - BUG_ON(nr < 0 || nr >= NR_CPUS); - - if (!smp_startup_cpu(nr)) - return; - - /* - * The processor is currently spinning, waiting for the - * cpu_start field to become non-zero After we set cpu_start, - * the processor will continue on to secondary_start - */ - paca[nr].cpu_start = 1; -} - -static int smp_pSeries_cpu_bootable(unsigned int nr) -{ - /* Special case - we inhibit secondary thread startup - * during boot if the user requests it. Odd-numbered - * cpus are assumed to be secondary threads. - */ - if (system_state < SYSTEM_RUNNING && - cpu_has_feature(CPU_FTR_SMT) && - !smt_enabled_at_boot && nr % 2 != 0) - return 0; - - return 1; -} -#ifdef CONFIG_MPIC -static struct smp_ops_t pSeries_mpic_smp_ops = { - .message_pass = smp_mpic_message_pass, - .probe = smp_mpic_probe, - .kick_cpu = smp_pSeries_kick_cpu, - .setup_cpu = smp_mpic_setup_cpu, -}; -#endif -#ifdef CONFIG_XICS -static struct smp_ops_t pSeries_xics_smp_ops = { - .message_pass = smp_xics_message_pass, - .probe = smp_xics_probe, - .kick_cpu = smp_pSeries_kick_cpu, - .setup_cpu = smp_xics_setup_cpu, - .cpu_bootable = smp_pSeries_cpu_bootable, -}; -#endif -#ifdef CONFIG_BPA_IIC -static struct smp_ops_t bpa_iic_smp_ops = { - .message_pass = smp_iic_message_pass, - .probe = smp_iic_probe, - .kick_cpu = smp_pSeries_kick_cpu, - .setup_cpu = smp_iic_setup_cpu, - .cpu_bootable = smp_pSeries_cpu_bootable, -}; -#endif - -/* This is called very early */ -void __init smp_init_pSeries(void) -{ - int i; - - DBG(" -> smp_init_pSeries()\n"); - - switch (ppc64_interrupt_controller) { -#ifdef CONFIG_MPIC - case IC_OPEN_PIC: - smp_ops = &pSeries_mpic_smp_ops; - break; -#endif -#ifdef CONFIG_XICS - case IC_PPC_XIC: - smp_ops = &pSeries_xics_smp_ops; - break; -#endif -#ifdef CONFIG_BPA_IIC - case IC_BPA_IIC: - smp_ops = &bpa_iic_smp_ops; - break; -#endif - default: - panic("Invalid interrupt controller"); - } - -#ifdef CONFIG_HOTPLUG_CPU - smp_ops->cpu_disable = pSeries_cpu_disable; - smp_ops->cpu_die = pSeries_cpu_die; - - /* Processors can be added/removed only on LPAR */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR) - pSeries_reconfig_notifier_register(&pSeries_smp_nb); -#endif - - /* Mark threads which are still spinning in hold loops. */ - if (cpu_has_feature(CPU_FTR_SMT)) { - for_each_present_cpu(i) { - if (i % 2 == 0) - /* - * Even-numbered logical cpus correspond to - * primary threads. - */ - cpu_set(i, of_spin_map); - } - } else { - of_spin_map = cpu_present_map; - } - - cpu_clear(boot_cpuid, of_spin_map); - - /* Non-lpar has additional take/give timebase */ - if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { - smp_ops->give_timebase = pSeries_give_timebase; - smp_ops->take_timebase = pSeries_take_timebase; - } - - DBG(" <- smp_init_pSeries()\n"); -} - diff --git a/arch/ppc64/kernel/pSeries_vio.c b/arch/ppc64/kernel/pSeries_vio.c deleted file mode 100644 index 866379b80c09..000000000000 --- a/arch/ppc64/kernel/pSeries_vio.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * IBM PowerPC pSeries Virtual I/O Infrastructure Support. - * - * Copyright (c) 2003-2005 IBM Corp. - * Dave Engebretsen engebret@us.ibm.com - * Santiago Leon santil@us.ibm.com - * Hollis Blanchard - * Stephen Rothwell - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct subsystem devices_subsys; /* needed for vio_find_name() */ - -static void probe_bus_pseries(void) -{ - struct device_node *node_vroot, *of_node; - - node_vroot = find_devices("vdevice"); - if ((node_vroot == NULL) || (node_vroot->child == NULL)) - /* this machine doesn't do virtual IO, and that's ok */ - return; - - /* - * Create struct vio_devices for each virtual device in the device tree. - * Drivers will associate with them later. - */ - for (of_node = node_vroot->child; of_node != NULL; - of_node = of_node->sibling) { - printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); - vio_register_device_node(of_node); - } -} - -/** - * vio_match_device_pseries: - Tell if a pSeries VIO device matches a - * vio_device_id - */ -static int vio_match_device_pseries(const struct vio_device_id *id, - const struct vio_dev *dev) -{ - return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && - device_is_compatible(dev->dev.platform_data, id->compat); -} - -static void vio_release_device_pseries(struct device *dev) -{ - /* XXX free TCE table */ - of_node_put(dev->platform_data); -} - -static ssize_t viodev_show_devspec(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct device_node *of_node = dev->platform_data; - - return sprintf(buf, "%s\n", of_node->full_name); -} -DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); - -static void vio_unregister_device_pseries(struct vio_dev *viodev) -{ - device_remove_file(&viodev->dev, &dev_attr_devspec); -} - -static struct vio_bus_ops vio_bus_ops_pseries = { - .match = vio_match_device_pseries, - .unregister_device = vio_unregister_device_pseries, - .release_device = vio_release_device_pseries, -}; - -/** - * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus - */ -static int __init vio_bus_init_pseries(void) -{ - int err; - - err = vio_bus_init(&vio_bus_ops_pseries); - if (err == 0) - probe_bus_pseries(); - return err; -} - -__initcall(vio_bus_init_pseries); - -/** - * vio_build_iommu_table: - gets the dma information from OF and - * builds the TCE tree. - * @dev: the virtual device. - * - * Returns a pointer to the built tce tree, or NULL if it can't - * find property. -*/ -static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) -{ - unsigned int *dma_window; - struct iommu_table *newTceTable; - unsigned long offset; - int dma_window_property_size; - - dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); - if(!dma_window) { - return NULL; - } - - newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - /* There should be some code to extract the phys-encoded offset - using prom_n_addr_cells(). However, according to a comment - on earlier versions, it's always zero, so we don't bother */ - offset = dma_window[1] >> PAGE_SHIFT; - - /* TCE table size - measured in tce entries */ - newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; - /* offset for VIO should always be 0 */ - newTceTable->it_offset = offset; - newTceTable->it_busno = 0; - newTceTable->it_index = (unsigned long)dma_window[0]; - newTceTable->it_type = TCE_VB; - - return iommu_init_table(newTceTable); -} - -/** - * vio_register_device_node: - Register a new vio device. - * @of_node: The OF node for this device. - * - * Creates and initializes a vio_dev structure from the data in - * of_node (dev.platform_data) and adds it to the list of virtual devices. - * Returns a pointer to the created vio_dev or NULL if node has - * NULL device_type or compatible fields. - */ -struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) -{ - struct vio_dev *viodev; - unsigned int *unit_address; - unsigned int *irq_p; - - /* we need the 'device_type' property, in order to match with drivers */ - if ((NULL == of_node->type)) { - printk(KERN_WARNING - "%s: node %s missing 'device_type'\n", __FUNCTION__, - of_node->name ? of_node->name : ""); - return NULL; - } - - unit_address = (unsigned int *)get_property(of_node, "reg", NULL); - if (!unit_address) { - printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, - of_node->name ? of_node->name : ""); - return NULL; - } - - /* allocate a vio_dev for this node */ - viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); - if (!viodev) { - return NULL; - } - memset(viodev, 0, sizeof(struct vio_dev)); - - viodev->dev.platform_data = of_node_get(of_node); - - viodev->irq = NO_IRQ; - irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); - if (irq_p) { - int virq = virt_irq_create_mapping(*irq_p); - if (virq == NO_IRQ) { - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", of_node->full_name); - } else - viodev->irq = irq_offset_up(virq); - } - - snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); - viodev->name = of_node->name; - viodev->type = of_node->type; - viodev->unit_address = *unit_address; - viodev->iommu_table = vio_build_iommu_table(viodev); - - /* register with generic device framework */ - if (vio_register_device(viodev) == NULL) { - /* XXX free TCE table */ - kfree(viodev); - return NULL; - } - device_create_file(&viodev->dev, &dev_attr_devspec); - - return viodev; -} -EXPORT_SYMBOL(vio_register_device_node); - -/** - * vio_get_attribute: - get attribute for virtual device - * @vdev: The vio device to get property. - * @which: The property/attribute to be extracted. - * @length: Pointer to length of returned data size (unused if NULL). - * - * Calls prom.c's get_property() to return the value of the - * attribute specified by the preprocessor constant @which -*/ -const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) -{ - return get_property(vdev->dev.platform_data, (char*)which, length); -} -EXPORT_SYMBOL(vio_get_attribute); - -/* vio_find_name() - internal because only vio.c knows how we formatted the - * kobject name - * XXX once vio_bus_type.devices is actually used as a kset in - * drivers/base/bus.c, this function should be removed in favor of - * "device_find(kobj_name, &vio_bus_type)" - */ -static struct vio_dev *vio_find_name(const char *kobj_name) -{ - struct kobject *found; - - found = kset_find_obj(&devices_subsys.kset, kobj_name); - if (!found) - return NULL; - - return to_vio_dev(container_of(found, struct device, kobj)); -} - -/** - * vio_find_node - find an already-registered vio_dev - * @vnode: device_node of the virtual device we're looking for - */ -struct vio_dev *vio_find_node(struct device_node *vnode) -{ - uint32_t *unit_address; - char kobj_name[BUS_ID_SIZE]; - - /* construct the kobject name from the device node */ - unit_address = (uint32_t *)get_property(vnode, "reg", NULL); - if (!unit_address) - return NULL; - snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); - - return vio_find_name(kobj_name); -} -EXPORT_SYMBOL(vio_find_node); - -int vio_enable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); - if (rc != H_Success) - printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); - return rc; -} -EXPORT_SYMBOL(vio_enable_interrupts); - -int vio_disable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); - if (rc != H_Success) - printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); - return rc; -} -EXPORT_SYMBOL(vio_disable_interrupts);