/* pci_sun4v.c: SUN4V specific PCI controller support.
 *
 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>

#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/upa.h>
#include <asm/pstate.h>
#include <asm/oplib.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>

#include "pci_impl.h"
#include "iommu_common.h"

#include "pci_sun4v.h"
#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))

struct iommu_batch {
	struct pci_dev	*pdev;		/* Device mapping is for.	*/
	unsigned long	prot;		/* IOMMU page protections	*/
	unsigned long	entry;		/* Index into IOTSB.		*/
	u64		*pglist;	/* List of physical pages	*/
	unsigned long	npages;		/* Number of pages in list.	*/
};

static DEFINE_PER_CPU(struct iommu_batch, pci_iommu_batch);
/* Interrupts must be disabled.  */
static inline void pci_iommu_batch_start(struct pci_dev *pdev, unsigned long prot, unsigned long entry)
{
	struct iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

	p->pdev		= pdev;
	p->prot		= prot;
	p->entry	= entry;
	p->npages	= 0;
}
/* Interrupts must be disabled.  */
static long pci_iommu_batch_flush(struct iommu_batch *p)
{
	struct pci_pbm_info *pbm = p->pdev->dev.archdata.host_controller;
	unsigned long devhandle = pbm->devhandle;
	unsigned long prot = p->prot;
	unsigned long entry = p->entry;
	u64 *pglist = p->pglist;
	unsigned long npages = p->npages;

	while (npages != 0) {
		long num;

		num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
					  npages, prot, __pa(pglist));
		if (unlikely(num < 0)) {
			if (printk_ratelimit())
				printk("pci_iommu_batch_flush: IOMMU map of "
				       "[%08lx:%08lx:%lx:%lx:%lx] failed with "
				       "status %ld\n",
				       devhandle, HV_PCI_TSBID(0, entry),
				       npages, prot, __pa(pglist), num);
			return -1;
		}

		entry += num;
		npages -= num;
		pglist += num;
	}

	p->entry = entry;
	p->npages = 0;

	return 0;
}
/* Interrupts must be disabled.  */
static inline long pci_iommu_batch_add(u64 phys_page)
{
	struct iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	p->pglist[p->npages++] = phys_page;
	if (p->npages == PGLIST_NENTS)
		return pci_iommu_batch_flush(p);

	return 0;
}
/* Interrupts must be disabled.  */
static inline long pci_iommu_batch_end(void)
{
	struct iommu_batch *p = &__get_cpu_var(pci_iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	return pci_iommu_batch_flush(p);
}
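/* Usage sketch for the batching helpers above, mirroring the mapping
 * paths later in this file (names like base_paddr are illustrative):
 *
 *	local_irq_save(flags);
 *	pci_iommu_batch_start(pdev, prot, entry);
 *	for (n = 0; n < npages; n++)
 *		if (pci_iommu_batch_add(base_paddr + (n * IO_PAGE_SIZE)) < 0)
 *			goto fail;
 *	if (pci_iommu_batch_end() < 0)
 *		goto fail;
 *	local_irq_restore(flags);
 */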
static long pci_arena_alloc(struct iommu_arena *arena, unsigned long npages)
{
	unsigned long n, i, start, end, limit;
	int pass;

	limit = arena->limit;
	start = arena->hint;
	pass = 0;

again:
	n = find_next_zero_bit(arena->map, limit, start);
	end = n + npages;
	if (unlikely(end >= limit)) {
		if (likely(pass < 1)) {
			/* First miss: wrap around and rescan from zero. */
			limit = start;
			start = 0;
			pass++;
			goto again;
		} else {
			/* Scanned the whole thing, give up. */
			return -1;
		}
	}

	for (i = n; i < end; i++) {
		if (test_bit(i, arena->map)) {
			start = i + 1;
			goto again;
		}
	}

	for (i = n; i < end; i++)
		__set_bit(i, arena->map);

	arena->hint = end;

	return n;
}
static void pci_arena_free(struct iommu_arena *arena, unsigned long base, unsigned long npages)
{
	unsigned long i;

	for (i = base; i < (base + npages); i++)
		__clear_bit(i, arena->map);
}
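/* An arena entry is just an index into the IOTSB; the map/unmap paths
 * below convert between entries and DVMA addresses like so:
 *
 *	dma_addr = iommu->page_table_map_base + (entry << IO_PAGE_SHIFT);
 *	entry    = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
 */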
static void *pci_4v_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp)
{
	struct iommu *iommu;
	unsigned long flags, order, first_page, npages, n;
	void *ret;
	long entry;

	size = IO_PAGE_ALIGN(size);
	order = get_order(size);
	if (unlikely(order >= MAX_ORDER))
		return NULL;

	npages = size >> IO_PAGE_SHIFT;

	first_page = __get_free_pages(gfp, order);
	if (unlikely(first_page == 0UL))
		return NULL;

	memset((char *)first_page, 0, PAGE_SIZE << order);

	iommu = pdev->dev.archdata.iommu;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = pci_arena_alloc(&iommu->arena, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry < 0L))
		goto arena_alloc_fail;

	*dma_addrp = (iommu->page_table_map_base +
		      (entry << IO_PAGE_SHIFT));
	ret = (void *) first_page;
	first_page = __pa(first_page);

	local_irq_save(flags);

	pci_iommu_batch_start(pdev,
			      (HV_PCI_MAP_ATTR_READ |
			       HV_PCI_MAP_ATTR_WRITE),
			      entry);

	for (n = 0; n < npages; n++) {
		long err = pci_iommu_batch_add(first_page + (n * PAGE_SIZE));
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}

	if (unlikely(pci_iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	pci_arena_free(&iommu->arena, entry, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	/* first_page was converted to a physical address above;
	 * recover the kernel virtual address before freeing.
	 */
	first_page = (unsigned long) ret;

arena_alloc_fail:
	free_pages(first_page, order);
	return NULL;
}
static void pci_4v_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, order, npages, entry;
	u32 devhandle;

	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
	iommu = pdev->dev.archdata.iommu;
	pbm = pdev->dev.archdata.host_controller;
	devhandle = pbm->devhandle;
	entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	pci_arena_free(&iommu->arena, entry, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	order = get_order(size);
	if (order < 10)
		free_pages((unsigned long)cpu, order);
}
static dma_addr_t pci_4v_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
{
	struct iommu *iommu;
	unsigned long flags, npages, oaddr;
	unsigned long i, base_paddr;
	u32 bus_addr, ret;
	unsigned long prot;
	long entry;

	iommu = pdev->dev.archdata.iommu;

	if (unlikely(direction == PCI_DMA_NONE))
		goto bad;

	oaddr = (unsigned long)ptr;
	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = pci_arena_alloc(&iommu->arena, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry < 0L))
		goto bad;

	bus_addr = (iommu->page_table_map_base +
		    (entry << IO_PAGE_SHIFT));
	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
	base_paddr = __pa(oaddr & IO_PAGE_MASK);
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != PCI_DMA_TODEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	local_irq_save(flags);

	pci_iommu_batch_start(pdev, prot, entry);

	for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
		long err = pci_iommu_batch_add(base_paddr);
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}
	if (unlikely(pci_iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return PCI_DMA_ERROR_CODE;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	pci_arena_free(&iommu->arena, entry, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return PCI_DMA_ERROR_CODE;
}
static void pci_4v_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, npages;
	long entry;
	u32 devhandle;

	if (unlikely(direction == PCI_DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return;
	}

	iommu = pdev->dev.archdata.iommu;
	pbm = pdev->dev.archdata.host_controller;
	devhandle = pbm->devhandle;

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	bus_addr &= IO_PAGE_MASK;

	spin_lock_irqsave(&iommu->lock, flags);

	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
	pci_arena_free(&iommu->arena, entry, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}
#define SG_ENT_PHYS_ADDRESS(SG)	\
	(__pa(page_address((SG)->page)) + (SG)->offset)
static inline long fill_sg(long entry, struct pci_dev *pdev,
			   struct scatterlist *sg,
			   int nused, int nelems, unsigned long prot)
{
	struct scatterlist *dma_sg = sg;
	struct scatterlist *sg_end = sg + nelems;
	unsigned long flags;
	int i;

	local_irq_save(flags);

	pci_iommu_batch_start(pdev, prot, entry);

	for (i = 0; i < nused; i++) {
		unsigned long pteval = ~0UL;
		u32 dma_npages;

		dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
			      dma_sg->dma_length +
			      ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
		do {
			unsigned long offset;
			signed int len;

			/* If we are here, we know we have at least one
			 * more page to map.  So walk forward until we
			 * hit a page crossing, and begin creating new
			 * mappings from that spot.
			 */
			for (;;) {
				unsigned long tmp;

				tmp = SG_ENT_PHYS_ADDRESS(sg);
				len = sg->length;
				if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
					pteval = tmp & IO_PAGE_MASK;
					offset = tmp & (IO_PAGE_SIZE - 1UL);
					break;
				}
				if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
					pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
					offset = 0UL;
					len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
					break;
				}
				sg++;
			}

			pteval = (pteval & IOPTE_PAGE);
			while (len > 0) {
				long err;

				err = pci_iommu_batch_add(pteval);
				if (unlikely(err < 0L))
					goto iommu_map_failed;

				pteval += IO_PAGE_SIZE;
				len -= (IO_PAGE_SIZE - offset);
				offset = 0;
				dma_npages--;
			}

			pteval = (pteval & IOPTE_PAGE) + len;
			sg++;

			/* Skip over any tail mappings we've fully mapped,
			 * adjusting pteval along the way.  Stop when we
			 * detect a page crossing event.
			 */
			while (sg < sg_end &&
			       (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
			       (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
			       ((pteval ^
				 (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
				pteval += sg->length;
				sg++;
			}
			if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
				pteval = ~0UL;
		} while (dma_npages != 0);
		dma_sg++;
	}

	if (unlikely(pci_iommu_batch_end() < 0L))
		goto iommu_map_failed;

	local_irq_restore(flags);
	return 0;

iommu_map_failed:
	local_irq_restore(flags);
	return -1;
}
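/* Worked example for fill_sg() above: with 8K I/O pages, two physically
 * adjacent 4K buffers sharing one I/O page arrive as two scatterlist
 * entries but collapse into a single IOTSB mapping, while a 12K buffer
 * straddling an I/O page boundary emits one mapping per page touched.
 */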
static int pci_4v_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	struct iommu *iommu;
	unsigned long flags, npages, prot;
	u32 dma_base;
	struct scatterlist *sgtmp;
	long entry, err;
	int used;

	/* Fast path single entry scatterlists. */
	if (nelems == 1) {
		sglist->dma_address =
			pci_4v_map_single(pdev,
					  (page_address(sglist->page) + sglist->offset),
					  sglist->length, direction);
		if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
			return 0;
		sglist->dma_length = sglist->length;
		return 1;
	}

	iommu = pdev->dev.archdata.iommu;

	if (unlikely(direction == PCI_DMA_NONE))
		goto bad;

	/* Step 1: Prepare scatter list. */
	npages = prepare_sg(sglist, nelems);

	/* Step 2: Allocate a cluster and context, if necessary. */
	spin_lock_irqsave(&iommu->lock, flags);
	entry = pci_arena_alloc(&iommu->arena, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry < 0L))
		goto bad;

	dma_base = iommu->page_table_map_base +
		(entry << IO_PAGE_SHIFT);

	/* Step 3: Normalize DMA addresses. */
	used = nelems;

	sgtmp = sglist;
	while (used && sgtmp->dma_length) {
		sgtmp->dma_address += dma_base;
		sgtmp++;
		used--;
	}
	used = nelems - used;

	/* Step 4: Create the mappings. */
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != PCI_DMA_TODEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	err = fill_sg(entry, pdev, sglist, used, nelems, prot);
	if (unlikely(err < 0L))
		goto iommu_map_failed;

	return used;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return 0;

iommu_map_failed:
	spin_lock_irqsave(&iommu->lock, flags);
	pci_arena_free(&iommu->arena, entry, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}
static void pci_4v_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, i, npages;
	long entry;
	u32 devhandle, bus_addr;

	if (unlikely(direction == PCI_DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
	}

	iommu = pdev->dev.archdata.iommu;
	pbm = pdev->dev.archdata.host_controller;
	devhandle = pbm->devhandle;

	bus_addr = sglist->dma_address & IO_PAGE_MASK;

	for (i = 1; i < nelems; i++)
		if (sglist[i].dma_length == 0)
			break;
	i--;
	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
		  bus_addr) >> IO_PAGE_SHIFT;

	entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	pci_arena_free(&iommu->arena, entry, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}
static void pci_4v_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
	/* Nothing to do... */
}

static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	/* Nothing to do... */
}

const struct pci_iommu_ops pci_sun4v_iommu_ops = {
	.alloc_consistent		= pci_4v_alloc_consistent,
	.free_consistent		= pci_4v_free_consistent,
	.map_single			= pci_4v_map_single,
	.unmap_single			= pci_4v_unmap_single,
	.map_sg				= pci_4v_map_sg,
	.unmap_sg			= pci_4v_unmap_sg,
	.dma_sync_single_for_cpu	= pci_4v_dma_sync_single_for_cpu,
	.dma_sync_sg_for_cpu		= pci_4v_dma_sync_sg_for_cpu,
};
static inline int pci_sun4v_out_of_range(struct pci_pbm_info *pbm, unsigned int bus, unsigned int device, unsigned int func)
{
	if (bus < pbm->pci_first_busno ||
	    bus > pbm->pci_last_busno)
		return 1;
	return 0;
}
static int pci_sun4v_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
				  int where, int size, u32 *value)
{
	struct pci_pbm_info *pbm = bus_dev->sysdata;
	u32 devhandle = pbm->devhandle;
	unsigned int bus = bus_dev->number;
	unsigned int device = PCI_SLOT(devfn);
	unsigned int func = PCI_FUNC(devfn);
	unsigned long ret;

	if (bus_dev == pbm->pci_bus && devfn == 0x00)
		return pci_host_bridge_read_pci_cfg(bus_dev, devfn, where,
						    size, value);
	if (pci_sun4v_out_of_range(pbm, bus, device, func)) {
		ret = ~0UL;
	} else {
		ret = pci_sun4v_config_get(devhandle,
				HV_PCI_DEVICE_BUILD(bus, device, func),
				where, size);
#if 0
		printk("rcfg: [%x:%x:%x:%d]=[%lx]\n",
		       devhandle, HV_PCI_DEVICE_BUILD(bus, device, func),
		       where, size, ret);
#endif
	}
	switch (size) {
	case 1:
		*value = ret & 0xff;
		break;
	case 2:
		*value = ret & 0xffff;
		break;
	case 4:
		*value = ret & 0xffffffff;
		break;
	};

	return PCIBIOS_SUCCESSFUL;
}
static int pci_sun4v_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
				   int where, int size, u32 value)
{
	struct pci_pbm_info *pbm = bus_dev->sysdata;
	u32 devhandle = pbm->devhandle;
	unsigned int bus = bus_dev->number;
	unsigned int device = PCI_SLOT(devfn);
	unsigned int func = PCI_FUNC(devfn);
	unsigned long ret;

	if (bus_dev == pbm->pci_bus && devfn == 0x00)
		return pci_host_bridge_write_pci_cfg(bus_dev, devfn, where,
						     size, value);
	if (pci_sun4v_out_of_range(pbm, bus, device, func)) {
		/* Do nothing. */
	} else {
		ret = pci_sun4v_config_put(devhandle,
				HV_PCI_DEVICE_BUILD(bus, device, func),
				where, size, value);
#if 0
		printk("wcfg: [%x:%x:%x:%d] v[%x] == [%lx]\n",
		       devhandle, HV_PCI_DEVICE_BUILD(bus, device, func),
		       where, size, value, ret);
#endif
	}
	return PCIBIOS_SUCCESSFUL;
}
static struct pci_ops pci_sun4v_ops = {
	.read =		pci_sun4v_read_pci_cfg,
	.write =	pci_sun4v_write_pci_cfg,
};
static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm)
{
	struct property *prop;
	struct device_node *dp;

	dp = pbm->prom_node;
	prop = of_find_property(dp, "66mhz-capable", NULL);
	pbm->is_66mhz_capable = (prop != NULL);
	pbm->pci_bus = pci_scan_one_pbm(pbm);

	/* XXX register error interrupt handlers XXX */
}
static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
					    struct iommu *iommu)
{
	struct iommu_arena *arena = &iommu->arena;
	unsigned long i, cnt = 0;
	u32 devhandle;

	devhandle = pbm->devhandle;
	for (i = 0; i < arena->limit; i++) {
		unsigned long ret, io_attrs, ra;

		ret = pci_sun4v_iommu_getmap(devhandle,
					     HV_PCI_TSBID(0, i),
					     &io_attrs, &ra);
		if (ret == HV_EOK) {
			if (page_in_phys_avail(ra)) {
				pci_sun4v_iommu_demap(devhandle,
						      HV_PCI_TSBID(0, i), 1);
			} else {
				cnt++;
				__set_bit(i, arena->map);
			}
		}
	}

	return cnt;
}
static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
	struct iommu *iommu = pbm->iommu;
	struct property *prop;
	unsigned long num_tsb_entries, sz;
	u32 vdma[2], dma_mask, dma_offset;
	int tsbsize;

	prop = of_find_property(pbm->prom_node, "virtual-dma", NULL);
	if (prop) {
		u32 *val = prop->value;

		vdma[0] = val[0];
		vdma[1] = val[1];
	} else {
		/* No property, use default values. */
		vdma[0] = 0x80000000;
		vdma[1] = 0x80000000;
	}

	dma_mask = vdma[0];
	switch (vdma[1]) {
		case 0x20000000:
			dma_mask |= 0x1fffffff;
			tsbsize = 64;
			break;

		case 0x40000000:
			dma_mask |= 0x3fffffff;
			tsbsize = 128;
			break;

		case 0x80000000:
			dma_mask |= 0x7fffffff;
			tsbsize = 256;
			break;

		default:
			prom_printf("PCI-SUN4V: strange virtual-dma size.\n");
			prom_halt();
	};

	tsbsize *= (8 * 1024);

	num_tsb_entries = tsbsize / sizeof(iopte_t);

	dma_offset = vdma[0];

	/* Setup initial software IOMMU state. */
	spin_lock_init(&iommu->lock);
	iommu->ctx_lowest_free = 1;
	iommu->page_table_map_base = dma_offset;
	iommu->dma_addr_mask = dma_mask;

	/* Allocate and initialize the free area map. */
	sz = num_tsb_entries / 8;
	sz = (sz + 7UL) & ~7UL;
	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
	if (!iommu->arena.map) {
		prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
		prom_halt();
	}
	iommu->arena.limit = num_tsb_entries;

	sz = probe_existing_entries(pbm, iommu);
	if (sz)
		printk("%s: Imported %lu TSB entries from OBP\n",
		       pbm->name, sz);
}
#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
	u64		version_type;
#define MSIQ_VERSION_MASK		0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT		32
#define MSIQ_TYPE_MASK			0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT			0
#define MSIQ_TYPE_NONE			0x00
#define MSIQ_TYPE_MSG			0x01
#define MSIQ_TYPE_MSI32			0x02
#define MSIQ_TYPE_MSI64			0x03
#define MSIQ_TYPE_INTX			0x08
#define MSIQ_TYPE_NONE2			0xff

	u64		intx_sysino;	/* INTx sources only */
	u64		reserved1;
	u64		stick;
	u64		req_id;		/* bus/device/func */
#define MSIQ_REQID_BUS_MASK		0xff00UL
#define MSIQ_REQID_BUS_SHIFT		8
#define MSIQ_REQID_DEVICE_MASK		0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT		3
#define MSIQ_REQID_FUNC_MASK		0x0007UL
#define MSIQ_REQID_FUNC_SHIFT		0

	u64		msi_address;

	/* The format of this value is message type dependent.
	 * For MSI bits 15:0 are the data from the MSI packet.
	 * For MSI-X bits 31:0 are the data from the MSI packet.
	 * For MSG, the message code and message routing code where:
	 *	bits 39:32 is the bus/device/fn of the msg target-id
	 *	bits 18:16 is the message routing code
	 *	bits 7:0 is the message code
	 * For INTx the low order 2-bits are:
	 *	00 - INTA
	 *	01 - INTB
	 *	10 - INTC
	 *	11 - INTD
	 */
	u64		msi_data;

	u64		reserved2;
};
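/* Decoding a req_id into PCI geometry is plain mask-and-shift; a sketch
 * (bus/dev/func here are illustrative locals, not driver state):
 *
 *	unsigned int bus  = (req_id & MSIQ_REQID_BUS_MASK) >> MSIQ_REQID_BUS_SHIFT;
 *	unsigned int dev  = (req_id & MSIQ_REQID_DEVICE_MASK) >> MSIQ_REQID_DEVICE_SHIFT;
 *	unsigned int func = (req_id & MSIQ_REQID_FUNC_MASK) >> MSIQ_REQID_FUNC_SHIFT;
 */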
/* For now this just runs as a pre-handler for the real interrupt handler.
 * So we just walk through the queue and ACK all the entries, update the
 * head pointer, and return.
 *
 * In the longer term it would be nice to do something more integrated
 * wherein we can pass in some of this MSI info to the drivers.  This
 * would be most useful for PCIe fabric error messages, although we could
 * invoke those directly from the loop here in order to pass the info around.
 */
static void pci_sun4v_msi_prehandler(unsigned int ino, void *data1, void *data2)
{
	struct pci_pbm_info *pbm = data1;
	struct pci_sun4v_msiq_entry *base, *ep;
	unsigned long msiqid, orig_head, head, type, err;

	msiqid = (unsigned long) data2;

	head = 0xdeadbeef;
	err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, &head);
	if (unlikely(err))
		goto hv_error_get;

	if (unlikely(head >= (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry))))
		goto bad_offset;

	head /= sizeof(struct pci_sun4v_msiq_entry);
	orig_head = head;
	base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
				   (pbm->msiq_ent_count *
				    sizeof(struct pci_sun4v_msiq_entry))));
	ep = &base[head];
	while ((ep->version_type & MSIQ_TYPE_MASK) != 0) {
		type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
		if (unlikely(type != MSIQ_TYPE_MSI32 &&
			     type != MSIQ_TYPE_MSI64))
			goto bad_type;

		pci_sun4v_msi_setstate(pbm->devhandle,
				       ep->msi_data /* msi_num */,
				       HV_MSISTATE_IDLE);

		/* Clear the entry.  */
		ep->version_type &= ~MSIQ_TYPE_MASK;

		/* Go to next entry in ring.  */
		head++;
		if (head >= pbm->msiq_ent_count)
			head = 0;
		ep = &base[head];
	}

	if (likely(head != orig_head)) {
		/* ACK entries by updating head pointer.  */
		head *= sizeof(struct pci_sun4v_msiq_entry);
		err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
		if (unlikely(err))
			goto hv_error_set;
	}
	return;

hv_error_set:
	printk(KERN_EMERG "MSI: Hypervisor set head gives error %lu\n", err);
	goto hv_error_cont;

hv_error_get:
	printk(KERN_EMERG "MSI: Hypervisor get head gives error %lu\n", err);

hv_error_cont:
	printk(KERN_EMERG "MSI: devhandle[%x] msiqid[%lx] head[%lu]\n",
	       pbm->devhandle, msiqid, head);
	return;

bad_offset:
	printk(KERN_EMERG "MSI: Hypervisor gives bad offset %lx max(%lx)\n",
	       head, pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry));
	return;

bad_type:
	printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type);
	return;
}
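/* Note on the arithmetic above: the hypervisor reports and accepts the
 * queue head as a byte offset, so the handler divides by
 * sizeof(struct pci_sun4v_msiq_entry) to get a ring index and multiplies
 * again before pci_sun4v_msiq_sethead().  For example, with a (purely
 * illustrative) msiq_ent_count of 64, index 63 wraps back to 0.
 */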
static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
{
	unsigned long size, bits_per_ulong;

	bits_per_ulong = sizeof(unsigned long) * 8;
	size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
	size /= 8;
	BUG_ON(size % sizeof(unsigned long));

	pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
	if (!pbm->msi_bitmap)
		return -ENOMEM;

	return 0;
}
static void msi_bitmap_free(struct pci_pbm_info *pbm)
{
	kfree(pbm->msi_bitmap);
	pbm->msi_bitmap = NULL;
}
static int msi_queue_alloc(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);
	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
	if (pages == 0UL) {
		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
		       order);
		return -ENOMEM;
	}
	memset((char *)pages, 0, PAGE_SIZE << order);
	pbm->msi_queues = (void *) pages;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long err, base = __pa(pages + (i * q_size));
		unsigned long ret1, ret2;

		err = pci_sun4v_msiq_conf(pbm->devhandle,
					  pbm->msiq_first + i,
					  base, pbm->msiq_ent_count);
		if (err) {
			printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
			       err);
			goto h_error;
		}

		err = pci_sun4v_msiq_info(pbm->devhandle,
					  pbm->msiq_first + i,
					  &ret1, &ret2);
		if (err) {
			printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
			       err);
			goto h_error;
		}
		if (ret1 != base || ret2 != pbm->msiq_ent_count) {
			printk(KERN_ERR "MSI: Bogus qconf "
			       "expected[%lx:%x] got[%lx:%lx]\n",
			       base, pbm->msiq_ent_count,
			       ret1, ret2);
			goto h_error;
		}
	}

	return 0;

h_error:
	free_pages(pages, order);
	return -EINVAL;
}
static int alloc_msi(struct pci_pbm_info *pbm)
{
	int i;

	for (i = 0; i < pbm->msi_num; i++) {
		if (!test_and_set_bit(i, pbm->msi_bitmap))
			return i + pbm->msi_first;
	}

	return -ENOENT;
}
static void free_msi(struct pci_pbm_info *pbm, int msi_num)
{
	msi_num -= pbm->msi_first;
	clear_bit(msi_num, pbm->msi_bitmap);
}
static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p,
				   struct pci_dev *pdev,
				   struct msi_desc *entry)
{
	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
	unsigned long devino, msiqid;
	struct msi_msg msg;
	int msi_num, err;

	*virt_irq_p = 0;

	msi_num = alloc_msi(pbm);
	if (msi_num < 0)
		return msi_num;

	devino = sun4v_build_msi(pbm->devhandle, virt_irq_p,
				 pbm->msiq_first_devino,
				 (pbm->msiq_first_devino +
				  pbm->msiq_num));
	err = -ENOMEM;
	if (!devino)
		goto out_err;

	msiqid = ((devino - pbm->msiq_first_devino) +
		  pbm->msiq_first);

	err = -EINVAL;
	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
		goto out_err;

	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
		goto out_err;

	if (pci_sun4v_msi_setmsiq(pbm->devhandle,
				  msi_num, msiqid,
				  (entry->msi_attrib.is_64 ?
				   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
		goto out_err;

	if (pci_sun4v_msi_setstate(pbm->devhandle, msi_num, HV_MSISTATE_IDLE))
		goto out_err;

	if (pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_VALID))
		goto out_err;

	pdev->dev.archdata.msi_num = msi_num;

	if (entry->msi_attrib.is_64) {
		msg.address_hi = pbm->msi64_start >> 32;
		msg.address_lo = pbm->msi64_start & 0xffffffff;
	} else {
		msg.address_hi = 0;
		msg.address_lo = pbm->msi32_start;
	}
	msg.data = msi_num;

	set_irq_msi(*virt_irq_p, entry);
	write_msi_msg(*virt_irq_p, &msg);

	irq_install_pre_handler(*virt_irq_p,
				pci_sun4v_msi_prehandler,
				pbm, (void *) msiqid);

	return 0;

out_err:
	free_msi(pbm, msi_num);
	sun4v_destroy_msi(*virt_irq_p);
	*virt_irq_p = 0;
	return err;
}
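/* The devino/msiqid arithmetic above is a linear offset between the two
 * ranges handed out by the firmware.  With illustrative values
 * msiq_first == 24 and msiq_first_devino == 36, a devino of 40 selects
 * msiqid (40 - 36) + 24 == 28.
 */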
static void pci_sun4v_teardown_msi_irq(unsigned int virt_irq,
				       struct pci_dev *pdev)
{
	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
	unsigned long msiqid, err;
	unsigned int msi_num;

	msi_num = pdev->dev.archdata.msi_num;
	err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi_num, &msiqid);
	if (err) {
		printk(KERN_ERR "%s: getmsiq gives error %lu\n",
		       pbm->name, err);
		return;
	}

	pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_INVALID);
	pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_INVALID);

	free_msi(pbm, msi_num);

	/* The sun4v_destroy_msi() will liberate the devino and thus the MSIQ
	 * allocation.
	 */
	sun4v_destroy_msi(virt_irq);
}
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
	const u32 *val;
	int len;

	val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
	if (!val || len != 4)
		goto no_msi;
	pbm->msiq_num = *val;
	if (pbm->msiq_num) {
		const struct msiq_prop {
			u32 first_msiq;
			u32 num_msiq;
			u32 first_devino;
		} *mqp;
		const struct msi_range_prop {
			u32 first_msi;
			u32 num_msi;
		} *mrng;
		const struct addr_range_prop {
			u32 msi32_high;
			u32 msi32_low;
			u32 msi32_len;
			u32 msi64_high;
			u32 msi64_low;
			u32 msi64_len;
		} *arng;

		val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
		if (!val || len != 4)
			goto no_msi;

		pbm->msiq_ent_count = *val;

		mqp = of_get_property(pbm->prom_node,
				      "msi-eq-to-devino", &len);
		if (!mqp || len != sizeof(struct msiq_prop))
			goto no_msi;

		pbm->msiq_first = mqp->first_msiq;
		pbm->msiq_first_devino = mqp->first_devino;

		val = of_get_property(pbm->prom_node, "#msi", &len);
		if (!val || len != 4)
			goto no_msi;
		pbm->msi_num = *val;

		mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
		if (!mrng || len != sizeof(struct msi_range_prop))
			goto no_msi;
		pbm->msi_first = mrng->first_msi;

		val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
		if (!val || len != 4)
			goto no_msi;
		pbm->msi_data_mask = *val;

		val = of_get_property(pbm->prom_node, "msix-data-width", &len);
		if (!val || len != 4)
			goto no_msi;
		pbm->msix_data_width = *val;

		arng = of_get_property(pbm->prom_node, "msi-address-ranges",
				       &len);
		if (!arng || len != sizeof(struct addr_range_prop))
			goto no_msi;
		pbm->msi32_start = ((u64)arng->msi32_high << 32) |
			(u64) arng->msi32_low;
		pbm->msi64_start = ((u64)arng->msi64_high << 32) |
			(u64) arng->msi64_low;
		pbm->msi32_len = arng->msi32_len;
		pbm->msi64_len = arng->msi64_len;

		if (msi_bitmap_alloc(pbm))
			goto no_msi;

		if (msi_queue_alloc(pbm)) {
			msi_bitmap_free(pbm);
			goto no_msi;
		}

		printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
		       "devino[0x%x]\n",
		       pbm->name,
		       pbm->msiq_first, pbm->msiq_num,
		       pbm->msiq_ent_count,
		       pbm->msiq_first_devino);
		printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
		       "width[%u]\n",
		       pbm->name,
		       pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
		       pbm->msix_data_width);
		printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
		       "addr64[0x%lx:0x%x]\n",
		       pbm->name,
		       pbm->msi32_start, pbm->msi32_len,
		       pbm->msi64_start, pbm->msi64_len);
		printk(KERN_INFO "%s: MSI queues at RA [%p]\n",
		       pbm->name,
		       pbm->msi_queues);
	}
	pbm->setup_msi_irq = pci_sun4v_setup_msi_irq;
	pbm->teardown_msi_irq = pci_sun4v_teardown_msi_irq;

	return;

no_msi:
	pbm->msiq_num = 0;
	printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
}
#else /* CONFIG_PCI_MSI */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */
static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle)
{
	struct pci_pbm_info *pbm;

	if (devhandle & 0x40)
		pbm = &p->pbm_B;
	else
		pbm = &p->pbm_A;

	pbm->next = pci_pbm_root;
	pci_pbm_root = pbm;

	pbm->scan_bus = pci_sun4v_scan_bus;
	pbm->pci_ops = &pci_sun4v_ops;

	pbm->index = pci_num_pbms++;

	pbm->parent = p;
	pbm->prom_node = dp;

	pbm->devhandle = devhandle;

	pbm->name = dp->full_name;

	printk("%s: SUN4V PCI Bus Module\n", pbm->name);

	pci_determine_mem_io_space(pbm);

	pci_get_pbm_props(pbm);
	pci_sun4v_iommu_init(pbm);
	pci_sun4v_msi_init(pbm);
}
void sun4v_pci_init(struct device_node *dp, char *model_name)
{
	struct pci_controller_info *p;
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	struct property *prop;
	struct linux_prom64_registers *regs;
	u32 devhandle;
	int i;

	prop = of_find_property(dp, "reg", NULL);
	regs = prop->value;

	devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;

	for (pbm = pci_pbm_root; pbm; pbm = pbm->next) {
		if (pbm->devhandle == (devhandle ^ 0x40)) {
			pci_sun4v_pbm_init(pbm->parent, dp, devhandle);
			return;
		}
	}

	for_each_possible_cpu(i) {
		unsigned long page = get_zeroed_page(GFP_ATOMIC);

		if (!page)
			goto fatal_memory_error;

		per_cpu(pci_iommu_batch, i).pglist = (u64 *) page;
	}

	p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
	if (!p)
		goto fatal_memory_error;

	iommu = kzalloc(sizeof(struct iommu), GFP_ATOMIC);
	if (!iommu)
		goto fatal_memory_error;

	p->pbm_A.iommu = iommu;

	iommu = kzalloc(sizeof(struct iommu), GFP_ATOMIC);
	if (!iommu)
		goto fatal_memory_error;

	p->pbm_B.iommu = iommu;

	/* Like PSYCHO and SCHIZO we have a 2GB aligned area
	 * for memory space.
	 */
	pci_memspace_mask = 0x7fffffffUL;

	pci_sun4v_pbm_init(p, dp, devhandle);
	return;

fatal_memory_error:
	prom_printf("SUN4V_PCI: Fatal memory allocation error.\n");
	prom_halt();
}