2 * Copyright IBM Corp. 2012
5 * Jan Glauber <jang@linux.vnet.ibm.com>
8 #include <linux/kernel.h>
9 #include <linux/slab.h>
10 #include <linux/export.h>
11 #include <linux/iommu-helper.h>
12 #include <linux/dma-mapping.h>
13 #include <linux/vmalloc.h>
14 #include <linux/pci.h>
15 #include <asm/pci_dma.h>
/* Slab cache that dma_alloc_cpu_table() allocates translation tables from. */
static struct kmem_cache *dma_region_table_cache;

/* Slab cache that dma_alloc_page_table() allocates page tables from. */
static struct kmem_cache *dma_page_table_cache;

/* Set to 1 by the "s390_iommu=strict" boot option; 0 selects lazy unmap. */
static int s390_iommu_strict;
21 static int zpci_refresh_global(struct zpci_dev
*zdev
)
23 return zpci_refresh_trans((u64
) zdev
->fh
<< 32, zdev
->start_dma
,
24 zdev
->iommu_pages
* PAGE_SIZE
);
27 unsigned long *dma_alloc_cpu_table(void)
29 unsigned long *table
, *entry
;
31 table
= kmem_cache_alloc(dma_region_table_cache
, GFP_ATOMIC
);
35 for (entry
= table
; entry
< table
+ ZPCI_TABLE_ENTRIES
; entry
++)
36 *entry
= ZPCI_TABLE_INVALID
;
40 static void dma_free_cpu_table(void *table
)
42 kmem_cache_free(dma_region_table_cache
, table
);
45 static unsigned long *dma_alloc_page_table(void)
47 unsigned long *table
, *entry
;
49 table
= kmem_cache_alloc(dma_page_table_cache
, GFP_ATOMIC
);
53 for (entry
= table
; entry
< table
+ ZPCI_PT_ENTRIES
; entry
++)
54 *entry
= ZPCI_PTE_INVALID
;
58 static void dma_free_page_table(void *table
)
60 kmem_cache_free(dma_page_table_cache
, table
);
63 static unsigned long *dma_get_seg_table_origin(unsigned long *entry
)
67 if (reg_entry_isvalid(*entry
))
68 sto
= get_rt_sto(*entry
);
70 sto
= dma_alloc_cpu_table();
74 set_rt_sto(entry
, sto
);
75 validate_rt_entry(entry
);
76 entry_clr_protected(entry
);
81 static unsigned long *dma_get_page_table_origin(unsigned long *entry
)
85 if (reg_entry_isvalid(*entry
))
86 pto
= get_st_pto(*entry
);
88 pto
= dma_alloc_page_table();
91 set_st_pto(entry
, pto
);
92 validate_st_entry(entry
);
93 entry_clr_protected(entry
);
98 unsigned long *dma_walk_cpu_trans(unsigned long *rto
, dma_addr_t dma_addr
)
100 unsigned long *sto
, *pto
;
101 unsigned int rtx
, sx
, px
;
103 rtx
= calc_rtx(dma_addr
);
104 sto
= dma_get_seg_table_origin(&rto
[rtx
]);
108 sx
= calc_sx(dma_addr
);
109 pto
= dma_get_page_table_origin(&sto
[sx
]);
113 px
= calc_px(dma_addr
);
117 void dma_update_cpu_trans(unsigned long *entry
, void *page_addr
, int flags
)
119 if (flags
& ZPCI_PTE_INVALID
) {
120 invalidate_pt_entry(entry
);
122 set_pt_pfaa(entry
, page_addr
);
123 validate_pt_entry(entry
);
126 if (flags
& ZPCI_TABLE_PROTECTED
)
127 entry_set_protected(entry
);
129 entry_clr_protected(entry
);
132 static int dma_update_trans(struct zpci_dev
*zdev
, unsigned long pa
,
133 dma_addr_t dma_addr
, size_t size
, int flags
)
135 unsigned int nr_pages
= PAGE_ALIGN(size
) >> PAGE_SHIFT
;
136 u8
*page_addr
= (u8
*) (pa
& PAGE_MASK
);
137 dma_addr_t start_dma_addr
= dma_addr
;
138 unsigned long irq_flags
;
139 unsigned long *entry
;
145 spin_lock_irqsave(&zdev
->dma_table_lock
, irq_flags
);
146 if (!zdev
->dma_table
) {
151 for (i
= 0; i
< nr_pages
; i
++) {
152 entry
= dma_walk_cpu_trans(zdev
->dma_table
, dma_addr
);
157 dma_update_cpu_trans(entry
, page_addr
, flags
);
158 page_addr
+= PAGE_SIZE
;
159 dma_addr
+= PAGE_SIZE
;
163 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
164 * translations when previously invalid translation-table entries are
165 * validated. With lazy unmap, it also is skipped for previously valid
166 * entries, but a global rpcit is then required before any address can
167 * be re-used, i.e. after each iommu bitmap wrap-around.
169 if (!zdev
->tlb_refresh
&&
170 (!s390_iommu_strict
||
171 ((flags
& ZPCI_PTE_VALID_MASK
) == ZPCI_PTE_VALID
)))
174 rc
= zpci_refresh_trans((u64
) zdev
->fh
<< 32, start_dma_addr
,
175 nr_pages
* PAGE_SIZE
);
177 if (rc
&& ((flags
& ZPCI_PTE_VALID_MASK
) == ZPCI_PTE_VALID
)) {
178 flags
= ZPCI_PTE_INVALID
;
180 page_addr
-= PAGE_SIZE
;
181 dma_addr
-= PAGE_SIZE
;
182 entry
= dma_walk_cpu_trans(zdev
->dma_table
, dma_addr
);
185 dma_update_cpu_trans(entry
, page_addr
, flags
);
190 spin_unlock_irqrestore(&zdev
->dma_table_lock
, irq_flags
);
194 void dma_free_seg_table(unsigned long entry
)
196 unsigned long *sto
= get_rt_sto(entry
);
199 for (sx
= 0; sx
< ZPCI_TABLE_ENTRIES
; sx
++)
200 if (reg_entry_isvalid(sto
[sx
]))
201 dma_free_page_table(get_st_pto(sto
[sx
]));
203 dma_free_cpu_table(sto
);
206 void dma_cleanup_tables(unsigned long *table
)
213 for (rtx
= 0; rtx
< ZPCI_TABLE_ENTRIES
; rtx
++)
214 if (reg_entry_isvalid(table
[rtx
]))
215 dma_free_seg_table(table
[rtx
]);
217 dma_free_cpu_table(table
);
220 static unsigned long __dma_alloc_iommu(struct device
*dev
,
221 unsigned long start
, int size
)
223 struct zpci_dev
*zdev
= to_zpci(to_pci_dev(dev
));
224 unsigned long boundary_size
;
226 boundary_size
= ALIGN(dma_get_seg_boundary(dev
) + 1,
227 PAGE_SIZE
) >> PAGE_SHIFT
;
228 return iommu_area_alloc(zdev
->iommu_bitmap
, zdev
->iommu_pages
,
229 start
, size
, 0, boundary_size
, 0);
232 static unsigned long dma_alloc_iommu(struct device
*dev
, int size
)
234 struct zpci_dev
*zdev
= to_zpci(to_pci_dev(dev
));
235 unsigned long offset
, flags
;
238 spin_lock_irqsave(&zdev
->iommu_bitmap_lock
, flags
);
239 offset
= __dma_alloc_iommu(dev
, zdev
->next_bit
, size
);
242 offset
= __dma_alloc_iommu(dev
, 0, size
);
247 zdev
->next_bit
= offset
+ size
;
248 if (!zdev
->tlb_refresh
&& !s390_iommu_strict
&& wrap
)
249 /* global flush after wrap-around with lazy unmap */
250 zpci_refresh_global(zdev
);
252 spin_unlock_irqrestore(&zdev
->iommu_bitmap_lock
, flags
);
256 static void dma_free_iommu(struct device
*dev
, unsigned long offset
, int size
)
258 struct zpci_dev
*zdev
= to_zpci(to_pci_dev(dev
));
261 spin_lock_irqsave(&zdev
->iommu_bitmap_lock
, flags
);
262 if (!zdev
->iommu_bitmap
)
264 bitmap_clear(zdev
->iommu_bitmap
, offset
, size
);
266 * Lazy flush for unmap: need to move next_bit to avoid address re-use
269 if (!s390_iommu_strict
&& offset
>= zdev
->next_bit
)
270 zdev
->next_bit
= offset
+ size
;
272 spin_unlock_irqrestore(&zdev
->iommu_bitmap_lock
, flags
);
275 static inline void zpci_err_dma(unsigned long rc
, unsigned long addr
)
280 } __packed data
= {rc
, addr
};
282 zpci_err_hex(&data
, sizeof(data
));
285 static dma_addr_t
s390_dma_map_pages(struct device
*dev
, struct page
*page
,
286 unsigned long offset
, size_t size
,
287 enum dma_data_direction direction
,
288 struct dma_attrs
*attrs
)
290 struct zpci_dev
*zdev
= to_zpci(to_pci_dev(dev
));
291 unsigned long nr_pages
, iommu_page_index
;
292 unsigned long pa
= page_to_phys(page
) + offset
;
293 int flags
= ZPCI_PTE_VALID
;
297 /* This rounds up number of pages based on size and offset */
298 nr_pages
= iommu_num_pages(pa
, size
, PAGE_SIZE
);
299 iommu_page_index
= dma_alloc_iommu(dev
, nr_pages
);
300 if (iommu_page_index
== -1) {
305 /* Use rounded up size */
306 size
= nr_pages
* PAGE_SIZE
;
308 dma_addr
= zdev
->start_dma
+ iommu_page_index
* PAGE_SIZE
;
309 if (dma_addr
+ size
> zdev
->end_dma
) {
314 if (direction
== DMA_NONE
|| direction
== DMA_TO_DEVICE
)
315 flags
|= ZPCI_TABLE_PROTECTED
;
317 ret
= dma_update_trans(zdev
, pa
, dma_addr
, size
, flags
);
321 atomic64_add(nr_pages
, &zdev
->mapped_pages
);
322 return dma_addr
+ (offset
& ~PAGE_MASK
);
325 dma_free_iommu(dev
, iommu_page_index
, nr_pages
);
327 zpci_err("map error:\n");
328 zpci_err_dma(ret
, pa
);
329 return DMA_ERROR_CODE
;
332 static void s390_dma_unmap_pages(struct device
*dev
, dma_addr_t dma_addr
,
333 size_t size
, enum dma_data_direction direction
,
334 struct dma_attrs
*attrs
)
336 struct zpci_dev
*zdev
= to_zpci(to_pci_dev(dev
));
337 unsigned long iommu_page_index
;
340 npages
= iommu_num_pages(dma_addr
, size
, PAGE_SIZE
);
341 dma_addr
= dma_addr
& PAGE_MASK
;
342 ret
= dma_update_trans(zdev
, 0, dma_addr
, npages
* PAGE_SIZE
,
345 zpci_err("unmap error:\n");
346 zpci_err_dma(ret
, dma_addr
);
350 atomic64_add(npages
, &zdev
->unmapped_pages
);
351 iommu_page_index
= (dma_addr
- zdev
->start_dma
) >> PAGE_SHIFT
;
352 dma_free_iommu(dev
, iommu_page_index
, npages
);
355 static void *s390_dma_alloc(struct device
*dev
, size_t size
,
356 dma_addr_t
*dma_handle
, gfp_t flag
,
357 struct dma_attrs
*attrs
)
359 struct zpci_dev
*zdev
= to_zpci(to_pci_dev(dev
));
364 size
= PAGE_ALIGN(size
);
365 page
= alloc_pages(flag
, get_order(size
));
369 pa
= page_to_phys(page
);
370 memset((void *) pa
, 0, size
);
372 map
= s390_dma_map_pages(dev
, page
, 0, size
, DMA_BIDIRECTIONAL
, NULL
);
373 if (dma_mapping_error(dev
, map
)) {
374 free_pages(pa
, get_order(size
));
378 atomic64_add(size
/ PAGE_SIZE
, &zdev
->allocated_pages
);
384 static void s390_dma_free(struct device
*dev
, size_t size
,
385 void *pa
, dma_addr_t dma_handle
,
386 struct dma_attrs
*attrs
)
388 struct zpci_dev
*zdev
= to_zpci(to_pci_dev(dev
));
390 size
= PAGE_ALIGN(size
);
391 atomic64_sub(size
/ PAGE_SIZE
, &zdev
->allocated_pages
);
392 s390_dma_unmap_pages(dev
, dma_handle
, size
, DMA_BIDIRECTIONAL
, NULL
);
393 free_pages((unsigned long) pa
, get_order(size
));
396 static int s390_dma_map_sg(struct device
*dev
, struct scatterlist
*sg
,
397 int nr_elements
, enum dma_data_direction dir
,
398 struct dma_attrs
*attrs
)
400 int mapped_elements
= 0;
401 struct scatterlist
*s
;
404 for_each_sg(sg
, s
, nr_elements
, i
) {
405 struct page
*page
= sg_page(s
);
406 s
->dma_address
= s390_dma_map_pages(dev
, page
, s
->offset
,
407 s
->length
, dir
, NULL
);
408 if (!dma_mapping_error(dev
, s
->dma_address
)) {
409 s
->dma_length
= s
->length
;
415 return mapped_elements
;
418 for_each_sg(sg
, s
, mapped_elements
, i
) {
420 s390_dma_unmap_pages(dev
, s
->dma_address
, s
->dma_length
,
429 static void s390_dma_unmap_sg(struct device
*dev
, struct scatterlist
*sg
,
430 int nr_elements
, enum dma_data_direction dir
,
431 struct dma_attrs
*attrs
)
433 struct scatterlist
*s
;
436 for_each_sg(sg
, s
, nr_elements
, i
) {
437 s390_dma_unmap_pages(dev
, s
->dma_address
, s
->dma_length
, dir
, NULL
);
443 int zpci_dma_init_device(struct zpci_dev
*zdev
)
448 * At this point, if the device is part of an IOMMU domain, this would
449 * be a strong hint towards a bug in the IOMMU API (common) code and/or
450 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
452 WARN_ON(zdev
->s390_domain
);
454 spin_lock_init(&zdev
->iommu_bitmap_lock
);
455 spin_lock_init(&zdev
->dma_table_lock
);
457 zdev
->dma_table
= dma_alloc_cpu_table();
458 if (!zdev
->dma_table
) {
464 * Restrict the iommu bitmap size to the minimum of the following:
466 * - 3-level pagetable address limit minus start_dma offset
467 * - DMA address range allowed by the hardware (clp query pci fn)
469 * Also set zdev->end_dma to the actual end address of the usable
470 * range, instead of the theoretical maximum as reported by hardware.
472 zdev
->iommu_size
= min3((u64
) high_memory
,
473 ZPCI_TABLE_SIZE_RT
- zdev
->start_dma
,
474 zdev
->end_dma
- zdev
->start_dma
+ 1);
475 zdev
->end_dma
= zdev
->start_dma
+ zdev
->iommu_size
- 1;
476 zdev
->iommu_pages
= zdev
->iommu_size
>> PAGE_SHIFT
;
477 zdev
->iommu_bitmap
= vzalloc(zdev
->iommu_pages
/ 8);
478 if (!zdev
->iommu_bitmap
) {
483 rc
= zpci_register_ioat(zdev
, 0, zdev
->start_dma
, zdev
->end_dma
,
484 (u64
) zdev
->dma_table
);
490 dma_free_cpu_table(zdev
->dma_table
);
495 void zpci_dma_exit_device(struct zpci_dev
*zdev
)
498 * At this point, if the device is part of an IOMMU domain, this would
499 * be a strong hint towards a bug in the IOMMU API (common) code and/or
500 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
502 WARN_ON(zdev
->s390_domain
);
504 zpci_unregister_ioat(zdev
, 0);
505 dma_cleanup_tables(zdev
->dma_table
);
506 zdev
->dma_table
= NULL
;
507 vfree(zdev
->iommu_bitmap
);
508 zdev
->iommu_bitmap
= NULL
;
512 static int __init
dma_alloc_cpu_table_caches(void)
514 dma_region_table_cache
= kmem_cache_create("PCI_DMA_region_tables",
515 ZPCI_TABLE_SIZE
, ZPCI_TABLE_ALIGN
,
517 if (!dma_region_table_cache
)
520 dma_page_table_cache
= kmem_cache_create("PCI_DMA_page_tables",
521 ZPCI_PT_SIZE
, ZPCI_PT_ALIGN
,
523 if (!dma_page_table_cache
) {
524 kmem_cache_destroy(dma_region_table_cache
);
530 int __init
zpci_dma_init(void)
532 return dma_alloc_cpu_table_caches();
535 void zpci_dma_exit(void)
537 kmem_cache_destroy(dma_page_table_cache
);
538 kmem_cache_destroy(dma_region_table_cache
);
541 #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
543 static int __init
dma_debug_do_init(void)
545 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES
);
548 fs_initcall(dma_debug_do_init
);
550 struct dma_map_ops s390_pci_dma_ops
= {
551 .alloc
= s390_dma_alloc
,
552 .free
= s390_dma_free
,
553 .map_sg
= s390_dma_map_sg
,
554 .unmap_sg
= s390_dma_unmap_sg
,
555 .map_page
= s390_dma_map_pages
,
556 .unmap_page
= s390_dma_unmap_pages
,
557 /* if we support direct DMA this must be conditional */
559 /* dma_supported is unconditionally true without a callback */
561 EXPORT_SYMBOL_GPL(s390_pci_dma_ops
);
563 static int __init
s390_iommu_setup(char *str
)
565 if (!strncmp(str
, "strict", 6))
566 s390_iommu_strict
= 1;
570 __setup("s390_iommu=", s390_iommu_setup
);