Commit | Line | Data |
---|---|---|
5d2aa710 AP |
1 | /* |
2 | * This file implements the DMA operations for NVLink devices. The NPU | |
3 | * devices all point to the same iommu table as the parent PCI device. | |
4 | * | |
5 | * Copyright Alistair Popple, IBM Corporation 2015. | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of version 2 of the GNU General Public | |
9 | * License as published by the Free Software Foundation. | |
10 | */ | |
11 | ||
12 | #include <linux/export.h> | |
13 | #include <linux/pci.h> | |
14 | #include <linux/memblock.h> | |
b5cb9ab1 | 15 | #include <linux/iommu.h> |
5d2aa710 AP |
16 | |
17 | #include <asm/iommu.h> | |
18 | #include <asm/pnv-pci.h> | |
19 | #include <asm/msi_bitmap.h> | |
20 | #include <asm/opal.h> | |
21 | ||
22 | #include "powernv.h" | |
23 | #include "pci.h" | |
24 | ||
25 | /* | |
26 | * Other types of TCE cache invalidation are not functional in the | |
27 | * hardware. | |
28 | */ | |
5d2aa710 AP |
29 | static struct pci_dev *get_pci_dev(struct device_node *dn) |
30 | { | |
31 | return PCI_DN(dn)->pcidev; | |
32 | } | |
33 | ||
34 | /* Given a NPU device get the associated PCI device. */ | |
35 | struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev) | |
36 | { | |
37 | struct device_node *dn; | |
38 | struct pci_dev *gpdev; | |
39 | ||
40 | /* Get assoicated PCI device */ | |
41 | dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0); | |
42 | if (!dn) | |
43 | return NULL; | |
44 | ||
45 | gpdev = get_pci_dev(dn); | |
46 | of_node_put(dn); | |
47 | ||
48 | return gpdev; | |
49 | } | |
50 | EXPORT_SYMBOL(pnv_pci_get_gpu_dev); | |
51 | ||
52 | /* Given the real PCI device get a linked NPU device. */ | |
53 | struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) | |
54 | { | |
55 | struct device_node *dn; | |
56 | struct pci_dev *npdev; | |
57 | ||
58 | /* Get assoicated PCI device */ | |
59 | dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index); | |
60 | if (!dn) | |
61 | return NULL; | |
62 | ||
63 | npdev = get_pci_dev(dn); | |
64 | of_node_put(dn); | |
65 | ||
66 | return npdev; | |
67 | } | |
68 | EXPORT_SYMBOL(pnv_pci_get_npu_dev); | |
69 | ||
/*
 * Report that the calling DMA operation is unsupported for NVLink devices.
 * Must be used inside a function that has a `dev` variable in scope; the
 * function name is taken from __func__.
 */
#define NPU_DMA_OP_UNSUPPORTED()					\
	dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
		__func__)
73 | ||
74 | static void *dma_npu_alloc(struct device *dev, size_t size, | |
75 | dma_addr_t *dma_handle, gfp_t flag, | |
00085f1e | 76 | unsigned long attrs) |
5d2aa710 AP |
77 | { |
78 | NPU_DMA_OP_UNSUPPORTED(); | |
79 | return NULL; | |
80 | } | |
81 | ||
82 | static void dma_npu_free(struct device *dev, size_t size, | |
83 | void *vaddr, dma_addr_t dma_handle, | |
00085f1e | 84 | unsigned long attrs) |
5d2aa710 AP |
85 | { |
86 | NPU_DMA_OP_UNSUPPORTED(); | |
87 | } | |
88 | ||
89 | static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, | |
90 | unsigned long offset, size_t size, | |
91 | enum dma_data_direction direction, | |
00085f1e | 92 | unsigned long attrs) |
5d2aa710 AP |
93 | { |
94 | NPU_DMA_OP_UNSUPPORTED(); | |
95 | return 0; | |
96 | } | |
97 | ||
98 | static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, | |
99 | int nelems, enum dma_data_direction direction, | |
00085f1e | 100 | unsigned long attrs) |
5d2aa710 AP |
101 | { |
102 | NPU_DMA_OP_UNSUPPORTED(); | |
103 | return 0; | |
104 | } | |
105 | ||
106 | static int dma_npu_dma_supported(struct device *dev, u64 mask) | |
107 | { | |
108 | NPU_DMA_OP_UNSUPPORTED(); | |
109 | return 0; | |
110 | } | |
111 | ||
112 | static u64 dma_npu_get_required_mask(struct device *dev) | |
113 | { | |
114 | NPU_DMA_OP_UNSUPPORTED(); | |
115 | return 0; | |
116 | } | |
117 | ||
118 | struct dma_map_ops dma_npu_ops = { | |
119 | .map_page = dma_npu_map_page, | |
120 | .map_sg = dma_npu_map_sg, | |
121 | .alloc = dma_npu_alloc, | |
122 | .free = dma_npu_free, | |
123 | .dma_supported = dma_npu_dma_supported, | |
124 | .get_required_mask = dma_npu_get_required_mask, | |
125 | }; | |
126 | ||
127 | /* | |
128 | * Returns the PE assoicated with the PCI device of the given | |
129 | * NPU. Returns the linked pci device if pci_dev != NULL. | |
130 | */ | |
131 | static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, | |
132 | struct pci_dev **gpdev) | |
133 | { | |
134 | struct pnv_phb *phb; | |
135 | struct pci_controller *hose; | |
136 | struct pci_dev *pdev; | |
137 | struct pnv_ioda_pe *pe; | |
138 | struct pci_dn *pdn; | |
139 | ||
85674868 AK |
140 | pdev = pnv_pci_get_gpu_dev(npe->pdev); |
141 | if (!pdev) | |
142 | return NULL; | |
5d2aa710 | 143 | |
85674868 AK |
144 | pdn = pci_get_pdn(pdev); |
145 | if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) | |
146 | return NULL; | |
147 | ||
148 | hose = pci_bus_to_host(pdev->bus); | |
149 | phb = hose->private_data; | |
150 | pe = &phb->ioda.pe_array[pdn->pe_number]; | |
5d2aa710 AP |
151 | |
152 | if (gpdev) | |
153 | *gpdev = pdev; | |
154 | ||
155 | return pe; | |
156 | } | |
157 | ||
b5cb9ab1 | 158 | long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, |
b575c731 AK |
159 | struct iommu_table *tbl) |
160 | { | |
161 | struct pnv_phb *phb = npe->phb; | |
162 | int64_t rc; | |
163 | const unsigned long size = tbl->it_indirect_levels ? | |
164 | tbl->it_level_size : tbl->it_size; | |
165 | const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; | |
166 | const __u64 win_size = tbl->it_size << tbl->it_page_shift; | |
167 | ||
168 | pe_info(npe, "Setting up window %llx..%llx pg=%lx\n", | |
169 | start_addr, start_addr + win_size - 1, | |
170 | IOMMU_PAGE_SIZE(tbl)); | |
171 | ||
172 | rc = opal_pci_map_pe_dma_window(phb->opal_id, | |
173 | npe->pe_number, | |
174 | npe->pe_number, | |
175 | tbl->it_indirect_levels + 1, | |
176 | __pa(tbl->it_base), | |
177 | size << 3, | |
178 | IOMMU_PAGE_SIZE(tbl)); | |
179 | if (rc) { | |
180 | pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); | |
181 | return rc; | |
182 | } | |
a34ab7c3 | 183 | pnv_pci_phb3_tce_invalidate_entire(phb, false); |
b575c731 | 184 | |
85674868 | 185 | /* Add the table to the list so its TCE cache will get invalidated */ |
b5cb9ab1 | 186 | pnv_pci_link_table_and_group(phb->hose->node, num, |
85674868 AK |
187 | tbl, &npe->table_group); |
188 | ||
b575c731 AK |
189 | return 0; |
190 | } | |
191 | ||
b5cb9ab1 | 192 | long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) |
b575c731 AK |
193 | { |
194 | struct pnv_phb *phb = npe->phb; | |
195 | int64_t rc; | |
196 | ||
197 | pe_info(npe, "Removing DMA window\n"); | |
198 | ||
199 | rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, | |
200 | npe->pe_number, | |
201 | 0/* levels */, 0/* table address */, | |
202 | 0/* table size */, 0/* page size */); | |
203 | if (rc) { | |
204 | pe_err(npe, "Unmapping failed, ret = %lld\n", rc); | |
205 | return rc; | |
206 | } | |
a34ab7c3 | 207 | pnv_pci_phb3_tce_invalidate_entire(phb, false); |
b575c731 | 208 | |
b5cb9ab1 | 209 | pnv_pci_unlink_table_and_group(npe->table_group.tables[num], |
85674868 | 210 | &npe->table_group); |
5d2aa710 | 211 | |
85674868 | 212 | return 0; |
5d2aa710 AP |
213 | } |
214 | ||
215 | /* | |
f9f83456 | 216 | * Enables 32 bit DMA on NPU. |
5d2aa710 | 217 | */ |
f9f83456 | 218 | static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) |
5d2aa710 | 219 | { |
5d2aa710 AP |
220 | struct pci_dev *gpdev; |
221 | struct pnv_ioda_pe *gpe; | |
5d2aa710 AP |
222 | int64_t rc; |
223 | ||
224 | /* | |
225 | * Find the assoicated PCI devices and get the dma window | |
226 | * information from there. | |
227 | */ | |
228 | if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV)) | |
229 | return; | |
230 | ||
231 | gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); | |
232 | if (!gpe) | |
233 | return; | |
234 | ||
b5cb9ab1 | 235 | rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); |
5d2aa710 AP |
236 | |
237 | /* | |
238 | * We don't initialise npu_pe->tce32_table as we always use | |
239 | * dma_npu_ops which are nops. | |
240 | */ | |
241 | set_dma_ops(&npe->pdev->dev, &dma_npu_ops); | |
242 | } | |
243 | ||
244 | /* | |
f9f83456 | 245 | * Enables bypass mode on the NPU. The NPU only supports one |
446957ba | 246 | * window per link, so bypass needs to be explicitly enabled or |
5d2aa710 AP |
247 | * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be |
248 | * active at the same time. | |
249 | */ | |
f9f83456 | 250 | static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) |
5d2aa710 AP |
251 | { |
252 | struct pnv_phb *phb = npe->phb; | |
253 | int64_t rc = 0; | |
f9f83456 | 254 | phys_addr_t top = memblock_end_of_DRAM(); |
5d2aa710 AP |
255 | |
256 | if (phb->type != PNV_PHB_NPU || !npe->pdev) | |
257 | return -EINVAL; | |
258 | ||
b5cb9ab1 | 259 | rc = pnv_npu_unset_window(npe, 0); |
b575c731 AK |
260 | if (rc != OPAL_SUCCESS) |
261 | return rc; | |
262 | ||
f9f83456 AK |
263 | /* Enable the bypass window */ |
264 | ||
265 | top = roundup_pow_of_two(top); | |
266 | dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n", | |
267 | npe->pe_number); | |
268 | rc = opal_pci_map_pe_dma_window_real(phb->opal_id, | |
269 | npe->pe_number, npe->pe_number, | |
270 | 0 /* bypass base */, top); | |
5d2aa710 | 271 | |
85674868 | 272 | if (rc == OPAL_SUCCESS) |
a34ab7c3 | 273 | pnv_pci_phb3_tce_invalidate_entire(phb, false); |
85674868 | 274 | |
5d2aa710 AP |
275 | return rc; |
276 | } | |
277 | ||
f9f83456 | 278 | void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass) |
5d2aa710 | 279 | { |
f9f83456 AK |
280 | int i; |
281 | struct pnv_phb *phb; | |
282 | struct pci_dn *pdn; | |
283 | struct pnv_ioda_pe *npe; | |
284 | struct pci_dev *npdev; | |
5d2aa710 | 285 | |
f9f83456 AK |
286 | for (i = 0; ; ++i) { |
287 | npdev = pnv_pci_get_npu_dev(gpdev, i); | |
5d2aa710 | 288 | |
f9f83456 AK |
289 | if (!npdev) |
290 | break; | |
5d2aa710 | 291 | |
f9f83456 AK |
292 | pdn = pci_get_pdn(npdev); |
293 | if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) | |
294 | return; | |
5d2aa710 | 295 | |
f9f83456 | 296 | phb = pci_bus_to_host(npdev->bus)->private_data; |
5d2aa710 | 297 | |
f9f83456 AK |
298 | /* We only do bypass if it's enabled on the linked device */ |
299 | npe = &phb->ioda.pe_array[pdn->pe_number]; | |
5d2aa710 | 300 | |
f9f83456 AK |
301 | if (bypass) { |
302 | dev_info(&npdev->dev, | |
303 | "Using 64-bit DMA iommu bypass\n"); | |
304 | pnv_npu_dma_set_bypass(npe); | |
305 | } else { | |
306 | dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n"); | |
307 | pnv_npu_dma_set_32(npe); | |
308 | } | |
309 | } | |
5d2aa710 | 310 | } |
b5cb9ab1 AK |
311 | |
312 | /* Switch ownership from platform code to external user (e.g. VFIO) */ | |
313 | void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) | |
314 | { | |
315 | struct pnv_phb *phb = npe->phb; | |
316 | int64_t rc; | |
317 | ||
318 | /* | |
319 | * Note: NPU has just a single TVE in the hardware which means that | |
320 | * while used by the kernel, it can have either 32bit window or | |
321 | * DMA bypass but never both. So we deconfigure 32bit window only | |
322 | * if it was enabled at the moment of ownership change. | |
323 | */ | |
324 | if (npe->table_group.tables[0]) { | |
325 | pnv_npu_unset_window(npe, 0); | |
326 | return; | |
327 | } | |
328 | ||
329 | /* Disable bypass */ | |
330 | rc = opal_pci_map_pe_dma_window_real(phb->opal_id, | |
331 | npe->pe_number, npe->pe_number, | |
332 | 0 /* bypass base */, 0); | |
333 | if (rc) { | |
334 | pe_err(npe, "Failed to disable bypass, err %lld\n", rc); | |
335 | return; | |
336 | } | |
a34ab7c3 | 337 | pnv_pci_phb3_tce_invalidate_entire(npe->phb, false); |
b5cb9ab1 AK |
338 | } |
339 | ||
340 | struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) | |
341 | { | |
342 | struct pnv_phb *phb = npe->phb; | |
343 | struct pci_bus *pbus = phb->hose->bus; | |
344 | struct pci_dev *npdev, *gpdev = NULL, *gptmp; | |
345 | struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); | |
346 | ||
347 | if (!gpe || !gpdev) | |
348 | return NULL; | |
349 | ||
350 | list_for_each_entry(npdev, &pbus->devices, bus_list) { | |
351 | gptmp = pnv_pci_get_gpu_dev(npdev); | |
352 | ||
353 | if (gptmp != gpdev) | |
354 | continue; | |
355 | ||
356 | pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev)); | |
357 | iommu_group_add_device(gpe->table_group.group, &npdev->dev); | |
358 | } | |
359 | ||
360 | return gpe; | |
361 | } |