/**
 * IBM Accelerator Family 'GenWQE'
 *
 * (C) Copyright IBM Corp. 2013
 *
 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
 * Author: Michael Jung <mijung@gmx.net>
 * Author: Michael Ruettger <michael@ibmra.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

/*
 * Miscellaneous functionality used in the other GenWQE driver parts.
 */

#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/page-flags.h>
#include <linux/scatterlist.h>
#include <linux/hugetlb.h>
#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <asm/pgtable.h>

#include "genwqe_driver.h"
#include "card_base.h"
#include "card_ddcb.h"

/**
 * __genwqe_writeq() - Write 64-bit register
 * @cd: genwqe device descriptor
 * @byte_offs: byte offset within BAR
 * @val: 64-bit value
 *
 * Return: 0 if success; < 0 if error
 */
int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val)
{
        struct pci_dev *pci_dev = cd->pci_dev;

        if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
                return -EIO;

        if (cd->mmio == NULL)
                return -EIO;

        if (pci_channel_offline(pci_dev))
                return -EIO;

        __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs);
        return 0;
}

/**
 * __genwqe_readq() - Read 64-bit register
 * @cd: genwqe device descriptor
 * @byte_offs: offset within BAR
 *
 * Return: value from register
 */
u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs)
{
        if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
                return 0xffffffffffffffffull;

        if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) &&
            (byte_offs == IO_SLC_CFGREG_GFIR))
                return 0x000000000000ffffull;

        if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) &&
            (byte_offs == IO_SLC_CFGREG_GFIR))
                return 0x00000000ffff0000ull;

        if (cd->mmio == NULL)
                return 0xffffffffffffffffull;

        return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs));
}
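
/*
 * Usage sketch (illustrative only): a read-modify-write of a 64-bit
 * register through the helpers above. IO_SLC_CFGREG_SOFTRESET is a real
 * register offset used elsewhere in this file; the bit mask is made up
 * for the example.
 *
 *      u64 reg = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET);
 *      reg |= 0x2ull;          set a hypothetical bit
 *      __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, reg);
 *
 * Note that __genwqe_readq() cannot report failure; it returns all-ones
 * on injected errors or when cd->mmio is unset, so callers have to treat
 * 0xffffffffffffffffull as "register unavailable".
 */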

/**
 * __genwqe_writel() - Write 32-bit register
 * @cd: genwqe device descriptor
 * @byte_offs: byte offset within BAR
 * @val: 32-bit value
 *
 * Return: 0 if success; < 0 if error
 */
int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val)
{
        struct pci_dev *pci_dev = cd->pci_dev;

        if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
                return -EIO;

        if (cd->mmio == NULL)
                return -EIO;

        if (pci_channel_offline(pci_dev))
                return -EIO;

        __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs);
        return 0;
}

/**
 * __genwqe_readl() - Read 32-bit register
 * @cd: genwqe device descriptor
 * @byte_offs: offset within BAR
 *
 * Return: value from register
 */
u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs)
{
        if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
                return 0xffffffff;

        if (cd->mmio == NULL)
                return 0xffffffff;

        return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs));
}

/**
 * genwqe_read_app_id() - Extract app_id
 *
 * app_unitcfg needs to be filled with valid data first.
 */
int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len)
{
        int i, j;
        u32 app_id = (u32)cd->app_unitcfg;

        memset(app_name, 0, len);
        for (i = 0, j = 0; j < min(len, 4); j++) {
                char ch = (char)((app_id >> (24 - j*8)) & 0xff);

                if (ch == ' ')
                        continue;
                app_name[i++] = isprint(ch) ? ch : 'X';
        }
        return i;
}
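
/*
 * Example (illustrative; the actual app_id depends on the loaded
 * bitstream): if the low 32 bits of app_unitcfg are 0x475a4950, the
 * bytes decode MSB-first to 'G' 'Z' 'I' 'P', so genwqe_read_app_id()
 * stores "GZIP" in app_name and returns 4.
 */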

/**
 * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations
 *
 * Existing kernel functions seem to use a different polynomial,
 * therefore we could not use them here.
 *
 * Genwqe's Polynomial = 0x20044009
 */
#define CRC32_POLYNOMIAL 0x20044009
static u32 crc32_tab[256];      /* crc32 lookup table */

void genwqe_init_crc32(void)
{
        int i, j;
        u32 crc;

        for (i = 0; i < 256; i++) {
                crc = i << 24;
                for (j = 0; j < 8; j++) {
                        if (crc & 0x80000000)
                                crc = (crc << 1) ^ CRC32_POLYNOMIAL;
                        else
                                crc = (crc << 1);
                }
                crc32_tab[i] = crc;
        }
}

/**
 * genwqe_crc32() - Generate 32-bit crc as required for DDCBs
 * @buff: pointer to data buffer
 * @len: length of data for calculation
 * @init: initial crc (0xffffffff at start)
 *
 * polynomial = x^32 + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009)
 *
 * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should
 * result in a crc32 of 0xf33cb7d3.
 *
 * The existing kernel crc functions did not cover this polynomial yet.
 *
 * Return: crc32 checksum.
 */
u32 genwqe_crc32(u8 *buff, size_t len, u32 init)
{
        int i;
        u32 crc;

        crc = init;
        while (len--) {
                i = ((crc >> 24) ^ *buff++) & 0xFF;
                crc = (crc << 8) ^ crc32_tab[i];
        }
        return crc;
}
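
/*
 * Usage sketch (illustrative only), computing the checksum of the test
 * vector documented above. genwqe_init_crc32() must have run once
 * beforehand, e.g. during module initialization:
 *
 *      u8 buf[4] = { 0x01, 0x02, 0x03, 0x04 };
 *      u32 crc = genwqe_crc32(buf, sizeof(buf), 0xffffffff);
 *
 * crc is expected to be 0xf33cb7d3 afterwards.
 */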

void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
                                dma_addr_t *dma_handle)
{
        if (get_order(size) > MAX_ORDER)
                return NULL;

        return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
                                  GFP_KERNEL);
}

void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
                              void *vaddr, dma_addr_t dma_handle)
{
        if (vaddr == NULL)
                return;

        dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle);
}

static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
                               int num_pages)
{
        int i;
        struct pci_dev *pci_dev = cd->pci_dev;

        for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) {
                pci_unmap_page(pci_dev, dma_list[i],
                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
                dma_list[i] = 0x0;
        }
}

static int genwqe_map_pages(struct genwqe_dev *cd,
                            struct page **page_list, int num_pages,
                            dma_addr_t *dma_list)
{
        int i;
        struct pci_dev *pci_dev = cd->pci_dev;

        /* establish DMA mapping for requested pages */
        for (i = 0; i < num_pages; i++) {
                dma_addr_t daddr;

                dma_list[i] = 0x0;
                daddr = pci_map_page(pci_dev, page_list[i],
                                     0,  /* map_offs */
                                     PAGE_SIZE,
                                     PCI_DMA_BIDIRECTIONAL); /* FIXME rd/rw */

                if (pci_dma_mapping_error(pci_dev, daddr)) {
                        dev_err(&pci_dev->dev,
                                "[%s] err: no dma addr daddr=%016llx!\n",
                                __func__, (long long)daddr);
                        goto err;
                }

                dma_list[i] = daddr;
        }
        return 0;

err:
        genwqe_unmap_pages(cd, dma_list, num_pages);
        return -EIO;
}

static int genwqe_sgl_size(int num_pages)
{
        int len, num_tlb = num_pages / 7;

        len = sizeof(struct sg_entry) * (num_pages + num_tlb + 1);
        return roundup(len, PAGE_SIZE);
}
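
/*
 * Worked example (assuming 4 KiB pages; sg_entry is 16 bytes, a 64-bit
 * target address plus 32-bit len and flags, which is also why each
 * chaining entry below covers 128 bytes, i.e. one block of 8 entries):
 * for num_pages = 20 we get num_tlb = 2 chaining entries, so
 * len = 16 * (20 + 2 + 1) = 368 bytes, rounded up to one 4 KiB page.
 */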

/**
 * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages
 *
 * Allocates memory for sgl and overlapping pages. Pages which might
 * overlap other user-space memory blocks are being cached for DMAs,
 * such that we do not run into synchronization issues. Data is copied
 * from user-space into the cached pages.
 */
int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
                          void __user *user_addr, size_t user_size)
{
        int rc = -ENOMEM;
        struct pci_dev *pci_dev = cd->pci_dev;

        sgl->fpage_offs = offset_in_page((unsigned long)user_addr);
        sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size);
        sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE);
        sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE;

        dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n",
                __func__, user_addr, user_size, sgl->nr_pages,
                sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size);

        sgl->user_addr = user_addr;
        sgl->user_size = user_size;
        sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages);

        if (get_order(sgl->sgl_size) > MAX_ORDER) {
                dev_err(&pci_dev->dev,
                        "[%s] err: too much memory requested!\n", __func__);
                return -ENOMEM;
        }

        sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size,
                                             &sgl->sgl_dma_addr);
        if (sgl->sgl == NULL) {
                dev_err(&pci_dev->dev,
                        "[%s] err: no memory available!\n", __func__);
                return -ENOMEM;
        }

        /* Only use buffering on incomplete pages */
        if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) {
                sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
                                                       &sgl->fpage_dma_addr);
                if (sgl->fpage == NULL)
                        goto err_out;

                /* Sync with user memory */
                if (copy_from_user(sgl->fpage + sgl->fpage_offs,
                                   user_addr, sgl->fpage_size)) {
                        rc = -EFAULT;
                        goto err_out1;
                }
        }
        if (sgl->lpage_size != 0) {
                sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
                                                       &sgl->lpage_dma_addr);
                if (sgl->lpage == NULL)
                        goto err_out1;

                /* Sync with user memory */
                if (copy_from_user(sgl->lpage, user_addr + user_size -
                                   sgl->lpage_size, sgl->lpage_size)) {
                        rc = -EFAULT;
                        goto err_out2;
                }
        }
        return 0;

err_out2:
        __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
                                 sgl->lpage_dma_addr);
        sgl->lpage = NULL;
err_out1:
        __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
                                 sgl->fpage_dma_addr);
        sgl->fpage = NULL;
err_out:
        __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
                                 sgl->sgl_dma_addr);
        sgl->sgl = NULL;
        return rc;
}

int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
                     dma_addr_t *dma_list)
{
        int i = 0, j = 0, p;
        unsigned long dma_offs, map_offs;
        dma_addr_t prev_daddr = 0;
        struct sg_entry *s, *last_s = NULL;
        size_t size = sgl->user_size;

        dma_offs = 128;         /* next block if needed/dma_offset */
        map_offs = sgl->fpage_offs; /* offset in first page */

        s = &sgl->sgl[0];       /* first set of 8 entries */
        p = 0;                  /* page */
        while (p < sgl->nr_pages) {
                dma_addr_t daddr;
                unsigned int size_to_map;

                /* always write the chaining entry, cleanup is done later */
                j = 0;
                s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs);
                s[j].len = cpu_to_be32(128);
                s[j].flags = cpu_to_be32(SG_CHAINED);
                j++;

                while (j < 8) {
                        /* DMA mapping for requested page, offs, size */
                        size_to_map = min(size, PAGE_SIZE - map_offs);

                        if ((p == 0) && (sgl->fpage != NULL)) {
                                daddr = sgl->fpage_dma_addr + map_offs;

                        } else if ((p == sgl->nr_pages - 1) &&
                                   (sgl->lpage != NULL)) {
                                daddr = sgl->lpage_dma_addr;
                        } else {
                                daddr = dma_list[p] + map_offs;
                        }

                        size -= size_to_map;
                        map_offs = 0;

                        if (prev_daddr == daddr) {
                                u32 prev_len = be32_to_cpu(last_s->len);

                                /* pr_info("daddr combining: "
                                        "%016llx/%08x -> %016llx\n",
                                        prev_daddr, prev_len, daddr); */

                                last_s->len = cpu_to_be32(prev_len +
                                                          size_to_map);

                                p++; /* process next page */
                                if (p == sgl->nr_pages)
                                        goto fixup;  /* nothing to do */

                                prev_daddr = daddr + size_to_map;
                                continue;
                        }

                        /* start new entry */
                        s[j].target_addr = cpu_to_be64(daddr);
                        s[j].len = cpu_to_be32(size_to_map);
                        s[j].flags = cpu_to_be32(SG_DATA);
                        prev_daddr = daddr + size_to_map;
                        last_s = &s[j];
                        j++;

                        p++; /* process next page */
                        if (p == sgl->nr_pages)
                                goto fixup;  /* nothing to do */
                }
                dma_offs += 128;
                s += 8;         /* continue 8 elements further */
        }
fixup:
        if (j == 1) {           /* combining happened on last entry! */
                s -= 8;         /* full shift needed on previous sgl block */
                j = 7;          /* shift all elements */
        }

        for (i = 0; i < j; i++) /* move elements 1 up */
                s[i] = s[i + 1];

        s[i].target_addr = cpu_to_be64(0);
        s[i].len = cpu_to_be32(0);
        s[i].flags = cpu_to_be32(SG_END_LIST);
        return 0;
}

/**
 * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages
 *
 * After the DMA transfer has been completed we free the memory for
 * the sgl and the cached pages. Data is being transferred from cached
 * pages into user-space buffers.
 */
int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl)
{
        int rc = 0;
        struct pci_dev *pci_dev = cd->pci_dev;

        if (sgl->fpage) {
                if (copy_to_user(sgl->user_addr, sgl->fpage + sgl->fpage_offs,
                                 sgl->fpage_size)) {
                        dev_err(&pci_dev->dev, "[%s] err: copying fpage!\n",
                                __func__);
                        rc = -EFAULT;
                }
                __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
                                         sgl->fpage_dma_addr);
                sgl->fpage = NULL;
                sgl->fpage_dma_addr = 0;
        }
        if (sgl->lpage) {
                if (copy_to_user(sgl->user_addr + sgl->user_size -
                                 sgl->lpage_size, sgl->lpage,
                                 sgl->lpage_size)) {
                        dev_err(&pci_dev->dev, "[%s] err: copying lpage!\n",
                                __func__);
                        rc = -EFAULT;
                }
                __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
                                         sgl->lpage_dma_addr);
                sgl->lpage = NULL;
                sgl->lpage_dma_addr = 0;
        }
        __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
                                 sgl->sgl_dma_addr);

        sgl->sgl = NULL;
        sgl->sgl_dma_addr = 0x0;
        sgl->sgl_size = 0;
        return rc;
}
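
/*
 * Lifecycle sketch (illustrative only): a caller typically pins the
 * user buffer first (e.g. via genwqe_user_vmap() below, which fills a
 * dma_list), builds the scatter-gather list, lets the card do the DMA,
 * and then tears everything down again:
 *
 *      struct genwqe_sgl sgl = { 0 };
 *
 *      rc = genwqe_alloc_sync_sgl(cd, &sgl, user_addr, user_size);
 *      if (rc == 0) {
 *              genwqe_setup_sgl(cd, &sgl, m->dma_list);
 *              ... submit DDCB referencing sgl.sgl_dma_addr ...
 *              rc = genwqe_free_sync_sgl(cd, &sgl);
 *      }
 *
 * The 'm' above is a struct dma_mapping prepared by genwqe_user_vmap();
 * the DDCB submission step is outside the scope of this file.
 */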

/**
 * free_user_pages() - Give pinned pages back
 *
 * Documentation of get_user_pages is in mm/memory.c:
 *
 * If the page is written to, set_page_dirty (or set_page_dirty_lock,
 * as appropriate) must be called after the page is finished with, and
 * before put_page is called.
 *
 * FIXME Could be of use to others and might belong in the generic
 * code, if others agree. E.g.
 *    ll_free_user_pages in drivers/staging/lustre/lustre/llite/rw26.c
 *    ceph_put_page_vector in net/ceph/pagevec.c
 *    maybe more?
 */
static int free_user_pages(struct page **page_list, unsigned int nr_pages,
                           int dirty)
{
        unsigned int i;

        for (i = 0; i < nr_pages; i++) {
                if (page_list[i] != NULL) {
                        if (dirty)
                                set_page_dirty_lock(page_list[i]);
                        put_page(page_list[i]);
                }
        }
        return 0;
}

/**
 * genwqe_user_vmap() - Map user-space memory to virtual kernel memory
 * @cd: pointer to genwqe device
 * @m: mapping params
 * @uaddr: user virtual address
 * @size: size of memory to be mapped
 * @req: ddcb request passed through (not used in this function)
 *
 * We need to think about how we could speed this up. Of course it is
 * not a good idea to do this over and over again, like we are
 * currently doing it. Nevertheless, I am curious where on the path
 * the performance is spent. Most probably within the memory
 * allocation functions, but maybe also in the DMA mapping code.
 *
 * Restrictions: The maximum size of the possible mapping currently depends
 *               on the amount of memory we can get using kzalloc() for the
 *               page_list and pci_alloc_consistent for the sg_list.
 *               The sg_list is currently itself not scattered, which could
 *               be fixed with some effort. The page_list must be split into
 *               PAGE_SIZE chunks too. All that will make the complicated
 *               code more complicated.
 *
 * Return: 0 if success
 */
int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr,
                     unsigned long size, struct ddcb_requ *req)
{
        int rc = -EINVAL;
        unsigned long data, offs;
        struct pci_dev *pci_dev = cd->pci_dev;

        if ((uaddr == NULL) || (size == 0)) {
                m->size = 0;    /* mark unused and not added */
                return -EINVAL;
        }
        m->u_vaddr = uaddr;
        m->size = size;

        /* determine space needed for page_list. */
        data = (unsigned long)uaddr;
        offs = offset_in_page(data);
        m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE);

        m->page_list = kcalloc(m->nr_pages,
                               sizeof(struct page *) + sizeof(dma_addr_t),
                               GFP_KERNEL);
        if (!m->page_list) {
                dev_err(&pci_dev->dev, "err: alloc page_list failed\n");
                m->nr_pages = 0;
                m->u_vaddr = NULL;
                m->size = 0;    /* mark unused and not added */
                return -ENOMEM;
        }
        m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages);

        /* pin user pages in memory */
        rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */
                                 m->nr_pages,
                                 1,             /* write by caller */
                                 m->page_list); /* ptrs to pages */
        if (rc < 0)
                goto fail_get_user_pages;

        /* assumption: get_user_pages can be killed by signals. */
        if (rc < m->nr_pages) {
                free_user_pages(m->page_list, rc, 0);
                rc = -EFAULT;
                goto fail_get_user_pages;
        }

        rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list);
        if (rc != 0)
                goto fail_free_user_pages;

        return 0;

fail_free_user_pages:
        free_user_pages(m->page_list, m->nr_pages, 0);

fail_get_user_pages:
        kfree(m->page_list);
        m->page_list = NULL;
        m->dma_list = NULL;
        m->nr_pages = 0;
        m->u_vaddr = NULL;
        m->size = 0;    /* mark unused and not added */
        return rc;
}

/**
 * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel
 *                        memory
 * @cd: pointer to genwqe device
 * @m: mapping params
 * @req: ddcb request passed through (not used in this function)
 */
int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
                       struct ddcb_requ *req)
{
        struct pci_dev *pci_dev = cd->pci_dev;

        if (!dma_mapping_used(m)) {
                dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n",
                        __func__, m);
                return -EINVAL;
        }

        if (m->dma_list)
                genwqe_unmap_pages(cd, m->dma_list, m->nr_pages);

        if (m->page_list) {
                free_user_pages(m->page_list, m->nr_pages, 1);

                kfree(m->page_list);
                m->page_list = NULL;
                m->dma_list = NULL;
                m->nr_pages = 0;
        }

        m->u_vaddr = NULL;
        m->size = 0;    /* mark as unused and not added */
        return 0;
}
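
/*
 * Usage sketch (illustrative only): pinning a user buffer around a DMA
 * operation. The req argument may be NULL here; it is only passed
 * through for bookkeeping by the DDCB code.
 *
 *      struct dma_mapping m;
 *
 *      genwqe_mapping_init(&m, GENWQE_MAPPING_SGL_TEMP);
 *      rc = genwqe_user_vmap(cd, &m, uaddr, size, NULL);
 *      if (rc == 0) {
 *              ... use m.dma_list, e.g. for genwqe_setup_sgl() ...
 *              genwqe_user_vunmap(cd, &m, NULL);
 *      }
 *
 * genwqe_mapping_init() and GENWQE_MAPPING_SGL_TEMP live in card_base.h;
 * whether the init call is strictly required before vmap is an
 * assumption in this sketch.
 */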

/**
 * genwqe_card_type() - Get chip type from SLU Configuration Register
 * @cd: pointer to the genwqe device descriptor
 * Return: 0: Altera Stratix-IV 230
 *         1: Altera Stratix-IV 530
 *         2: Altera Stratix-V A4
 *         3: Altera Stratix-V A7
 */
u8 genwqe_card_type(struct genwqe_dev *cd)
{
        u64 card_type = cd->slu_unitcfg;

        return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20);
}

/**
 * genwqe_card_reset() - Reset the card
 * @cd: pointer to the genwqe device descriptor
 */
int genwqe_card_reset(struct genwqe_dev *cd)
{
        u64 softrst;
        struct pci_dev *pci_dev = cd->pci_dev;

        if (!genwqe_is_privileged(cd))
                return -ENODEV;

        /* new SL */
        __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull);
        msleep(1000);
        __genwqe_readq(cd, IO_HSU_FIR_CLR);
        __genwqe_readq(cd, IO_APP_FIR_CLR);
        __genwqe_readq(cd, IO_SLU_FIR_CLR);

        /*
         * Read-modify-write to preserve the stealth bits
         *
         * For SL >= 039, the Stealth WE bit allows removing
         * the read-modify-write.
         * r-m-w may require a mask 0x3C to avoid hitting hard
         * reset again for error reset (should be 0, chicken).
         */
        softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull;
        __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull);

        /* give ERRORRESET some time to finish */
        msleep(50);

        if (genwqe_need_err_masking(cd)) {
                dev_info(&pci_dev->dev,
                         "[%s] masking errors for old bitstreams\n", __func__);
                __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
        }
        return 0;
}

int genwqe_read_softreset(struct genwqe_dev *cd)
{
        u64 bitstream;

        if (!genwqe_is_privileged(cd))
                return -ENODEV;

        bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
        cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull;
        return 0;
}

/**
 * genwqe_set_interrupt_capability() - Configure MSI capability structure
 * @cd: pointer to the device
 * Return: 0 if no error
 */
int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count)
{
        int rc;
        struct pci_dev *pci_dev = cd->pci_dev;

        rc = pci_enable_msi_range(pci_dev, 1, count);
        if (rc < 0)
                return rc;

        cd->flags |= GENWQE_FLAG_MSI_ENABLED;
        return 0;
}

/**
 * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability()
 * @cd: pointer to the device
 */
void genwqe_reset_interrupt_capability(struct genwqe_dev *cd)
{
        struct pci_dev *pci_dev = cd->pci_dev;

        if (cd->flags & GENWQE_FLAG_MSI_ENABLED) {
                pci_disable_msi(pci_dev);
                cd->flags &= ~GENWQE_FLAG_MSI_ENABLED;
        }
}

/**
 * set_reg_idx() - Fill array with data. Ignore illegal offsets.
 * @cd: card device
 * @r: debug register array
 * @i: index to desired entry
 * @m: maximum possible entries
 * @addr: addr which is read
 * @idx: index in debug array
 * @val: read value
 */
static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r,
                       unsigned int *i, unsigned int m, u32 addr, u32 idx,
                       u64 val)
{
        if (WARN_ON_ONCE(*i >= m))
                return -EFAULT;

        r[*i].addr = addr;
        r[*i].idx = idx;
        r[*i].val = val;
        ++*i;
        return 0;
}

static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r,
                   unsigned int *i, unsigned int m, u32 addr, u64 val)
{
        return set_reg_idx(cd, r, i, m, addr, 0, val);
}

int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
                          unsigned int max_regs, int all)
{
        unsigned int i, j, idx = 0;
        u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr;
        u64 gfir, sluid, appid, ufir, ufec, sfir, sfec;

        /* Global FIR */
        gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
        set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir);

        /* UnitCfg for SLU */
        sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */
        set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid);

        /* UnitCfg for APP */
        appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */
        set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid);

        /* Check all chip Units */
        for (i = 0; i < GENWQE_MAX_UNITS; i++) {

                /* Unit FIR */
                ufir_addr = (i << 24) | 0x008;
                ufir = __genwqe_readq(cd, ufir_addr);
                set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir);

                /* Unit FEC */
                ufec_addr = (i << 24) | 0x018;
                ufec = __genwqe_readq(cd, ufec_addr);
                set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec);

                for (j = 0; j < 64; j++) {
                        /* wherever there is a primary 1, read the secondary */
                        if (!all && (!(ufir & (1ull << j))))
                                continue;

                        sfir_addr = (i << 24) | (0x100 + 8 * j);
                        sfir = __genwqe_readq(cd, sfir_addr);
                        set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir);

                        sfec_addr = (i << 24) | (0x300 + 8 * j);
                        sfec = __genwqe_readq(cd, sfec_addr);
                        set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec);
                }
        }

        /* fill with invalid data until end */
        for (i = idx; i < max_regs; i++) {
                regs[i].addr = 0xffffffff;
                regs[i].val = 0xffffffffffffffffull;
        }
        return idx;
}

/**
 * genwqe_ffdc_buff_size() - Calculates the number of dump registers
 */
int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid)
{
        int entries = 0, ring, traps, traces, trace_entries;
        u32 eevptr_addr, l_addr, d_len, d_type;
        u64 eevptr, val, addr;

        eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
        eevptr = __genwqe_readq(cd, eevptr_addr);

        if ((eevptr != 0x0) && (eevptr != -1ull)) {
                l_addr = GENWQE_UID_OFFS(uid) | eevptr;

                while (1) {
                        val = __genwqe_readq(cd, l_addr);

                        if ((val == 0x0) || (val == -1ull))
                                break;

                        /* 38:24 */
                        d_len = (val & 0x0000007fff000000ull) >> 24;

                        /* 39 */
                        d_type = (val & 0x0000008000000000ull) >> 36;

                        if (d_type) {   /* repeat */
                                entries += d_len;
                        } else {        /* size in bytes! */
                                entries += d_len >> 3;
                        }

                        l_addr += 8;
                }
        }

        for (ring = 0; ring < 8; ring++) {
                addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
                val = __genwqe_readq(cd, addr);

                if ((val == 0x0ull) || (val == -1ull))
                        continue;

                traps = (val >> 24) & 0xff;
                traces = (val >> 16) & 0xff;
                trace_entries = val & 0xffff;

                entries += traps + (traces * trace_entries);
        }
        return entries;
}
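
/*
 * Worked example for the entry decoding above (values invented for
 * illustration): an extended-error list entry of 0x0000008004000000ull
 * has bit 39 set, so d_type != 0 and d_len = 4 is a repeat count,
 * contributing 4 registers; an entry of 0x0000000040000000ull has
 * bit 39 clear with d_len = 0x40 bytes, contributing 0x40 >> 3 = 8
 * registers.
 */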

/**
 * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure
 */
int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid,
                          struct genwqe_reg *regs, unsigned int max_regs)
{
        int i, traps, traces, trace, trace_entries, trace_entry, ring;
        unsigned int idx = 0;
        u32 eevptr_addr, l_addr, d_addr, d_len, d_type;
        u64 eevptr, e, val, addr;

        eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
        eevptr = __genwqe_readq(cd, eevptr_addr);

        if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) {
                l_addr = GENWQE_UID_OFFS(uid) | eevptr;
                while (1) {
                        e = __genwqe_readq(cd, l_addr);
                        if ((e == 0x0) || (e == 0xffffffffffffffffull))
                                break;

                        d_addr = (e & 0x0000000000ffffffull);       /* 23:0 */
                        d_len  = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */
                        d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */
                        d_addr |= GENWQE_UID_OFFS(uid);

                        if (d_type) {
                                for (i = 0; i < (int)d_len; i++) {
                                        val = __genwqe_readq(cd, d_addr);
                                        set_reg_idx(cd, regs, &idx, max_regs,
                                                    d_addr, i, val);
                                }
                        } else {
                                d_len >>= 3; /* size in bytes! */
                                for (i = 0; i < (int)d_len; i++, d_addr += 8) {
                                        val = __genwqe_readq(cd, d_addr);
                                        set_reg_idx(cd, regs, &idx, max_regs,
                                                    d_addr, 0, val);
                                }
                        }
                        l_addr += 8;
                }
        }

        /*
         * To save time, there are only 6 traces populated on Uid=2,
         * Ring=1, each with iters=512.
         */
        for (ring = 0; ring < 8; ring++) {      /* 0 is fls, 1 is fds,
                                                   2...7 are ASI rings */
                addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
                val = __genwqe_readq(cd, addr);

                if ((val == 0x0ull) || (val == -1ull))
                        continue;

                traps = (val >> 24) & 0xff;     /* Number of Traps */
                traces = (val >> 16) & 0xff;    /* Number of Traces */
                trace_entries = val & 0xffff;   /* Entries per trace */

                /*
                 * Note: This is a combined loop that dumps both the traps
                 * (for the trace == 0 case) as well as the traces 1 to
                 * 'traces'.
                 */
                for (trace = 0; trace <= traces; trace++) {
                        u32 diag_sel =
                                GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace);

                        addr = (GENWQE_UID_OFFS(uid) |
                                IO_EXTENDED_DIAG_SELECTOR);
                        __genwqe_writeq(cd, addr, diag_sel);

                        for (trace_entry = 0;
                             trace_entry < (trace ? trace_entries : traps);
                             trace_entry++) {
                                addr = (GENWQE_UID_OFFS(uid) |
                                        IO_EXTENDED_DIAG_READ_MBX);
                                val = __genwqe_readq(cd, addr);
                                set_reg_idx(cd, regs, &idx, max_regs, addr,
                                            (diag_sel<<16) | trace_entry, val);
                        }
                }
        }
        return 0;
}

/**
 * genwqe_write_vreg() - Write register in virtual window
 *
 * Note, these registers are only accessible to the PF through the
 * VF-window. It is not intended for the VF to access.
 */
int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func)
{
        __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
        __genwqe_writeq(cd, reg, val);
        return 0;
}

/**
 * genwqe_read_vreg() - Read register in virtual window
 *
 * Note, these registers are only accessible to the PF through the
 * VF-window. It is not intended for the VF to access.
 */
u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func)
{
        __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
        return __genwqe_readq(cd, reg);
}
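
/*
 * Usage sketch (illustrative only; the register choice is an
 * assumption): the PF selects a function through the virtual window
 * and then accesses that function's copy of a windowed register, e.g.
 * reading the job-timeout register of VF 1:
 *
 *      u64 jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 1);
 *
 * func 0 addresses the PF itself; 1..15 address the VFs.
 */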

/**
 * genwqe_base_clock_frequency() - Determine base clock frequency of the card
 *
 * Note: From a design perspective it turned out to be a bad idea to
 * use codes here to specify the frequency/speed values. An old
 * driver cannot understand new codes and is therefore always a
 * problem. Better is to measure out the value or put the
 * speed/frequency directly into a register which is always a valid
 * value for old as well as for new software.
 *
 * Return: Card clock in MHz
 */
int genwqe_base_clock_frequency(struct genwqe_dev *cd)
{
        u16 speed;                      /*         MHz  MHz  MHz  MHz */
        static const int speed_grade[] = { 250, 200, 166, 175 };

        speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
        if (speed >= ARRAY_SIZE(speed_grade))
                return 0;       /* illegal value */

        return speed_grade[speed];
}
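
/*
 * Example (illustrative value): with slu_unitcfg = 0x0000000120000000ull
 * the speed code is (slu_unitcfg >> 28) & 0xf = 2, so the function
 * reports 166 MHz. Codes 4..15 fall outside speed_grade[] and yield 0.
 */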
1028 | ||
1029 | /** | |
1030 | * genwqe_stop_traps() - Stop traps | |
1031 | * | |
1032 | * Before reading out the analysis data, we need to stop the traps. | |
1033 | */ | |
1034 | void genwqe_stop_traps(struct genwqe_dev *cd) | |
1035 | { | |
1036 | __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull); | |
1037 | } | |
1038 | ||
1039 | /** | |
1040 | * genwqe_start_traps() - Start traps | |
1041 | * | |
1042 | * After having read the data, we can/must enable the traps again. | |
1043 | */ | |
1044 | void genwqe_start_traps(struct genwqe_dev *cd) | |
1045 | { | |
1046 | __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull); | |
1047 | ||
1048 | if (genwqe_need_err_masking(cd)) | |
1049 | __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); | |
1050 | } |
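
/*
 * Usage sketch (illustrative only): bracketing an FFDC dump with the
 * trap control above, as a caller collecting debug data might do:
 *
 *      genwqe_stop_traps(cd);
 *      entries = genwqe_ffdc_buff_size(cd, uid);
 *      ... allocate 'entries' struct genwqe_reg and call
 *          genwqe_ffdc_buff_read(cd, uid, regs, entries) ...
 *      genwqe_start_traps(cd);
 */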