Commit | Line | Data |
---|---|---|
67f4addb FH |
1 | /** |
2 | * IBM Accelerator Family 'GenWQE' | |
3 | * | |
4 | * (C) Copyright IBM Corp. 2013 | |
5 | * | |
6 | * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> | |
7 | * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> | |
8 | * Author: Michael Jung <mijung@de.ibm.com> | |
9 | * Author: Michael Ruettger <michael@ibmra.de> | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License (version 2 only) | |
13 | * as published by the Free Software Foundation. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | * GNU General Public License for more details. | |
19 | */ | |
20 | ||
21 | /* | |
22 | * Miscellaneous functionality used in the other GenWQE driver parts. | |
23 | */ | |
24 | ||
25 | #include <linux/kernel.h> | |
26 | #include <linux/dma-mapping.h> | |
27 | #include <linux/sched.h> | |
28 | #include <linux/vmalloc.h> | |
29 | #include <linux/page-flags.h> | |
30 | #include <linux/scatterlist.h> | |
31 | #include <linux/hugetlb.h> | |
32 | #include <linux/iommu.h> | |
33 | #include <linux/delay.h> | |
34 | #include <linux/pci.h> | |
35 | #include <linux/dma-mapping.h> | |
36 | #include <linux/ctype.h> | |
37 | #include <linux/module.h> | |
38 | #include <linux/platform_device.h> | |
39 | #include <linux/delay.h> | |
40 | #include <asm/pgtable.h> | |
41 | ||
42 | #include "genwqe_driver.h" | |
43 | #include "card_base.h" | |
44 | #include "card_ddcb.h" | |
45 | ||
46 | /** | |
47 | * __genwqe_writeq() - Write 64-bit register | |
48 | * @cd: genwqe device descriptor | |
49 | * @byte_offs: byte offset within BAR | |
50 | * @val: 64-bit value | |
51 | * | |
52 | * Return: 0 if success; < 0 if error | |
53 | */ | |
54 | int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) | |
55 | { | |
56 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) | |
57 | return -EIO; | |
58 | ||
59 | if (cd->mmio == NULL) | |
60 | return -EIO; | |
61 | ||
a45a0258 | 62 | __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs); |
67f4addb FH |
63 | return 0; |
64 | } | |
65 | ||
66 | /** | |
67 | * __genwqe_readq() - Read 64-bit register | |
68 | * @cd: genwqe device descriptor | |
69 | * @byte_offs: offset within BAR | |
70 | * | |
71 | * Return: value from register | |
72 | */ | |
73 | u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs) | |
74 | { | |
67f4addb FH |
75 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) |
76 | return 0xffffffffffffffffull; | |
77 | ||
78 | if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) && | |
79 | (byte_offs == IO_SLC_CFGREG_GFIR)) | |
80 | return 0x000000000000ffffull; | |
81 | ||
82 | if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) && | |
83 | (byte_offs == IO_SLC_CFGREG_GFIR)) | |
84 | return 0x00000000ffff0000ull; | |
85 | ||
86 | if (cd->mmio == NULL) | |
87 | return 0xffffffffffffffffull; | |
88 | ||
58d66ce7 | 89 | return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs)); |
67f4addb FH |
90 | } |
91 | ||
92 | /** | |
93 | * __genwqe_writel() - Write 32-bit register | |
94 | * @cd: genwqe device descriptor | |
95 | * @byte_offs: byte offset within BAR | |
96 | * @val: 32-bit value | |
97 | * | |
98 | * Return: 0 if success; < 0 if error | |
99 | */ | |
100 | int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) | |
101 | { | |
102 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) | |
103 | return -EIO; | |
104 | ||
105 | if (cd->mmio == NULL) | |
106 | return -EIO; | |
107 | ||
58d66ce7 | 108 | __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs); |
67f4addb FH |
109 | return 0; |
110 | } | |
111 | ||
112 | /** | |
113 | * __genwqe_readl() - Read 32-bit register | |
114 | * @cd: genwqe device descriptor | |
115 | * @byte_offs: offset within BAR | |
116 | * | |
117 | * Return: Value from register | |
118 | */ | |
119 | u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs) | |
120 | { | |
121 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) | |
122 | return 0xffffffff; | |
123 | ||
124 | if (cd->mmio == NULL) | |
125 | return 0xffffffff; | |
126 | ||
58d66ce7 | 127 | return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs)); |
67f4addb FH |
128 | } |
129 | ||
130 | /** | |
131 | * genwqe_read_app_id() - Extract app_id | |
132 | * | |
133 | * app_unitcfg need to be filled with valid data first | |
134 | */ | |
135 | int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len) | |
136 | { | |
137 | int i, j; | |
138 | u32 app_id = (u32)cd->app_unitcfg; | |
139 | ||
140 | memset(app_name, 0, len); | |
141 | for (i = 0, j = 0; j < min(len, 4); j++) { | |
142 | char ch = (char)((app_id >> (24 - j*8)) & 0xff); | |
143 | if (ch == ' ') | |
144 | continue; | |
145 | app_name[i++] = isprint(ch) ? ch : 'X'; | |
146 | } | |
147 | return i; | |
148 | } | |
149 | ||
150 | /** | |
151 | * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations | |
152 | * | |
153 | * Existing kernel functions seem to use a different polynom, | |
154 | * therefore we could not use them here. | |
155 | * | |
156 | * Genwqe's Polynomial = 0x20044009 | |
157 | */ | |
158 | #define CRC32_POLYNOMIAL 0x20044009 | |
159 | static u32 crc32_tab[256]; /* crc32 lookup table */ | |
160 | ||
161 | void genwqe_init_crc32(void) | |
162 | { | |
163 | int i, j; | |
164 | u32 crc; | |
165 | ||
166 | for (i = 0; i < 256; i++) { | |
167 | crc = i << 24; | |
168 | for (j = 0; j < 8; j++) { | |
169 | if (crc & 0x80000000) | |
170 | crc = (crc << 1) ^ CRC32_POLYNOMIAL; | |
171 | else | |
172 | crc = (crc << 1); | |
173 | } | |
174 | crc32_tab[i] = crc; | |
175 | } | |
176 | } | |
177 | ||
178 | /** | |
179 | * genwqe_crc32() - Generate 32-bit crc as required for DDCBs | |
180 | * @buff: pointer to data buffer | |
181 | * @len: length of data for calculation | |
182 | * @init: initial crc (0xffffffff at start) | |
183 | * | |
184 | * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009) | |
185 | ||
186 | * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should | |
187 | * result in a crc32 of 0xf33cb7d3. | |
188 | * | |
189 | * The existing kernel crc functions did not cover this polynom yet. | |
190 | * | |
191 | * Return: crc32 checksum. | |
192 | */ | |
193 | u32 genwqe_crc32(u8 *buff, size_t len, u32 init) | |
194 | { | |
195 | int i; | |
196 | u32 crc; | |
197 | ||
198 | crc = init; | |
199 | while (len--) { | |
200 | i = ((crc >> 24) ^ *buff++) & 0xFF; | |
201 | crc = (crc << 8) ^ crc32_tab[i]; | |
202 | } | |
203 | return crc; | |
204 | } | |
205 | ||
206 | void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, | |
207 | dma_addr_t *dma_handle) | |
208 | { | |
209 | if (get_order(size) > MAX_ORDER) | |
210 | return NULL; | |
211 | ||
212 | return pci_alloc_consistent(cd->pci_dev, size, dma_handle); | |
213 | } | |
214 | ||
215 | void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, | |
216 | void *vaddr, dma_addr_t dma_handle) | |
217 | { | |
218 | if (vaddr == NULL) | |
219 | return; | |
220 | ||
221 | pci_free_consistent(cd->pci_dev, size, vaddr, dma_handle); | |
222 | } | |
223 | ||
224 | static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, | |
225 | int num_pages) | |
226 | { | |
227 | int i; | |
228 | struct pci_dev *pci_dev = cd->pci_dev; | |
229 | ||
230 | for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) { | |
231 | pci_unmap_page(pci_dev, dma_list[i], | |
232 | PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); | |
233 | dma_list[i] = 0x0; | |
234 | } | |
235 | } | |
236 | ||
237 | static int genwqe_map_pages(struct genwqe_dev *cd, | |
238 | struct page **page_list, int num_pages, | |
239 | dma_addr_t *dma_list) | |
240 | { | |
241 | int i; | |
242 | struct pci_dev *pci_dev = cd->pci_dev; | |
243 | ||
244 | /* establish DMA mapping for requested pages */ | |
245 | for (i = 0; i < num_pages; i++) { | |
246 | dma_addr_t daddr; | |
247 | ||
248 | dma_list[i] = 0x0; | |
249 | daddr = pci_map_page(pci_dev, page_list[i], | |
250 | 0, /* map_offs */ | |
251 | PAGE_SIZE, | |
252 | PCI_DMA_BIDIRECTIONAL); /* FIXME rd/rw */ | |
253 | ||
254 | if (pci_dma_mapping_error(pci_dev, daddr)) { | |
255 | dev_err(&pci_dev->dev, | |
256 | "[%s] err: no dma addr daddr=%016llx!\n", | |
257 | __func__, (long long)daddr); | |
258 | goto err; | |
259 | } | |
260 | ||
261 | dma_list[i] = daddr; | |
262 | } | |
263 | return 0; | |
264 | ||
265 | err: | |
266 | genwqe_unmap_pages(cd, dma_list, num_pages); | |
267 | return -EIO; | |
268 | } | |
269 | ||
270 | static int genwqe_sgl_size(int num_pages) | |
271 | { | |
272 | int len, num_tlb = num_pages / 7; | |
273 | ||
274 | len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1); | |
275 | return roundup(len, PAGE_SIZE); | |
276 | } | |
277 | ||
718f762e FH |
278 | /** |
279 | * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages | |
280 | * | |
281 | * Allocates memory for sgl and overlapping pages. Pages which might | |
282 | * overlap other user-space memory blocks are being cached for DMAs, | |
283 | * such that we do not run into syncronization issues. Data is copied | |
284 | * from user-space into the cached pages. | |
285 | */ | |
286 | int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, | |
287 | void __user *user_addr, size_t user_size) | |
67f4addb | 288 | { |
718f762e | 289 | int rc; |
67f4addb | 290 | struct pci_dev *pci_dev = cd->pci_dev; |
67f4addb | 291 | |
718f762e FH |
292 | sgl->fpage_offs = offset_in_page((unsigned long)user_addr); |
293 | sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size); | |
294 | sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE); | |
295 | sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE; | |
296 | ||
297 | dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld " | |
298 | "fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n", | |
299 | __func__, user_addr, user_size, sgl->nr_pages, | |
300 | sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size); | |
301 | ||
302 | sgl->user_addr = user_addr; | |
303 | sgl->user_size = user_size; | |
304 | sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages); | |
305 | ||
306 | if (get_order(sgl->sgl_size) > MAX_ORDER) { | |
67f4addb FH |
307 | dev_err(&pci_dev->dev, |
308 | "[%s] err: too much memory requested!\n", __func__); | |
718f762e | 309 | return -ENOMEM; |
67f4addb FH |
310 | } |
311 | ||
718f762e FH |
312 | sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size, |
313 | &sgl->sgl_dma_addr); | |
314 | if (sgl->sgl == NULL) { | |
67f4addb FH |
315 | dev_err(&pci_dev->dev, |
316 | "[%s] err: no memory available!\n", __func__); | |
718f762e | 317 | return -ENOMEM; |
67f4addb FH |
318 | } |
319 | ||
718f762e FH |
320 | /* Only use buffering on incomplete pages */ |
321 | if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) { | |
322 | sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, | |
323 | &sgl->fpage_dma_addr); | |
324 | if (sgl->fpage == NULL) | |
325 | goto err_out; | |
326 | ||
327 | /* Sync with user memory */ | |
328 | if (copy_from_user(sgl->fpage + sgl->fpage_offs, | |
329 | user_addr, sgl->fpage_size)) { | |
330 | rc = -EFAULT; | |
331 | goto err_out; | |
332 | } | |
333 | } | |
334 | if (sgl->lpage_size != 0) { | |
335 | sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, | |
336 | &sgl->lpage_dma_addr); | |
337 | if (sgl->lpage == NULL) | |
338 | goto err_out1; | |
339 | ||
340 | /* Sync with user memory */ | |
341 | if (copy_from_user(sgl->lpage, user_addr + user_size - | |
342 | sgl->lpage_size, sgl->lpage_size)) { | |
343 | rc = -EFAULT; | |
344 | goto err_out1; | |
345 | } | |
346 | } | |
347 | return 0; | |
348 | ||
349 | err_out1: | |
350 | __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, | |
351 | sgl->fpage_dma_addr); | |
352 | err_out: | |
353 | __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, | |
354 | sgl->sgl_dma_addr); | |
355 | return -ENOMEM; | |
67f4addb FH |
356 | } |
357 | ||
/**
 * genwqe_setup_sgl() - Fill the hardware scatter/gather list
 * @cd:       genwqe device descriptor
 * @sgl:      descriptor prepared by genwqe_alloc_sync_sgl()
 * @dma_list: DMA addresses of the pinned user pages, one per page
 *
 * Writes sg_entry records into sgl->sgl. Entries come in blocks of 8:
 * the first entry of each block chains to the next block (128 bytes
 * further), the remaining 7 describe data pages. The first and last
 * page may be redirected to the fpage/lpage bounce buffers. Entries
 * whose DMA addresses turn out contiguous are merged by extending the
 * previous entry's length. The list ends with an SG_END_LIST entry.
 *
 * NOTE(review): the chaining/merge/fixup control flow is highly order
 * sensitive; code left byte-identical, comments only.
 *
 * Return: 0
 */
int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
		     dma_addr_t *dma_list)
{
	int i = 0, j = 0, p;
	unsigned long dma_offs, map_offs;
	dma_addr_t prev_daddr = 0;
	struct sg_entry *s, *last_s = NULL;
	size_t size = sgl->user_size;

	dma_offs = 128;		/* next block if needed/dma_offset */
	map_offs = sgl->fpage_offs; /* offset in first page */

	s = &sgl->sgl[0];	/* first set of 8 entries */
	p = 0;			/* page */
	while (p < sgl->nr_pages) {
		dma_addr_t daddr;
		unsigned int size_to_map;

		/* always write the chaining entry, cleanup is done later */
		j = 0;
		s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs);
		s[j].len = cpu_to_be32(128);
		s[j].flags = cpu_to_be32(SG_CHAINED);
		j++;

		while (j < 8) {
			/* DMA mapping for requested page, offs, size */
			size_to_map = min(size, PAGE_SIZE - map_offs);

			/* first/last page may live in a bounce buffer */
			if ((p == 0) && (sgl->fpage != NULL)) {
				daddr = sgl->fpage_dma_addr + map_offs;

			} else if ((p == sgl->nr_pages - 1) &&
				   (sgl->lpage != NULL)) {
				daddr = sgl->lpage_dma_addr;
			} else {
				daddr = dma_list[p] + map_offs;
			}

			size -= size_to_map;
			map_offs = 0;	/* only the first page has an offset */

			/* contiguous with previous entry: merge lengths */
			if (prev_daddr == daddr) {
				u32 prev_len = be32_to_cpu(last_s->len);

				/* pr_info("daddr combining: "
					"%016llx/%08x -> %016llx\n",
					prev_daddr, prev_len, daddr); */

				last_s->len = cpu_to_be32(prev_len +
							  size_to_map);

				p++; /* process next page */
				if (p == sgl->nr_pages)
					goto fixup; /* nothing to do */

				prev_daddr = daddr + size_to_map;
				continue;
			}

			/* start new entry */
			s[j].target_addr = cpu_to_be64(daddr);
			s[j].len = cpu_to_be32(size_to_map);
			s[j].flags = cpu_to_be32(SG_DATA);
			prev_daddr = daddr + size_to_map;
			last_s = &s[j];
			j++;

			p++; /* process next page */
			if (p == sgl->nr_pages)
				goto fixup; /* nothing to do */
		}
		dma_offs += 128;
		s += 8; /* continue 8 elements further */
	}
 fixup:
	/*
	 * The final chaining entry of the last block is unused; shift
	 * the data entries up by one so the terminator can take the
	 * freed slot.
	 */
	if (j == 1) {	/* combining happened on last entry! */
		s -= 8;	/* full shift needed on previous sgl block */
		j = 7;	/* shift all elements */
	}

	for (i = 0; i < j; i++) /* move elements 1 up */
		s[i] = s[i + 1];

	/* terminate the list */
	s[i].target_addr = cpu_to_be64(0);
	s[i].len = cpu_to_be32(0);
	s[i].flags = cpu_to_be32(SG_END_LIST);
	return 0;
}
447 | ||
718f762e FH |
448 | /** |
449 | * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages | |
450 | * | |
451 | * After the DMA transfer has been completed we free the memory for | |
452 | * the sgl and the cached pages. Data is being transfered from cached | |
453 | * pages into user-space buffers. | |
454 | */ | |
455 | int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl) | |
67f4addb | 456 | { |
718f762e FH |
457 | int rc; |
458 | struct pci_dev *pci_dev = cd->pci_dev; | |
459 | ||
460 | if (sgl->fpage) { | |
461 | if (copy_to_user(sgl->user_addr, sgl->fpage + sgl->fpage_offs, | |
462 | sgl->fpage_size)) { | |
463 | dev_err(&pci_dev->dev, "[%s] err: copying fpage!\n", | |
464 | __func__); | |
465 | rc = -EFAULT; | |
466 | } | |
467 | __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, | |
468 | sgl->fpage_dma_addr); | |
469 | sgl->fpage = NULL; | |
470 | sgl->fpage_dma_addr = 0; | |
471 | } | |
472 | if (sgl->lpage) { | |
473 | if (copy_to_user(sgl->user_addr + sgl->user_size - | |
474 | sgl->lpage_size, sgl->lpage, | |
475 | sgl->lpage_size)) { | |
476 | dev_err(&pci_dev->dev, "[%s] err: copying lpage!\n", | |
477 | __func__); | |
478 | rc = -EFAULT; | |
479 | } | |
480 | __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, | |
481 | sgl->lpage_dma_addr); | |
482 | sgl->lpage = NULL; | |
483 | sgl->lpage_dma_addr = 0; | |
484 | } | |
485 | __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, | |
486 | sgl->sgl_dma_addr); | |
487 | ||
488 | sgl->sgl = NULL; | |
489 | sgl->sgl_dma_addr = 0x0; | |
490 | sgl->sgl_size = 0; | |
491 | return rc; | |
67f4addb FH |
492 | } |
493 | ||
494 | /** | |
495 | * free_user_pages() - Give pinned pages back | |
496 | * | |
497 | * Documentation of get_user_pages is in mm/memory.c: | |
498 | * | |
499 | * If the page is written to, set_page_dirty (or set_page_dirty_lock, | |
500 | * as appropriate) must be called after the page is finished with, and | |
501 | * before put_page is called. | |
502 | * | |
503 | * FIXME Could be of use to others and might belong in the generic | |
504 | * code, if others agree. E.g. | |
505 | * ll_free_user_pages in drivers/staging/lustre/lustre/llite/rw26.c | |
506 | * ceph_put_page_vector in net/ceph/pagevec.c | |
507 | * maybe more? | |
508 | */ | |
509 | static int free_user_pages(struct page **page_list, unsigned int nr_pages, | |
510 | int dirty) | |
511 | { | |
512 | unsigned int i; | |
513 | ||
514 | for (i = 0; i < nr_pages; i++) { | |
515 | if (page_list[i] != NULL) { | |
516 | if (dirty) | |
517 | set_page_dirty_lock(page_list[i]); | |
518 | put_page(page_list[i]); | |
519 | } | |
520 | } | |
521 | return 0; | |
522 | } | |
523 | ||
524 | /** | |
525 | * genwqe_user_vmap() - Map user-space memory to virtual kernel memory | |
526 | * @cd: pointer to genwqe device | |
527 | * @m: mapping params | |
528 | * @uaddr: user virtual address | |
529 | * @size: size of memory to be mapped | |
530 | * | |
531 | * We need to think about how we could speed this up. Of course it is | |
532 | * not a good idea to do this over and over again, like we are | |
533 | * currently doing it. Nevertheless, I am curious where on the path | |
534 | * the performance is spend. Most probably within the memory | |
535 | * allocation functions, but maybe also in the DMA mapping code. | |
536 | * | |
537 | * Restrictions: The maximum size of the possible mapping currently depends | |
538 | * on the amount of memory we can get using kzalloc() for the | |
539 | * page_list and pci_alloc_consistent for the sg_list. | |
540 | * The sg_list is currently itself not scattered, which could | |
541 | * be fixed with some effort. The page_list must be split into | |
542 | * PAGE_SIZE chunks too. All that will make the complicated | |
543 | * code more complicated. | |
544 | * | |
545 | * Return: 0 if success | |
546 | */ | |
547 | int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, | |
548 | unsigned long size, struct ddcb_requ *req) | |
549 | { | |
550 | int rc = -EINVAL; | |
551 | unsigned long data, offs; | |
552 | struct pci_dev *pci_dev = cd->pci_dev; | |
553 | ||
554 | if ((uaddr == NULL) || (size == 0)) { | |
555 | m->size = 0; /* mark unused and not added */ | |
556 | return -EINVAL; | |
557 | } | |
558 | m->u_vaddr = uaddr; | |
559 | m->size = size; | |
560 | ||
561 | /* determine space needed for page_list. */ | |
562 | data = (unsigned long)uaddr; | |
563 | offs = offset_in_page(data); | |
564 | m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE); | |
565 | ||
566 | m->page_list = kcalloc(m->nr_pages, | |
567 | sizeof(struct page *) + sizeof(dma_addr_t), | |
568 | GFP_KERNEL); | |
569 | if (!m->page_list) { | |
570 | dev_err(&pci_dev->dev, "err: alloc page_list failed\n"); | |
571 | m->nr_pages = 0; | |
572 | m->u_vaddr = NULL; | |
573 | m->size = 0; /* mark unused and not added */ | |
574 | return -ENOMEM; | |
575 | } | |
576 | m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages); | |
577 | ||
578 | /* pin user pages in memory */ | |
579 | rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */ | |
580 | m->nr_pages, | |
581 | 1, /* write by caller */ | |
582 | m->page_list); /* ptrs to pages */ | |
583 | ||
584 | /* assumption: get_user_pages can be killed by signals. */ | |
585 | if (rc < m->nr_pages) { | |
586 | free_user_pages(m->page_list, rc, 0); | |
587 | rc = -EFAULT; | |
588 | goto fail_get_user_pages; | |
589 | } | |
590 | ||
591 | rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list); | |
592 | if (rc != 0) | |
593 | goto fail_free_user_pages; | |
594 | ||
595 | return 0; | |
596 | ||
597 | fail_free_user_pages: | |
598 | free_user_pages(m->page_list, m->nr_pages, 0); | |
599 | ||
600 | fail_get_user_pages: | |
601 | kfree(m->page_list); | |
602 | m->page_list = NULL; | |
603 | m->dma_list = NULL; | |
604 | m->nr_pages = 0; | |
605 | m->u_vaddr = NULL; | |
606 | m->size = 0; /* mark unused and not added */ | |
607 | return rc; | |
608 | } | |
609 | ||
610 | /** | |
611 | * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel | |
612 | * memory | |
613 | * @cd: pointer to genwqe device | |
614 | * @m: mapping params | |
615 | */ | |
616 | int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m, | |
617 | struct ddcb_requ *req) | |
618 | { | |
619 | struct pci_dev *pci_dev = cd->pci_dev; | |
620 | ||
621 | if (!dma_mapping_used(m)) { | |
622 | dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n", | |
623 | __func__, m); | |
624 | return -EINVAL; | |
625 | } | |
626 | ||
627 | if (m->dma_list) | |
628 | genwqe_unmap_pages(cd, m->dma_list, m->nr_pages); | |
629 | ||
630 | if (m->page_list) { | |
631 | free_user_pages(m->page_list, m->nr_pages, 1); | |
632 | ||
633 | kfree(m->page_list); | |
634 | m->page_list = NULL; | |
635 | m->dma_list = NULL; | |
636 | m->nr_pages = 0; | |
637 | } | |
638 | ||
639 | m->u_vaddr = NULL; | |
640 | m->size = 0; /* mark as unused and not added */ | |
641 | return 0; | |
642 | } | |
643 | ||
644 | /** | |
645 | * genwqe_card_type() - Get chip type SLU Configuration Register | |
646 | * @cd: pointer to the genwqe device descriptor | |
647 | * Return: 0: Altera Stratix-IV 230 | |
648 | * 1: Altera Stratix-IV 530 | |
649 | * 2: Altera Stratix-V A4 | |
650 | * 3: Altera Stratix-V A7 | |
651 | */ | |
652 | u8 genwqe_card_type(struct genwqe_dev *cd) | |
653 | { | |
654 | u64 card_type = cd->slu_unitcfg; | |
655 | return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20); | |
656 | } | |
657 | ||
/**
 * genwqe_card_reset() - Reset the card
 * @cd: pointer to the genwqe device descriptor
 *
 * PF-only operation: soft-reset the card, drain the FIR "clear"
 * registers, then trigger an error reset while preserving the stealth
 * bits via read-modify-write.
 *
 * NOTE(review): the register write/read/delay sequence below is
 * hardware-mandated; order and delays intentionally left untouched.
 *
 * Return: 0 on success, -ENODEV when not running as privileged PF
 */
int genwqe_card_reset(struct genwqe_dev *cd)
{
	u64 softrst;
	struct pci_dev *pci_dev = cd->pci_dev;

	if (!genwqe_is_privileged(cd))
		return -ENODEV;

	/* new SL */
	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull);
	msleep(1000);
	/* reads of the *_FIR_CLR registers; presumably read-to-clear
	 * semantics — confirm against the hardware spec */
	__genwqe_readq(cd, IO_HSU_FIR_CLR);
	__genwqe_readq(cd, IO_APP_FIR_CLR);
	__genwqe_readq(cd, IO_SLU_FIR_CLR);

	/*
	 * Read-modify-write to preserve the stealth bits
	 *
	 * For SL >= 039, Stealth WE bit allows removing
	 * the read-modify-write.
	 * r-m-w may require a mask 0x3C to avoid hitting hard
	 * reset again for error reset (should be 0, chicken).
	 */
	softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull;
	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull);

	/* give ERRORRESET some time to finish */
	msleep(50);

	if (genwqe_need_err_masking(cd)) {
		dev_info(&pci_dev->dev,
			 "[%s] masking errors for old bitstreams\n", __func__);
		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
	}
	return 0;
}
698 | ||
699 | int genwqe_read_softreset(struct genwqe_dev *cd) | |
700 | { | |
701 | u64 bitstream; | |
702 | ||
703 | if (!genwqe_is_privileged(cd)) | |
704 | return -ENODEV; | |
705 | ||
706 | bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; | |
707 | cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull; | |
708 | return 0; | |
709 | } | |
710 | ||
711 | /** | |
712 | * genwqe_set_interrupt_capability() - Configure MSI capability structure | |
713 | * @cd: pointer to the device | |
714 | * Return: 0 if no error | |
715 | */ | |
716 | int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count) | |
717 | { | |
718 | int rc; | |
719 | struct pci_dev *pci_dev = cd->pci_dev; | |
720 | ||
721 | rc = pci_enable_msi_block(pci_dev, count); | |
722 | if (rc == 0) | |
723 | cd->flags |= GENWQE_FLAG_MSI_ENABLED; | |
724 | return rc; | |
725 | } | |
726 | ||
727 | /** | |
728 | * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability() | |
729 | * @cd: pointer to the device | |
730 | */ | |
731 | void genwqe_reset_interrupt_capability(struct genwqe_dev *cd) | |
732 | { | |
733 | struct pci_dev *pci_dev = cd->pci_dev; | |
734 | ||
735 | if (cd->flags & GENWQE_FLAG_MSI_ENABLED) { | |
736 | pci_disable_msi(pci_dev); | |
737 | cd->flags &= ~GENWQE_FLAG_MSI_ENABLED; | |
738 | } | |
739 | } | |
740 | ||
741 | /** | |
742 | * set_reg_idx() - Fill array with data. Ignore illegal offsets. | |
743 | * @cd: card device | |
744 | * @r: debug register array | |
745 | * @i: index to desired entry | |
746 | * @m: maximum possible entries | |
747 | * @addr: addr which is read | |
748 | * @index: index in debug array | |
749 | * @val: read value | |
750 | */ | |
751 | static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r, | |
752 | unsigned int *i, unsigned int m, u32 addr, u32 idx, | |
753 | u64 val) | |
754 | { | |
755 | if (WARN_ON_ONCE(*i >= m)) | |
756 | return -EFAULT; | |
757 | ||
758 | r[*i].addr = addr; | |
759 | r[*i].idx = idx; | |
760 | r[*i].val = val; | |
761 | ++*i; | |
762 | return 0; | |
763 | } | |
764 | ||
/* Convenience wrapper around set_reg_idx() with the idx field fixed to 0. */
static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r,
		   unsigned int *i, unsigned int m, u32 addr, u64 val)
{
	return set_reg_idx(cd, r, i, m, addr, 0, val);
}
770 | ||
/*
 * genwqe_read_ffdc_regs() - Collect FIR/FEC dump registers
 * @cd:       card device
 * @regs:     output array for address/value pairs
 * @max_regs: capacity of @regs
 * @all:      when non-zero, read secondary FIR/FEC registers for all
 *            64 bit positions, not only for bits set in the unit FIR
 *
 * Reads the global FIR, the SLU/APP unit configurations and then, per
 * chip unit, the unit FIR/FEC plus the secondary FIR/FEC registers.
 * Unused slots at the end of @regs are filled with all-ones markers.
 *
 * Return: number of valid entries written to @regs
 */
int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
			  unsigned int max_regs, int all)
{
	unsigned int i, j, idx = 0;
	u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr;
	u64 gfir, sluid, appid, ufir, ufec, sfir, sfec;

	/* Global FIR */
	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
	set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir);

	/* UnitCfg for SLU */
	sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */
	set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid);

	/* UnitCfg for APP */
	appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */
	set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid);

	/* Check all chip Units */
	for (i = 0; i < GENWQE_MAX_UNITS; i++) {

		/* Unit FIR */
		ufir_addr = (i << 24) | 0x008;
		ufir = __genwqe_readq(cd, ufir_addr);
		set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir);

		/* Unit FEC */
		ufec_addr = (i << 24) | 0x018;
		ufec = __genwqe_readq(cd, ufec_addr);
		set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec);

		for (j = 0; j < 64; j++) {
			/* wherever there is a primary 1, read the 2ndary */
			if (!all && (!(ufir & (1ull << j))))
				continue;

			sfir_addr = (i << 24) | (0x100 + 8 * j);
			sfir = __genwqe_readq(cd, sfir_addr);
			set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir);

			sfec_addr = (i << 24) | (0x300 + 8 * j);
			sfec = __genwqe_readq(cd, sfec_addr);
			set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec);
		}
	}

	/* fill with invalid data until end */
	for (i = idx; i < max_regs; i++) {
		regs[i].addr = 0xffffffff;
		regs[i].val = 0xffffffffffffffffull;
	}
	return idx;
}
825 | ||
/**
 * genwqe_ffdc_buff_size() - Calculates the number of dump registers
 * @cd:  card device
 * @uid: unit id whose extended-error area is sized
 *
 * Walks the same on-card structures as genwqe_ffdc_buff_read(), but
 * only counts entries, so the caller can allocate a buffer of the
 * right size before dumping.
 *
 * Return: number of register entries a subsequent dump will produce
 */
int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid)
{
	int entries = 0, ring, traps, traces, trace_entries;
	u32 eevptr_addr, l_addr, d_len, d_type;
	u64 eevptr, val, addr;

	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
	eevptr = __genwqe_readq(cd, eevptr_addr);

	/* 0 and all-ones indicate no extended error log is present */
	if ((eevptr != 0x0) && (eevptr != -1ull)) {
		l_addr = GENWQE_UID_OFFS(uid) | eevptr;

		while (1) {
			val = __genwqe_readq(cd, l_addr);

			if ((val == 0x0) || (val == -1ull))
				break;

			/* 38:24 */
			d_len = (val & 0x0000007fff000000ull) >> 24;

			/* 39 (NOTE(review): shift is 36 although the mask
			 * selects bit 39 — result is still non-zero iff the
			 * bit is set, so only truthiness matters; confirm
			 * against the hardware spec) */
			d_type = (val & 0x0000008000000000ull) >> 36;

			if (d_type) { /* repeat */
				entries += d_len;
			} else { /* size in bytes! */
				entries += d_len >> 3;
			}

			l_addr += 8;
		}
	}

	/* count the trap and trace entries of all 8 diagnostic rings */
	for (ring = 0; ring < 8; ring++) {
		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
		val = __genwqe_readq(cd, addr);

		if ((val == 0x0ull) || (val == -1ull))
			continue;

		traps = (val >> 24) & 0xff;
		traces = (val >> 16) & 0xff;
		trace_entries = val & 0xffff;

		entries += traps + (traces * trace_entries);
	}
	return entries;
}
878 | ||
/**
 * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure
 * @cd:       card device
 * @uid:      unit id whose extended-error area is dumped
 * @regs:     output array (size it with genwqe_ffdc_buff_size())
 * @max_regs: capacity of @regs
 *
 * First dumps the extended-error list the hardware points to, then
 * the trap and trace entries of the 8 diagnostic rings.
 *
 * Return: 0
 */
int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid,
			  struct genwqe_reg *regs, unsigned int max_regs)
{
	int i, traps, traces, trace, trace_entries, trace_entry, ring;
	unsigned int idx = 0;
	u32 eevptr_addr, l_addr, d_addr, d_len, d_type;
	u64 eevptr, e, val, addr;

	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
	eevptr = __genwqe_readq(cd, eevptr_addr);

	/* 0 and all-ones indicate no extended error log is present */
	if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) {
		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
		while (1) {
			e = __genwqe_readq(cd, l_addr);
			if ((e == 0x0) || (e == 0xffffffffffffffffull))
				break;

			d_addr = (e & 0x0000000000ffffffull);	    /* 23:0 */
			d_len  = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */
			d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */
			d_addr |= GENWQE_UID_OFFS(uid);

			if (d_type) {
				/* repeated read of the same address */
				for (i = 0; i < (int)d_len; i++) {
					val = __genwqe_readq(cd, d_addr);
					set_reg_idx(cd, regs, &idx, max_regs,
						    d_addr, i, val);
				}
			} else {
				d_len >>= 3; /* Size in bytes! */
				/* sequential dump of a register range */
				for (i = 0; i < (int)d_len; i++, d_addr += 8) {
					val = __genwqe_readq(cd, d_addr);
					set_reg_idx(cd, regs, &idx, max_regs,
						    d_addr, 0, val);
				}
			}
			l_addr += 8;
		}
	}

	/*
	 * To save time, there are only 6 traces populated on Uid=2,
	 * Ring=1. each with iters=512.
	 */
	for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds,
					      2...7 are ASI rings */
		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
		val = __genwqe_readq(cd, addr);

		if ((val == 0x0ull) || (val == -1ull))
			continue;

		traps = (val >> 24) & 0xff;	/* Number of Traps */
		traces = (val >> 16) & 0xff;	/* Number of Traces */
		trace_entries = val & 0xffff;	/* Entries per trace */

		/* Note: This is a combined loop that dumps both the traps */
		/* (for the trace == 0 case) as well as the traces 1 to */
		/* 'traces'. */
		for (trace = 0; trace <= traces; trace++) {
			u32 diag_sel =
				GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace);

			/* select ring/trace, then read entries back one by
			 * one through the mailbox register */
			addr = (GENWQE_UID_OFFS(uid) |
				IO_EXTENDED_DIAG_SELECTOR);
			__genwqe_writeq(cd, addr, diag_sel);

			for (trace_entry = 0;
			     trace_entry < (trace ? trace_entries : traps);
			     trace_entry++) {
				addr = (GENWQE_UID_OFFS(uid) |
					IO_EXTENDED_DIAG_READ_MBX);
				val = __genwqe_readq(cd, addr);
				set_reg_idx(cd, regs, &idx, max_regs, addr,
					    (diag_sel<<16) | trace_entry, val);
			}
		}
	}
	return 0;
}
963 | ||
964 | /** | |
965 | * genwqe_write_vreg() - Write register in virtual window | |
966 | * | |
967 | * Note, these registers are only accessible to the PF through the | |
968 | * VF-window. It is not intended for the VF to access. | |
969 | */ | |
970 | int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func) | |
971 | { | |
972 | __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); | |
973 | __genwqe_writeq(cd, reg, val); | |
974 | return 0; | |
975 | } | |
976 | ||
977 | /** | |
978 | * genwqe_read_vreg() - Read register in virtual window | |
979 | * | |
980 | * Note, these registers are only accessible to the PF through the | |
981 | * VF-window. It is not intended for the VF to access. | |
982 | */ | |
983 | u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func) | |
984 | { | |
985 | __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); | |
986 | return __genwqe_readq(cd, reg); | |
987 | } | |
988 | ||
989 | /** | |
990 | * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card | |
991 | * | |
992 | * Note: From a design perspective it turned out to be a bad idea to | |
993 | * use codes here to specifiy the frequency/speed values. An old | |
994 | * driver cannot understand new codes and is therefore always a | |
995 | * problem. Better is to measure out the value or put the | |
996 | * speed/frequency directly into a register which is always a valid | |
997 | * value for old as well as for new software. | |
998 | * | |
999 | * Return: Card clock in MHz | |
1000 | */ | |
1001 | int genwqe_base_clock_frequency(struct genwqe_dev *cd) | |
1002 | { | |
1003 | u16 speed; /* MHz MHz MHz MHz */ | |
1004 | static const int speed_grade[] = { 250, 200, 166, 175 }; | |
1005 | ||
1006 | speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); | |
1007 | if (speed >= ARRAY_SIZE(speed_grade)) | |
1008 | return 0; /* illegal value */ | |
1009 | ||
1010 | return speed_grade[speed]; | |
1011 | } | |
1012 | ||
/**
 * genwqe_stop_traps() - Stop traps
 * @cd: genwqe device descriptor
 *
 * Before reading out the analysis data, we need to stop the traps.
 * Done by writing 0xc to the MISC debug SET register.
 */
void genwqe_stop_traps(struct genwqe_dev *cd)
{
	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull);
}
1022 | ||
/**
 * genwqe_start_traps() - Start traps
 * @cd: genwqe device descriptor
 *
 * After having read the data, we can/must enable the traps again.
 * Counterpart of genwqe_stop_traps(): writes 0xc to the MISC debug
 * CLR register; cards which require error masking (per
 * genwqe_need_err_masking()) additionally get 0x0a written to the
 * MISC debug register.
 */
void genwqe_start_traps(struct genwqe_dev *cd)
{
	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull);

	if (genwqe_need_err_masking(cd))
		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
}