Commit | Line | Data |
---|---|---|
c8801d8c LW |
1 | /** |
2 | * Driver for Altera PCIe core chaining DMA reference design. | |
3 | * | |
4 | * Copyright (C) 2008 Leon Woestenberg <leon.woestenberg@axon.tv> | |
5 | * Copyright (C) 2008 Nickolas Heppermann <heppermannwdt@gmail.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License along | |
18 | * with this program; if not, write to the Free Software Foundation, Inc., | |
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
20 | * | |
21 | * | |
22 | * Rationale: This driver exercises the chaining DMA read and write engine | |
23 | * in the reference design. It is meant as a complementary reference | |
24 | * driver that can be used for testing early designs as well as a basis to | |
25 | * write your custom driver. | |
26 | * | |
27 | * Status: Test results from Leon Woestenberg <leon.woestenberg@axon.tv>: | |
28 | * | |
29 | * Sendero Board w/ Cyclone II EP2C35F672C6N, PX1011A PCIe x1 PHY on a | |
30 | * Dell Precision 370 PC, x86, kernel 2.6.20 from Ubuntu 7.04. | |
31 | * | |
32 | * Sendero Board w/ Cyclone II EP2C35F672C6N, PX1011A PCIe x1 PHY on a | |
33 | * Freescale MPC8313E-RDB board, PowerPC, 2.6.24 w/ Freescale patches. | |
34 | * | |
35 | * Driver tests passed with PCIe Compiler 8.1. With PCIe 8.0 the DMA | |
36 | * loopback test had reproducable compare errors. I assume a change | |
37 | * in the compiler or reference design, but could not find evidence nor | |
38 | * documentation on a change or fix in that direction. | |
39 | * | |
40 | * The reference design does not have readable locations and thus a | |
41 | * dummy read, used to flush PCI posted writes, cannot be performed. | |
42 | * | |
43 | */ | |
44 | ||
45 | #include <linux/kernel.h> | |
46 | #include <linux/cdev.h> | |
47 | #include <linux/delay.h> | |
48 | #include <linux/dma-mapping.h> | |
c8801d8c LW |
49 | #include <linux/init.h> |
50 | #include <linux/interrupt.h> | |
51 | #include <linux/io.h> | |
52 | #include <linux/jiffies.h> | |
53 | #include <linux/module.h> | |
54 | #include <linux/pci.h> | |
55 | ||
56 | ||
57 | /* by default do not build the character device interface */ | |
58 | /* XXX It is non-functional yet */ | |
59 | #ifndef ALTPCIECHDMA_CDEV | |
60 | # define ALTPCIECHDMA_CDEV 0 | |
61 | #endif | |
62 | ||
63 | /* build the character device interface? */ | |
64 | #if ALTPCIECHDMA_CDEV | |
65 | # define MAX_CHDMA_SIZE (8 * 1024 * 1024) | |
66 | # include "mapper_user_to_sg.h" | |
67 | #endif | |
68 | ||
69 | /** driver name, mimicks Altera naming of the reference design */ | |
70 | #define DRV_NAME "altpciechdma" | |
71 | /** number of BARs on the device */ | |
72 | #define APE_BAR_NUM (6) | |
73 | /** BAR number where the RCSLAVE memory sits */ | |
74 | #define APE_BAR_RCSLAVE (0) | |
75 | /** BAR number where the Descriptor Header sits */ | |
76 | #define APE_BAR_HEADER (2) | |
77 | ||
78 | /** maximum size in bytes of the descriptor table, chdma logic limit */ | |
79 | #define APE_CHDMA_TABLE_SIZE (4096) | |
80 | /* single transfer must not exceed 255 table entries. worst case this can be | |
81 | * achieved by 255 scattered pages, with only a single byte in the head and | |
82 | * tail pages. 253 * PAGE_SIZE is a safe upper bound for the transfer size. | |
83 | */ | |
84 | #define APE_CHDMA_MAX_TRANSFER_LEN (253 * PAGE_SIZE) | |
85 | ||
/**
 * Specifies those BARs to be mapped and the length of each mapping.
 *
 * Zero (0) means do not map, otherwise specifies the BAR lengths to be mapped.
 * If the actual BAR length is less, this is considered an error; then
 * reconfigure your PCIe core.
 *
 * Checked against the device by map_bars().
 *
 * @see ug_pci_express 8.0, table 7-2 at page 7-13.
 */
static const unsigned long bar_min_len[APE_BAR_NUM] =
	{ 32768, 0, 256, 0, 32768, 0 };
97 | ||
/**
 * Descriptor Header, controls the DMA read engine or write engine.
 *
 * The descriptor header is the main data structure for starting DMA transfers.
 *
 * It sits in End Point (FPGA) memory BAR[2] for 32-bit or BAR[3:2] for 64-bit.
 * It references a descriptor table which exists in Root Complex (PC) memory.
 * Writing the rclast field starts the DMA operation, thus all other structures
 * and fields must be setup before doing so.
 *
 * @see ug_pci_express 8.0, tables 7-3, 7-4 and 7-5 at page 7-14.
 * @note This header must be written in four 32-bit (PCI DWORD) writes.
 *       The layout is fixed by the hardware; do not reorder or pad.
 */
struct ape_chdma_header {
	/**
	 * w0 consists of two 16-bit fields:
	 * - lsb u16 number; number of descriptors in ape_chdma_table
	 * - msb u16 control; global control flags
	 */
	u32 w0;
	/* bus address to ape_chdma_table in Root Complex memory, split
	 * over two 32-bit registers (high then low half) */
	u32 bdt_addr_h;
	u32 bdt_addr_l;
	/**
	 * w3 consists of two 16-bit fields:
	 * - lsb u16 rclast; last descriptor number available in Root Complex
	 *   - zero (0) means the first descriptor is ready,
	 *   - one (1) means two descriptors are ready, etc.
	 * - msb u16 reserved;
	 *
	 * @note writing to this memory location starts the DMA operation!
	 */
	u32 w3;
} __attribute__ ((packed));
132 | ||
/**
 * Descriptor Entry, describing a (non-scattered) single memory block transfer.
 *
 * There is one descriptor for each memory block involved in the transfer, a
 * block being a contiguous address range on the bus.
 *
 * Multiple descriptors are chained by means of the ape_chdma_table data
 * structure.
 *
 * @see ug_pci_express 8.0, tables 7-6, 7-7 and 7-8 at page 7-14 and page 7-15.
 * @note The layout is fixed by the hardware; all fields are little-endian
 *       on the bus (filled via cpu_to_le32() in ape_chdma_desc_set()).
 */
struct ape_chdma_desc {
	/**
	 * w0 consists of two 16-bit fields:
	 * - lsb u16 length; number of DWORDs to transfer
	 * - msb u16 control; per-descriptor control flags
	 */
	u32 w0;
	/* address of memory in the End Point */
	u32 ep_addr;
	/* bus address of source or destination memory in the Root Complex,
	 * split over high and low 32-bit halves */
	u32 rc_addr_h;
	u32 rc_addr_l;
} __attribute__ ((packed));
159 | ||
/**
 * Descriptor Table, an array of descriptors describing a chained transfer.
 *
 * An array of descriptors, preceded by workspace for the End Point.
 * It exists in Root Complex memory.
 *
 * The End Point can update its last completed descriptor number in the
 * eplast field if requested by setting the EPLAST_ENA bit either
 * globally in the header's or locally in any descriptor's control field.
 *
 * @note this structure may not exceed 4096 bytes. This results in a
 * maximum of 4096 / (4 * 4) - 1 = 255 descriptors per chained transfer.
 * (Checked against APE_CHDMA_TABLE_SIZE with BUG_ON() in probe().)
 *
 * @see ug_pci_express 8.0, tables 7-9, 7-10 and 7-11 at page 7-17 and page 7-18.
 */
struct ape_chdma_table {
	/* workspace 0x00-0x0b, reserved */
	u32 reserved1[3];
	/* workspace 0x0c-0x0f, last descriptor handled by End Point (EPLAST),
	 * polled by dma_test() to detect chain completion */
	u32 w3;
	/* the actual array of descriptors
	 * 0x10-0x1f, 0x20-0x2f, ... 0xff0-0xfff (255 entries)
	 */
	struct ape_chdma_desc desc[255];
} __attribute__ ((packed));
185 | ||
186 | /** | |
187 | * Altera PCI Express ('ape') board specific book keeping data | |
188 | * | |
189 | * Keeps state of the PCIe core and the Chaining DMA controller | |
190 | * application. | |
191 | */ | |
192 | struct ape_dev { | |
193 | /** the kernel pci device data structure provided by probe() */ | |
194 | struct pci_dev *pci_dev; | |
195 | /** | |
196 | * kernel virtual address of the mapped BAR memory and IO regions of | |
197 | * the End Point. Used by map_bars()/unmap_bars(). | |
198 | */ | |
199 | void * __iomem bar[APE_BAR_NUM]; | |
200 | /** kernel virtual address for Descriptor Table in Root Complex memory */ | |
201 | struct ape_chdma_table *table_virt; | |
202 | /** | |
203 | * bus address for the Descriptor Table in Root Complex memory, in | |
204 | * CPU-native endianess | |
205 | */ | |
206 | dma_addr_t table_bus; | |
207 | /* if the device regions could not be allocated, assume and remember it | |
208 | * is in use by another driver; this driver must not disable the device. | |
209 | */ | |
210 | int in_use; | |
211 | /* whether this driver enabled msi for the device */ | |
212 | int msi_enabled; | |
213 | /* whether this driver could obtain the regions */ | |
214 | int got_regions; | |
215 | /* irq line succesfully requested by this driver, -1 otherwise */ | |
216 | int irq_line; | |
217 | /* board revision */ | |
218 | u8 revision; | |
219 | /* interrupt count, incremented by the interrupt handler */ | |
220 | int irq_count; | |
221 | #if ALTPCIECHDMA_CDEV | |
222 | /* character device */ | |
223 | dev_t cdevno; | |
224 | struct cdev cdev; | |
225 | /* user space scatter gather mapper */ | |
226 | struct sg_mapping_t *sgm; | |
227 | #endif | |
228 | }; | |
229 | ||
/**
 * Using the subsystem vendor id and subsystem id, it is possible to
 * distinguish between different cards based around the same
 * (third-party) logic core.
 *
 * Default Altera vendor and device ID's, and some (non-reserved)
 * ID's are now used here that are used amongst the testers/developers.
 */
static const struct pci_device_id ids[] = {
	{ PCI_DEVICE(0x1172, 0xE001), },	/* Altera default vendor/device ID */
	{ PCI_DEVICE(0x2071, 0x2071), },	/* non-reserved ID used by testers/developers */
	{ 0, }					/* end-of-list sentinel */
};
MODULE_DEVICE_TABLE(pci, ids);
244 | ||
245 | #if ALTPCIECHDMA_CDEV | |
246 | /* prototypes for character device */ | |
247 | static int sg_init(struct ape_dev *ape); | |
248 | static void sg_exit(struct ape_dev *ape); | |
249 | #endif | |
250 | ||
251 | /** | |
252 | * altpciechdma_isr() - Interrupt handler | |
253 | * | |
254 | */ | |
255 | static irqreturn_t altpciechdma_isr(int irq, void *dev_id) | |
256 | { | |
257 | struct ape_dev *ape = (struct ape_dev *)dev_id; | |
258 | if (!ape) | |
259 | return IRQ_NONE; | |
260 | ape->irq_count++; | |
261 | return IRQ_HANDLED; | |
262 | } | |
263 | ||
264 | static int __devinit scan_bars(struct ape_dev *ape, struct pci_dev *dev) | |
265 | { | |
266 | int i; | |
267 | for (i = 0; i < APE_BAR_NUM; i++) { | |
268 | unsigned long bar_start = pci_resource_start(dev, i); | |
269 | if (bar_start) { | |
270 | unsigned long bar_end = pci_resource_end(dev, i); | |
271 | unsigned long bar_flags = pci_resource_flags(dev, i); | |
272 | printk(KERN_DEBUG "BAR%d 0x%08lx-0x%08lx flags 0x%08lx\n", | |
273 | i, bar_start, bar_end, bar_flags); | |
274 | } | |
275 | } | |
276 | return 0; | |
277 | } | |
278 | ||
279 | /** | |
280 | * Unmap the BAR regions that had been mapped earlier using map_bars() | |
281 | */ | |
282 | static void unmap_bars(struct ape_dev *ape, struct pci_dev *dev) | |
283 | { | |
284 | int i; | |
285 | for (i = 0; i < APE_BAR_NUM; i++) { | |
286 | /* is this BAR mapped? */ | |
287 | if (ape->bar[i]) { | |
288 | /* unmap BAR */ | |
289 | pci_iounmap(dev, ape->bar[i]); | |
290 | ape->bar[i] = NULL; | |
291 | } | |
292 | } | |
293 | } | |
294 | ||
295 | /** | |
296 | * Map the device memory regions into kernel virtual address space after | |
297 | * verifying their sizes respect the minimum sizes needed, given by the | |
298 | * bar_min_len[] array. | |
299 | */ | |
300 | static int __devinit map_bars(struct ape_dev *ape, struct pci_dev *dev) | |
301 | { | |
302 | int rc; | |
303 | int i; | |
304 | /* iterate through all the BARs */ | |
305 | for (i = 0; i < APE_BAR_NUM; i++) { | |
306 | unsigned long bar_start = pci_resource_start(dev, i); | |
307 | unsigned long bar_end = pci_resource_end(dev, i); | |
308 | unsigned long bar_length = bar_end - bar_start + 1; | |
309 | ape->bar[i] = NULL; | |
310 | /* do not map, and skip, BARs with length 0 */ | |
311 | if (!bar_min_len[i]) | |
312 | continue; | |
313 | /* do not map BARs with address 0 */ | |
314 | if (!bar_start || !bar_end) { | |
c9b8015b | 315 | printk(KERN_DEBUG "BAR #%d is not present?!\n", i); |
c8801d8c LW |
316 | rc = -1; |
317 | goto fail; | |
318 | } | |
319 | bar_length = bar_end - bar_start + 1; | |
320 | /* BAR length is less than driver requires? */ | |
321 | if (bar_length < bar_min_len[i]) { | |
c9b8015b EA |
322 | printk(KERN_DEBUG "BAR #%d length = %lu bytes but driver " |
323 | "requires at least %lu bytes\n", | |
324 | i, bar_length, bar_min_len[i]); | |
c8801d8c LW |
325 | rc = -1; |
326 | goto fail; | |
327 | } | |
328 | /* map the device memory or IO region into kernel virtual | |
329 | * address space */ | |
330 | ape->bar[i] = pci_iomap(dev, i, bar_min_len[i]); | |
331 | if (!ape->bar[i]) { | |
332 | printk(KERN_DEBUG "Could not map BAR #%d.\n", i); | |
333 | rc = -1; | |
334 | goto fail; | |
335 | } | |
c9b8015b EA |
336 | printk(KERN_DEBUG "BAR[%d] mapped at 0x%p with length %lu(/%lu).\n", i, |
337 | ape->bar[i], bar_min_len[i], bar_length); | |
c8801d8c LW |
338 | } |
339 | /* succesfully mapped all required BAR regions */ | |
340 | rc = 0; | |
341 | goto success; | |
342 | fail: | |
343 | /* unmap any BARs that we did map */ | |
344 | unmap_bars(ape, dev); | |
345 | success: | |
346 | return rc; | |
347 | } | |
348 | ||
#if 0 /* not yet implemented fully FIXME add opcode */
/* Write an incrementing pattern into the RC-slave BAR and read it back.
 * Disabled: kept as a scaffold for a future BAR[0] memory test.
 */
static void __devinit rcslave_test(struct ape_dev *ape, struct pci_dev *dev)
{
	u32 *rcslave_mem = (u32 *)ape->bar[APE_BAR_RCSLAVE];
	u32 result = 0;
	/** this number is assumed to be different each time this test runs */
	u32 seed = (u32)jiffies;
	u32 value = seed;
	int i;

	/* write loop */
	value = seed;
	for (i = 1024; i < 32768 / 4 ; i++) {
		/* NOTE(review): the logged address '(void *)rcslave_mem + i' is a
		 * byte offset while the access 'rcslave_mem + i' is a u32-element
		 * offset, so the printed address does not match the location
		 * actually written -- confirm/fix before enabling this test. */
		printk(KERN_DEBUG "Writing 0x%08x to 0x%p.\n",
			(u32)value, (void *)rcslave_mem + i);
		iowrite32(value, rcslave_mem + i);
		value++;
	}
	/* read-back loop */
	value = seed;
	for (i = 1024; i < 32768 / 4; i++) {
		result = ioread32(rcslave_mem + i);
		if (result != value) {
			/* same byte-vs-element offset mismatch in the log as above */
			printk(KERN_DEBUG "Wrote 0x%08x to 0x%p, but read back 0x%08x.\n",
				(u32)value, (void *)rcslave_mem + i, (u32)result);
			break;
		}
		value++;
	}
}
#endif
380 | ||
/* obtain the 32 most significant (high) bits of a 32-bit or 64-bit address;
 * the double 16-bit shift avoids an undefined 32-bit shift on 32-bit
 * dma_addr_t. Argument is parenthesized so operator-precedence does not
 * break compound expressions (the original expanded 'addr' bare). */
#define pci_dma_h(addr) (((addr) >> 16) >> 16)
/* obtain the 32 least significant (low) bits of a 32-bit or 64-bit address */
#define pci_dma_l(addr) ((addr) & 0xffffffffUL)
385 | ||
386 | /* ape_fill_chdma_desc() - Fill a Altera PCI Express Chaining DMA descriptor | |
387 | * | |
388 | * @desc pointer to descriptor to be filled | |
389 | * @addr root complex address | |
390 | * @ep_addr end point address | |
391 | * @len number of bytes, must be a multiple of 4. | |
392 | */ | |
393 | static inline void ape_chdma_desc_set(struct ape_chdma_desc *desc, dma_addr_t addr, u32 ep_addr, int len) | |
394 | { | |
395 | BUG_ON(len & 3); | |
396 | desc->w0 = cpu_to_le32(len / 4); | |
397 | desc->ep_addr = cpu_to_le32(ep_addr); | |
398 | desc->rc_addr_h = cpu_to_le32(pci_dma_h(addr)); | |
399 | desc->rc_addr_l = cpu_to_le32(pci_dma_l(addr)); | |
400 | } | |
401 | ||
402 | /* | |
403 | * ape_sg_to_chdma_table() - Create a device descriptor table from a scatterlist. | |
404 | * | |
405 | * The scatterlist must have been mapped by pci_map_sg(sgm->sgl). | |
406 | * | |
407 | * @sgl scatterlist. | |
408 | * @nents Number of entries in the scatterlist. | |
409 | * @first Start index in the scatterlist sgm->sgl. | |
410 | * @ep_addr End Point address for the scatter/gather transfer. | |
411 | * @desc pointer to first descriptor | |
412 | * | |
413 | * Returns Number of entries in the table on success, -1 on error. | |
414 | */ | |
415 | static int ape_sg_to_chdma_table(struct scatterlist *sgl, int nents, int first, struct ape_chdma_desc *desc, u32 ep_addr) | |
416 | { | |
417 | int i = first, j = 0; | |
418 | /* inspect first entry */ | |
419 | dma_addr_t addr = sg_dma_address(&sgl[i]); | |
420 | unsigned int len = sg_dma_len(&sgl[i]); | |
421 | /* contiguous block */ | |
422 | dma_addr_t cont_addr = addr; | |
423 | unsigned int cont_len = len; | |
424 | /* iterate over remaining entries */ | |
425 | for (; j < 25 && i < nents - 1; i++) { | |
426 | /* bus address of next entry i + 1 */ | |
427 | dma_addr_t next = sg_dma_address(&sgl[i + 1]); | |
428 | /* length of this entry i */ | |
429 | len = sg_dma_len(&sgl[i]); | |
a5c281cb RD |
430 | printk(KERN_DEBUG "%04d: addr=0x%Lx length=0x%08x\n", i, |
431 | (unsigned long long)addr, len); | |
c8801d8c LW |
432 | /* entry i + 1 is non-contiguous with entry i? */ |
433 | if (next != addr + len) { | |
434 | /* TODO create entry here (we could overwrite i) */ | |
a5c281cb RD |
435 | printk(KERN_DEBUG "%4d: cont_addr=0x%Lx cont_len=0x%08x\n", j, |
436 | (unsigned long long)cont_addr, cont_len); | |
c8801d8c LW |
437 | /* set descriptor for contiguous transfer */ |
438 | ape_chdma_desc_set(&desc[j], cont_addr, ep_addr, cont_len); | |
439 | /* next end point memory address */ | |
440 | ep_addr += cont_len; | |
441 | /* start new contiguous block */ | |
442 | cont_addr = next; | |
443 | cont_len = 0; | |
444 | j++; | |
445 | } | |
446 | /* add entry i + 1 to current contiguous block */ | |
447 | cont_len += len; | |
448 | /* goto entry i + 1 */ | |
449 | addr = next; | |
450 | } | |
451 | /* TODO create entry here (we could overwrite i) */ | |
a5c281cb RD |
452 | printk(KERN_DEBUG "%04d: addr=0x%Lx length=0x%08x\n", i, |
453 | (unsigned long long)addr, len); | |
454 | printk(KERN_DEBUG "%4d: cont_addr=0x%Lx length=0x%08x\n", j, | |
455 | (unsigned long long)cont_addr, cont_len); | |
c8801d8c LW |
456 | j++; |
457 | return j; | |
458 | } | |
459 | ||
460 | /* compare buffers */ | |
461 | static inline int compare(u32 *p, u32 *q, int len) | |
462 | { | |
463 | int result = -1; | |
464 | int fail = 0; | |
465 | int i; | |
466 | for (i = 0; i < len / 4; i++) { | |
467 | if (*p == *q) { | |
468 | /* every so many u32 words, show equals */ | |
469 | if ((i & 255) == 0) | |
470 | printk(KERN_DEBUG "[%p] = 0x%08x [%p] = 0x%08x\n", p, *p, q, *q); | |
471 | } else { | |
472 | fail++; | |
473 | /* show the first few miscompares */ | |
c9b8015b EA |
474 | if (fail < 10) |
475 | printk(KERN_DEBUG "[%p] = 0x%08x != [%p] = 0x%08x ?!\n", p, *p, q, *q); | |
476 | /* but stop after a while */ | |
477 | else if (fail == 10) | |
478 | printk(KERN_DEBUG "---more errors follow! not printed---\n"); | |
479 | else | |
c8801d8c | 480 | /* stop compare after this many errors */ |
c9b8015b | 481 | break; |
c8801d8c LW |
482 | } |
483 | p++; | |
484 | q++; | |
485 | } | |
486 | if (!fail) | |
487 | result = 0; | |
488 | return result; | |
489 | } | |
490 | ||
/* dma_test() - Perform DMA loop back test to end point and back to root complex.
 *
 * Allocate a cache-coherent buffer in host memory, consisting of four pages.
 *
 * Fill the four memory pages such that each 32-bit word contains its own address.
 *
 * Now perform a loop back test, have the end point device copy the first buffer
 * half to end point memory, then have it copy back into the second half.
 *
 * Create a descriptor table to copy the first buffer half into End Point
 * memory. Instruct the End Point to do a DMA read using that table.
 *
 * Create a descriptor table to copy End Point memory to the second buffer
 * half. Instruct the End Point to do a DMA write using that table.
 *
 * Compare results, fail or pass.
 *
 * Returns 0 when the loop back data compares equal, -1 otherwise.
 */
static int __devinit dma_test(struct ape_dev *ape, struct pci_dev *dev)
{
	/* test result; guilty until proven innocent */
	int result = -1;
	/* the DMA read header sits at address 0x00 of the DMA engine BAR.
	 * NOTE(review): this comment and the variable name disagree (read
	 * header vs write_header); confirm the header order against the
	 * reference design documentation. */
	struct ape_chdma_header *write_header = (struct ape_chdma_header *)ape->bar[APE_BAR_HEADER];
	/* the write DMA header sits after the read header at address 0x10 */
	struct ape_chdma_header *read_header = write_header + 1;
	/* virtual address of the allocated buffer */
	u8 *buffer_virt = 0;
	/* bus address of the allocated buffer */
	dma_addr_t buffer_bus = 0;
	int i, n = 0, irq_count;

	/* temporary value used to construct 32-bit data words */
	u32 w;

	printk(KERN_DEBUG "bar_tests(), PAGE_SIZE = 0x%0x\n", (int)PAGE_SIZE);
	printk(KERN_DEBUG "write_header = 0x%p.\n", write_header);
	printk(KERN_DEBUG "read_header = 0x%p.\n", read_header);
	printk(KERN_DEBUG "&write_header->w3 = 0x%p\n", &write_header->w3);
	printk(KERN_DEBUG "&read_header->w3 = 0x%p\n", &read_header->w3);
	printk(KERN_DEBUG "ape->table_virt = 0x%p.\n", ape->table_virt);

	/* bail out if the BAR was not mapped or the table was not allocated */
	if (!write_header || !read_header || !ape->table_virt)
		goto fail;

	/* allocate and map coherently-cached memory for a DMA-able buffer */
	/* @see Documentation/PCI/PCI-DMA-mapping.txt, near line 318 */
	buffer_virt = (u8 *)pci_alloc_consistent(dev, PAGE_SIZE * 4, &buffer_bus);
	if (!buffer_virt) {
		printk(KERN_DEBUG "Could not allocate coherent DMA buffer.\n");
		goto fail;
	}
	printk(KERN_DEBUG "Allocated cache-coherent DMA buffer (virtual address = 0x%016llx, bus address = 0x%016llx).\n",
	       (u64)buffer_virt, (u64)buffer_bus);

	/* fill first half of buffer with its virtual address as data.
	 * NOTE(review): storing a pointer into a u32 truncates on 64-bit
	 * hosts and triggers a compiler warning -- presumably a deliberate
	 * debug pattern, but confirm. */
	for (i = 0; i < 4 * PAGE_SIZE; i += 4)
#if 0
		*(u32 *)(buffer_virt + i) = i / PAGE_SIZE + 1;
#else
		*(u32 *)(buffer_virt + i) = (buffer_virt + i);
#endif
#if 0
	compare((u32 *)buffer_virt, (u32 *)(buffer_virt + 2 * PAGE_SIZE), 8192);
#endif

#if 0
	/* fill second half of buffer with zeroes */
	for (i = 2 * PAGE_SIZE; i < 4 * PAGE_SIZE; i += 4)
		*(u32 *)(buffer_virt + i) = 0;
#endif

	/* invalidate EPLAST, outside 0-255, 0xFADE is from the testbench */
	ape->table_virt->w3 = cpu_to_le32(0x0000FADE);

	/* fill in first descriptor */
	n = 0;
	/* read 8192 bytes from RC buffer to EP address 4096 */
	ape_chdma_desc_set(&ape->table_virt->desc[n], buffer_bus, 4096, 2 * PAGE_SIZE);
#if 1
	/* fill the whole table with identical descriptors (stress test) */
	for (i = 0; i < 255; i++)
		ape_chdma_desc_set(&ape->table_virt->desc[i], buffer_bus, 4096, 2 * PAGE_SIZE);
	/* index of last descriptor */
	n = i - 1;
#endif
#if 0
	/* fill in next descriptor */
	n++;
	/* read 1024 bytes from RC buffer to EP address 4096 + 1024 */
	ape_chdma_desc_set(&ape->table_virt->desc[n], buffer_bus + 1024, 4096 + 1024, 1024);
#endif

#if 1
	/* enable MSI after the last descriptor is completed */
	if (ape->msi_enabled)
		ape->table_virt->desc[n].w0 |= cpu_to_le32(1UL << 16)/*local MSI*/;
#endif
#if 0
	/* dump descriptor table for debugging */
	printk(KERN_DEBUG "Descriptor Table (Read, in Root Complex Memory, # = %d)\n", n + 1);
	for (i = 0; i < 4 + (n + 1) * 4; i += 4) {
		u32 *p = (u32 *)ape->table_virt;
		p += i;
		printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (LEN=0x%x)\n", (u32)p, (u32)p & 15, *p, 4 * le32_to_cpu(*p));
		p++;
		printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (EPA=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
		p++;
		printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCH=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
		p++;
		printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCL=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
	}
#endif
	/* set available number of descriptors in table */
	w = (u32)(n + 1);
	w |= (1UL << 18)/*global EPLAST_EN*/;
#if 0
	if (ape->msi_enabled)
		w |= (1UL << 17)/*global MSI*/;
#endif
	printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", w, (void *)&read_header->w0);
	iowrite32(w, &read_header->w0);

	/* write table address (higher 32-bits) */
	printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", (u32)((ape->table_bus >> 16) >> 16), (void *)&read_header->bdt_addr_h);
	iowrite32(pci_dma_h(ape->table_bus), &read_header->bdt_addr_h);

	/* write table address (lower 32-bits) */
	printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", (u32)(ape->table_bus & 0xffffffffUL), (void *)&read_header->bdt_addr_l);
	iowrite32(pci_dma_l(ape->table_bus), &read_header->bdt_addr_l);

	/* memory write barrier */
	wmb();
	printk(KERN_DEBUG "Flush posted writes\n");
	/** FIXME Add dummy read to flush posted writes but need a readable location! */
#if 0
	(void)ioread32();
#endif

	/* remember IRQ count before the transfer */
	irq_count = ape->irq_count;
	/* write number of descriptors - this starts the DMA */
	printk(KERN_DEBUG "\nStart DMA read\n");
	printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", (u32)n, (void *)&read_header->w3);
	iowrite32(n, &read_header->w3);
	printk(KERN_DEBUG "EPLAST = %lu\n", le32_to_cpu(*(u32 *)&ape->table_virt->w3) & 0xffffUL);

	/** memory write barrier */
	wmb();
	/* dummy read to flush posted writes */
	/* FIXME Need a readable location! */
#if 0
	(void)ioread32();
#endif
	printk(KERN_DEBUG "POLL FOR READ:\n");
	/* poll for chain completion: up to 100 polls, 100 microseconds apart
	 * (the original comment claimed 1000 x 1 ms, which does not match
	 * the loop below) */
	for (i = 0; i < 100; i++) {
		volatile u32 *p = &ape->table_virt->w3;
		u32 eplast = le32_to_cpu(*p) & 0xffffUL;
		printk(KERN_DEBUG "EPLAST = %u, n = %d\n", eplast, n);
		if (eplast == n) {
			printk(KERN_DEBUG "DONE\n");
			/* report IRQs seen during the transfer */
			printk(KERN_DEBUG "#IRQs during transfer: %d\n", ape->irq_count - irq_count);
			break;
		}
		udelay(100);
	}

	/* invalidate EPLAST, outside 0-255, 0xFADE is from the testbench */
	ape->table_virt->w3 = cpu_to_le32(0x0000FADE);

	/* setup first descriptor: write 8192 bytes from EP 4096 into the
	 * second half of the RC buffer */
	n = 0;
	ape_chdma_desc_set(&ape->table_virt->desc[n], buffer_bus + 8192, 4096, 2 * PAGE_SIZE);
#if 1
	/* fill the whole table with identical descriptors (stress test) */
	for (i = 0; i < 255; i++)
		ape_chdma_desc_set(&ape->table_virt->desc[i], buffer_bus + 8192, 4096, 2 * PAGE_SIZE);

	/* index of last descriptor */
	n = i - 1;
#endif
#if 1 /* test variable, make a module option later */
	/* enable MSI after the last descriptor is completed */
	if (ape->msi_enabled)
		ape->table_virt->desc[n].w0 |= cpu_to_le32(1UL << 16)/*local MSI*/;
#endif
#if 0
	/* dump descriptor table for debugging */
	printk(KERN_DEBUG "Descriptor Table (Write, in Root Complex Memory, # = %d)\n", n + 1);
	for (i = 0; i < 4 + (n + 1) * 4; i += 4) {
		u32 *p = (u32 *)ape->table_virt;
		p += i;
		printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (LEN=0x%x)\n", (u32)p, (u32)p & 15, *p, 4 * le32_to_cpu(*p));
		p++;
		printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (EPA=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
		p++;
		printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCH=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
		p++;
		printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCL=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
	}
#endif

	/* set number of available descriptors in the table */
	w = (u32)(n + 1);
	/* enable updates of eplast for each descriptor completion */
	w |= (u32)(1UL << 18)/*global EPLAST_EN*/;
#if 0 /* test variable, make a module option later */
	/* enable MSI for each descriptor completion */
	if (ape->msi_enabled)
		w |= (1UL << 17)/*global MSI*/;
#endif
	iowrite32(w, &write_header->w0);
	iowrite32(pci_dma_h(ape->table_bus), &write_header->bdt_addr_h);
	iowrite32(pci_dma_l(ape->table_bus), &write_header->bdt_addr_l);

	/** memory write barrier and flush posted writes */
	wmb();
	/* dummy read to flush posted writes */
	/* FIXME Need a readable location! */
#if 0
	(void)ioread32();
#endif
	/* remember IRQ count before the transfer */
	irq_count = ape->irq_count;

	/* write number of descriptors - this starts the DMA */
	printk(KERN_DEBUG "\nStart DMA write\n");
	iowrite32(n, &write_header->w3);

	/** memory write barrier */
	wmb();
	/** dummy read to flush posted writes */
	/* (void) ioread32(); */

	printk(KERN_DEBUG "POLL FOR WRITE:\n");
	/* poll for completion: up to 100 polls, 100 microseconds apart
	 * (the original comment claimed 1000 x 1 ms) */
	for (i = 0; i < 100; i++) {
		volatile u32 *p = &ape->table_virt->w3;
		u32 eplast = le32_to_cpu(*p) & 0xffffUL;
		printk(KERN_DEBUG "EPLAST = %u, n = %d\n", eplast, n);
		if (eplast == n) {
			printk(KERN_DEBUG "DONE\n");
			/* report IRQs seen during the transfer */
			printk(KERN_DEBUG "#IRQs during transfer: %d\n", ape->irq_count - irq_count);
			break;
		}
		udelay(100);
	}
	/* soft-reset DMA write engine */
	iowrite32(0x0000ffffUL, &write_header->w0);
	/* soft-reset DMA read engine */
	iowrite32(0x0000ffffUL, &read_header->w0);

	/** memory write barrier */
	wmb();
	/* dummy read to flush posted writes */
	/* FIXME Need a readable location! */
#if 0
	(void)ioread32();
#endif
	/* compare first half of buffer with second half, should be identical */
	result = compare((u32 *)buffer_virt, (u32 *)(buffer_virt + 2 * PAGE_SIZE), 8192);
	printk(KERN_DEBUG "DMA loop back test %s.\n", result ? "FAILED" : "PASSED");

	pci_free_consistent(dev, 4 * PAGE_SIZE, buffer_virt, buffer_bus);
fail:
	printk(KERN_DEBUG "bar_tests() end, result %d\n", result);
	return result;
}
757 | ||
758 | /* Called when the PCI sub system thinks we can control the given device. | |
759 | * Inspect if we can support the device and if so take control of it. | |
760 | * | |
761 | * Return 0 when we have taken control of the given device. | |
762 | * | |
763 | * - allocate board specific bookkeeping | |
764 | * - allocate coherently-mapped memory for the descriptor table | |
765 | * - enable the board | |
766 | * - verify board revision | |
767 | * - request regions | |
768 | * - query DMA mask | |
769 | * - obtain and request irq | |
770 | * - map regions into kernel address space | |
771 | */ | |
772 | static int __devinit probe(struct pci_dev *dev, const struct pci_device_id *id) | |
773 | { | |
774 | int rc = 0; | |
775 | struct ape_dev *ape = NULL; | |
776 | u8 irq_pin, irq_line; | |
777 | printk(KERN_DEBUG "probe(dev = 0x%p, pciid = 0x%p)\n", dev, id); | |
778 | ||
779 | /* allocate memory for per-board book keeping */ | |
780 | ape = kzalloc(sizeof(struct ape_dev), GFP_KERNEL); | |
781 | if (!ape) { | |
782 | printk(KERN_DEBUG "Could not kzalloc()ate memory.\n"); | |
783 | goto err_ape; | |
784 | } | |
785 | ape->pci_dev = dev; | |
786 | dev->dev.driver_data = (void *)ape; | |
787 | printk(KERN_DEBUG "probe() ape = 0x%p\n", ape); | |
788 | ||
789 | printk(KERN_DEBUG "sizeof(struct ape_chdma_table) = %d.\n", | |
790 | (int)sizeof(struct ape_chdma_table)); | |
791 | /* the reference design has a size restriction on the table size */ | |
792 | BUG_ON(sizeof(struct ape_chdma_table) > APE_CHDMA_TABLE_SIZE); | |
793 | ||
794 | /* allocate and map coherently-cached memory for a descriptor table */ | |
795 | /* @see LDD3 page 446 */ | |
796 | ape->table_virt = (struct ape_chdma_table *)pci_alloc_consistent(dev, | |
797 | APE_CHDMA_TABLE_SIZE, &ape->table_bus); | |
798 | /* could not allocate table? */ | |
799 | if (!ape->table_virt) { | |
800 | printk(KERN_DEBUG "Could not dma_alloc()ate_coherent memory.\n"); | |
801 | goto err_table; | |
802 | } | |
803 | ||
804 | printk(KERN_DEBUG "table_virt = 0x%16llx, table_bus = 0x%16llx.\n", | |
805 | (u64)ape->table_virt, (u64)ape->table_bus); | |
806 | ||
807 | /* enable device */ | |
808 | rc = pci_enable_device(dev); | |
809 | if (rc) { | |
810 | printk(KERN_DEBUG "pci_enable_device() failed\n"); | |
811 | goto err_enable; | |
812 | } | |
813 | ||
814 | /* enable bus master capability on device */ | |
815 | pci_set_master(dev); | |
816 | /* enable message signaled interrupts */ | |
817 | rc = pci_enable_msi(dev); | |
818 | /* could not use MSI? */ | |
819 | if (rc) { | |
820 | /* resort to legacy interrupts */ | |
821 | printk(KERN_DEBUG "Could not enable MSI interrupting.\n"); | |
822 | ape->msi_enabled = 0; | |
823 | /* MSI enabled, remember for cleanup */ | |
824 | } else { | |
825 | printk(KERN_DEBUG "Enabled MSI interrupting.\n"); | |
826 | ape->msi_enabled = 1; | |
827 | } | |
828 | ||
829 | pci_read_config_byte(dev, PCI_REVISION_ID, &ape->revision); | |
830 | #if 0 /* example */ | |
831 | /* (for example) this driver does not support revision 0x42 */ | |
832 | if (ape->revision == 0x42) { | |
833 | printk(KERN_DEBUG "Revision 0x42 is not supported by this driver.\n"); | |
834 | rc = -ENODEV; | |
835 | goto err_rev; | |
836 | } | |
837 | #endif | |
838 | /** XXX check for native or legacy PCIe endpoint? */ | |
839 | ||
840 | rc = pci_request_regions(dev, DRV_NAME); | |
841 | /* could not request all regions? */ | |
842 | if (rc) { | |
843 | /* assume device is in use (and do not disable it later!) */ | |
844 | ape->in_use = 1; | |
845 | goto err_regions; | |
846 | } | |
847 | ape->got_regions = 1; | |
848 | ||
c9b8015b | 849 | #if 1 /* @todo For now, disable 64-bit, because I do not understand the implications (DAC!) */ |
c8801d8c | 850 | /* query for DMA transfer */ |
5872fb94 | 851 | /* @see Documentation/PCI/PCI-DMA-mapping.txt */ |
c8801d8c LW |
852 | if (!pci_set_dma_mask(dev, DMA_64BIT_MASK)) { |
853 | pci_set_consistent_dma_mask(dev, DMA_64BIT_MASK); | |
854 | /* use 64-bit DMA */ | |
855 | printk(KERN_DEBUG "Using a 64-bit DMA mask.\n"); | |
856 | } else | |
857 | #endif | |
858 | if (!pci_set_dma_mask(dev, DMA_32BIT_MASK)) { | |
859 | printk(KERN_DEBUG "Could not set 64-bit DMA mask.\n"); | |
860 | pci_set_consistent_dma_mask(dev, DMA_32BIT_MASK); | |
861 | /* use 32-bit DMA */ | |
862 | printk(KERN_DEBUG "Using a 32-bit DMA mask.\n"); | |
863 | } else { | |
864 | printk(KERN_DEBUG "No suitable DMA possible.\n"); | |
865 | /** @todo Choose proper error return code */ | |
866 | rc = -1; | |
867 | goto err_mask; | |
868 | } | |
869 | ||
870 | rc = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq_pin); | |
871 | /* could not read? */ | |
872 | if (rc) | |
873 | goto err_irq; | |
874 | printk(KERN_DEBUG "IRQ pin #%d (0=none, 1=INTA#...4=INTD#).\n", irq_pin); | |
875 | ||
876 | /* @see LDD3, page 318 */ | |
877 | rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq_line); | |
878 | /* could not read? */ | |
879 | if (rc) { | |
880 | printk(KERN_DEBUG "Could not query PCI_INTERRUPT_LINE, error %d\n", rc); | |
881 | goto err_irq; | |
882 | } | |
883 | printk(KERN_DEBUG "IRQ line #%d.\n", irq_line); | |
884 | #if 1 | |
885 | irq_line = dev->irq; | |
886 | /* @see LDD3, page 259 */ | |
887 | rc = request_irq(irq_line, altpciechdma_isr, IRQF_SHARED, DRV_NAME, (void *)ape); | |
888 | if (rc) { | |
889 | printk(KERN_DEBUG "Could not request IRQ #%d, error %d\n", irq_line, rc); | |
890 | ape->irq_line = -1; | |
891 | goto err_irq; | |
892 | } | |
893 | /* remember which irq we allocated */ | |
894 | ape->irq_line = (int)irq_line; | |
895 | printk(KERN_DEBUG "Succesfully requested IRQ #%d with dev_id 0x%p\n", irq_line, ape); | |
896 | #endif | |
897 | /* show BARs */ | |
898 | scan_bars(ape, dev); | |
899 | /* map BARs */ | |
900 | rc = map_bars(ape, dev); | |
901 | if (rc) | |
902 | goto err_map; | |
903 | #if ALTPCIECHDMA_CDEV | |
904 | /* initialize character device */ | |
905 | rc = sg_init(ape); | |
906 | if (rc) | |
907 | goto err_cdev; | |
908 | #endif | |
909 | /* perform DMA engines loop back test */ | |
910 | rc = dma_test(ape, dev); | |
911 | (void)rc; | |
912 | /* succesfully took the device */ | |
913 | rc = 0; | |
914 | printk(KERN_DEBUG "probe() successful.\n"); | |
915 | goto end; | |
916 | err_cdev: | |
917 | /* unmap the BARs */ | |
918 | unmap_bars(ape, dev); | |
919 | err_map: | |
920 | /* free allocated irq */ | |
921 | if (ape->irq_line >= 0) | |
922 | free_irq(ape->irq_line, (void *)ape); | |
923 | err_irq: | |
924 | if (ape->msi_enabled) | |
925 | pci_disable_msi(dev); | |
926 | /* disable the device iff it is not in use */ | |
927 | if (!ape->in_use) | |
928 | pci_disable_device(dev); | |
929 | if (ape->got_regions) | |
930 | pci_release_regions(dev); | |
931 | err_mask: | |
932 | err_regions: | |
933 | err_rev: | |
934 | /* clean up everything before device enable() */ | |
935 | err_enable: | |
936 | if (ape->table_virt) | |
937 | pci_free_consistent(dev, APE_CHDMA_TABLE_SIZE, ape->table_virt, ape->table_bus); | |
938 | /* clean up everything before allocating descriptor table */ | |
939 | err_table: | |
940 | if (ape) | |
941 | kfree(ape); | |
942 | err_ape: | |
943 | end: | |
944 | return rc; | |
945 | } | |
946 | ||
/* Called on device removal or driver unload.
 * Undoes what probe() set up, roughly in reverse order: character device,
 * descriptor table, IRQ, MSI, BAR mappings, device enable, regions.
 */
static void __devexit remove(struct pci_dev *dev)
{
	struct ape_dev *ape;
	printk(KERN_DEBUG "remove(0x%p)\n", dev);
	/* defensive: bail out if we never attached state to this device */
	if ((dev == 0) || (dev->dev.driver_data == 0)) {
		printk(KERN_DEBUG "remove(dev = 0x%p) dev->dev.driver_data = 0x%p\n",
			dev, (dev? dev->dev.driver_data: NULL));
		return;
	}
	/* recover the per-board state stored by probe() */
	ape = (struct ape_dev *)dev->dev.driver_data;
	printk(KERN_DEBUG "remove(dev = 0x%p) where dev->dev.driver_data = 0x%p\n", dev, ape);
	/* sanity check: the stored state should point back at this device */
	if (ape->pci_dev != dev) {
		printk(KERN_DEBUG "dev->dev.driver_data->pci_dev (0x%08lx) != dev (0x%08lx)\n",
			(unsigned long)ape->pci_dev, (unsigned long)dev);
	}
	/* remove character device */
#if ALTPCIECHDMA_CDEV
	sg_exit(ape);
#endif

	/* release the coherently-mapped descriptor table, if allocated */
	if (ape->table_virt)
		pci_free_consistent(dev, APE_CHDMA_TABLE_SIZE, ape->table_virt, ape->table_bus);

	/* free IRQ
	 * @see LDD3 page 279
	 */
	if (ape->irq_line >= 0) {
		printk(KERN_DEBUG "Freeing IRQ #%d for dev_id 0x%08lx.\n",
			ape->irq_line, (unsigned long)ape);
		free_irq(ape->irq_line, (void *)ape);
	}
	/* MSI was enabled? */
	if (ape->msi_enabled) {
		/* Disable MSI @see Documentation/MSI-HOWTO.txt */
		pci_disable_msi(dev);
		ape->msi_enabled = 0;
	}
	/* unmap the BARs */
	unmap_bars(ape, dev);
	/* in_use was set by probe() when pci_request_regions() failed,
	 * meaning another driver owns the device; do not disable it then */
	if (!ape->in_use)
		pci_disable_device(dev);
	if (ape->got_regions)
		/* to be called after device disable */
		pci_release_regions(dev);
}
992 | ||
993 | #if ALTPCIECHDMA_CDEV | |
994 | ||
995 | /* | |
996 | * Called when the device goes from unused to used. | |
997 | */ | |
998 | static int sg_open(struct inode *inode, struct file *file) | |
999 | { | |
1000 | struct ape_dev *ape; | |
1001 | printk(KERN_DEBUG DRV_NAME "_open()\n"); | |
1002 | /* pointer to containing data structure of the character device inode */ | |
1003 | ape = container_of(inode->i_cdev, struct ape_dev, cdev); | |
1004 | /* create a reference to our device state in the opened file */ | |
1005 | file->private_data = ape; | |
1006 | /* create virtual memory mapper */ | |
1007 | ape->sgm = sg_create_mapper(MAX_CHDMA_SIZE); | |
1008 | return 0; | |
1009 | } | |
1010 | ||
1011 | /* | |
1012 | * Called when the device goes from used to unused. | |
1013 | */ | |
1014 | static int sg_close(struct inode *inode, struct file *file) | |
1015 | { | |
1016 | /* fetch device specific data stored earlier during open */ | |
1017 | struct ape_dev *ape = (struct ape_dev *)file->private_data; | |
1018 | printk(KERN_DEBUG DRV_NAME "_close()\n"); | |
1019 | /* destroy virtual memory mapper */ | |
1020 | sg_destroy_mapper(ape->sgm); | |
1021 | return 0; | |
1022 | } | |
1023 | ||
1024 | static ssize_t sg_read(struct file *file, char __user *buf, size_t count, loff_t *pos) | |
1025 | { | |
1026 | /* fetch device specific data stored earlier during open */ | |
1027 | struct ape_dev *ape = (struct ape_dev *)file->private_data; | |
1028 | (void)ape; | |
1029 | printk(KERN_DEBUG DRV_NAME "_read(buf=0x%p, count=%lld, pos=%llu)\n", buf, (s64)count, (u64)*pos); | |
1030 | return count; | |
1031 | } | |
1032 | ||
1033 | /* sg_write() - Write to the device | |
1034 | * | |
1035 | * @buf userspace buffer | |
1036 | * @count number of bytes in the userspace buffer | |
1037 | * | |
1038 | * Iterate over the userspace buffer, taking at most 255 * PAGE_SIZE bytes for | |
1039 | * each DMA transfer. | |
1040 | * For each transfer, get the user pages, build a sglist, map, build a | |
1041 | * descriptor table. submit the transfer. wait for the interrupt handler | |
1042 | * to wake us on completion. | |
1043 | */ | |
1044 | static ssize_t sg_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) | |
1045 | { | |
1046 | int hwnents, tents; | |
1047 | size_t transfer_len, remaining = count, done = 0; | |
1048 | u64 transfer_addr = (u64)buf; | |
1049 | /* fetch device specific data stored earlier during open */ | |
1050 | struct ape_dev *ape = (struct ape_dev *)file->private_data; | |
1051 | printk(KERN_DEBUG DRV_NAME "_write(buf=0x%p, count=%lld, pos=%llu)\n", | |
1052 | buf, (s64)count, (u64)*pos); | |
1053 | /* TODO transfer boundaries at PAGE_SIZE granularity */ | |
c9b8015b | 1054 | while (remaining > 0) { |
c8801d8c | 1055 | /* limit DMA transfer size */ |
c9b8015b | 1056 | transfer_len = (remaining < APE_CHDMA_MAX_TRANSFER_LEN) ? remaining : |
c8801d8c LW |
1057 | APE_CHDMA_MAX_TRANSFER_LEN; |
1058 | /* get all user space buffer pages and create a scattergather list */ | |
1059 | sgm_map_user_pages(ape->sgm, transfer_addr, transfer_len, 0/*read from userspace*/); | |
1060 | printk(KERN_DEBUG DRV_NAME "mapped_pages=%d\n", ape->sgm->mapped_pages); | |
1061 | /* map all entries in the scattergather list */ | |
1062 | hwnents = pci_map_sg(ape->pci_dev, ape->sgm->sgl, ape->sgm->mapped_pages, DMA_TO_DEVICE); | |
1063 | printk(KERN_DEBUG DRV_NAME "hwnents=%d\n", hwnents); | |
1064 | /* build device descriptor tables and submit them to the DMA engine */ | |
1065 | tents = ape_sg_to_chdma_table(ape->sgm->sgl, hwnents, 0, &ape->table_virt->desc[0], 4096); | |
1066 | printk(KERN_DEBUG DRV_NAME "tents=%d\n", hwnents); | |
1067 | #if 0 | |
1068 | while (tables) { | |
1069 | /* TODO build table */ | |
1070 | /* TODO submit table to the device */ | |
1071 | /* if engine stopped and unfinished work then start engine */ | |
1072 | } | |
1073 | put ourselves on wait queue | |
1074 | #endif | |
1075 | ||
1076 | dma_unmap_sg(NULL, ape->sgm->sgl, ape->sgm->mapped_pages, DMA_TO_DEVICE); | |
1077 | /* dirty and free the pages */ | |
1078 | sgm_unmap_user_pages(ape->sgm, 1/*dirtied*/); | |
1079 | /* book keeping */ | |
1080 | transfer_addr += transfer_len; | |
1081 | remaining -= transfer_len; | |
1082 | done += transfer_len; | |
1083 | } | |
1084 | return done; | |
1085 | } | |
1086 | ||
1087 | /* | |
1088 | * character device file operations | |
1089 | */ | |
c9b8015b EA |
static const struct file_operations sg_fops = {
	.owner = THIS_MODULE,
	.open = sg_open,	/* attach board state, create SG mapper */
	.release = sg_close,	/* destroy SG mapper */
	.read = sg_read,	/* stub: logs and returns count */
	.write = sg_write,	/* map user pages, build DMA descriptor tables */
};
1097 | ||
1098 | /* sg_init() - Initialize character device | |
1099 | * | |
1100 | * XXX Should ideally be tied to the device, on device probe, not module init. | |
1101 | */ | |
1102 | static int sg_init(struct ape_dev *ape) | |
1103 | { | |
1104 | int rc; | |
1105 | printk(KERN_DEBUG DRV_NAME " sg_init()\n"); | |
1106 | /* allocate a dynamically allocated character device node */ | |
1107 | rc = alloc_chrdev_region(&ape->cdevno, 0/*requested minor*/, 1/*count*/, DRV_NAME); | |
1108 | /* allocation failed? */ | |
1109 | if (rc < 0) { | |
1110 | printk("alloc_chrdev_region() = %d\n", rc); | |
1111 | goto fail_alloc; | |
1112 | } | |
1113 | /* couple the device file operations to the character device */ | |
1114 | cdev_init(&ape->cdev, &sg_fops); | |
1115 | ape->cdev.owner = THIS_MODULE; | |
1116 | /* bring character device live */ | |
1117 | rc = cdev_add(&ape->cdev, ape->cdevno, 1/*count*/); | |
1118 | if (rc < 0) { | |
1119 | printk("cdev_add() = %d\n", rc); | |
1120 | goto fail_add; | |
1121 | } | |
1122 | printk(KERN_DEBUG "altpciechdma = %d:%d\n", MAJOR(ape->cdevno), MINOR(ape->cdevno)); | |
1123 | return 0; | |
1124 | fail_add: | |
1125 | /* free the dynamically allocated character device node */ | |
1126 | unregister_chrdev_region(ape->cdevno, 1/*count*/); | |
1127 | fail_alloc: | |
1128 | return -1; | |
1129 | } | |
1130 | ||
1131 | /* sg_exit() - Cleanup character device | |
1132 | * | |
1133 | * XXX Should ideally be tied to the device, on device remove, not module exit. | |
1134 | */ | |
1135 | ||
static void sg_exit(struct ape_dev *ape)
{
	printk(KERN_DEBUG DRV_NAME " sg_exit()\n");
	/* remove the character device; must precede freeing the region */
	cdev_del(&ape->cdev);
	/* free the dynamically allocated character device node */
	unregister_chrdev_region(ape->cdevno, 1/*count*/);
}
1144 | ||
1145 | #endif /* ALTPCIECHDMA_CDEV */ | |
1146 | ||
1147 | /* used to register the driver with the PCI kernel sub system | |
1148 | * @see LDD3 page 311 | |
1149 | */ | |
static struct pci_driver pci_driver = {
	.name = DRV_NAME,
	.id_table = ids,	/* PCI vendor/device IDs this driver binds to */
	.probe = probe,		/* take control of a matching device */
	.remove = remove,	/* release a device on unbind/unload */
	/* resume, suspend are optional */
};
1157 | ||
1158 | /** | |
1159 | * alterapciechdma_init() - Module initialization, registers devices. | |
1160 | */ | |
1161 | static int __init alterapciechdma_init(void) | |
1162 | { | |
c9b8015b | 1163 | int rc = 0; |
c8801d8c LW |
1164 | printk(KERN_DEBUG DRV_NAME " init(), built at " __DATE__ " " __TIME__ "\n"); |
1165 | /* register this driver with the PCI bus driver */ | |
1166 | rc = pci_register_driver(&pci_driver); | |
1167 | if (rc < 0) | |
c9b8015b | 1168 | return rc; |
c8801d8c LW |
1169 | return 0; |
1170 | } | |
1171 | ||
/**
 * alterapciechdma_exit() - Module cleanup, unregisters the PCI driver.
 */
static void __exit alterapciechdma_exit(void)
{
	printk(KERN_DEBUG DRV_NAME " exit(), built at " __DATE__ " " __TIME__ "\n");
	/* unregister this driver from the PCI bus driver; the PCI core
	 * invokes remove() for any still-bound devices */
	pci_unregister_driver(&pci_driver);
}
1181 | ||
MODULE_LICENSE("GPL");

/* module entry/exit points wrap PCI driver (un)registration */
module_init(alterapciechdma_init);
module_exit(alterapciechdma_exit);
1186 |