/*
 * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * The full GNU General Public License is included in this distribution in the
 * file called COPYING.
 */

/*
 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
 * copy operations.
 */

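/*
 * Clients drive this engine through the generic dmaengine API rather
 * than calling into this file directly. A minimal sketch of one copy,
 * assuming the dmaengine core has already handed the client a channel
 * (chan, dest, src and len are placeholders; error handling omitted):
 *
 *	dma_cookie_t cookie;
 *
 *	cookie = dma_async_memcpy_buf_to_buf(chan, dest, src, len);
 *	dma_async_memcpy_issue_pending(chan);
 *	while (dma_async_memcpy_complete(chan, cookie, NULL, NULL) ==
 *	       DMA_IN_PROGRESS)
 *		cpu_relax();
 */
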
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include "ioatdma.h"
#include "ioatdma_registers.h"
#include "ioatdma_hw.h"

#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
#define to_ioat_device(dev) container_of(dev, struct ioat_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)

/* internal functions */
static int __devinit ioat_probe(struct pci_dev *pdev,
				const struct pci_device_id *ent);
static void ioat_shutdown(struct pci_dev *pdev);
static void __devexit ioat_remove(struct pci_dev *pdev);

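/**
 * enumerate_dma_channels - find and initialize the device's channels
 * @device: the device to be enumerated
 *
 * Reads the channel count and the per-descriptor transfer cap from the
 * MMIO registers, then allocates and initializes one ioat_dma_chan per
 * hardware channel.
 */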
static int enumerate_dma_channels(struct ioat_device *device)
{
	u8 xfercap_scale;
	u32 xfercap;
	int i;
	struct ioat_dma_chan *ioat_chan;

	device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
	xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));

	for (i = 0; i < device->common.chancnt; i++) {
		ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
		if (!ioat_chan) {
			device->common.chancnt = i;
			break;
		}

		ioat_chan->device = device;
		ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
		ioat_chan->xfercap = xfercap;
		spin_lock_init(&ioat_chan->cleanup_lock);
		spin_lock_init(&ioat_chan->desc_lock);
		INIT_LIST_HEAD(&ioat_chan->free_desc);
		INIT_LIST_HEAD(&ioat_chan->used_desc);
		/* This should be made common somewhere in dmaengine.c */
		ioat_chan->common.device = &device->common;
		list_add_tail(&ioat_chan->common.device_node,
			      &device->common.channels);
	}
	return device->common.chancnt;
}

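/*
 * ioat_set_src/ioat_set_dest - walk every descriptor in a transaction
 * and program its source (or destination) address. The address advances
 * by xfercap per descriptor because long copies are split into
 * xfercap-sized chunks.
 */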
static void
ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
{
	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);

	pci_unmap_addr_set(desc, src, addr);

	list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
		iter->hw->src_addr = addr;
		addr += ioat_chan->xfercap;
	}
}

static void
ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
{
	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);

	pci_unmap_addr_set(desc, dst, addr);

	list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
		iter->hw->dst_addr = addr;
		addr += ioat_chan->xfercap;
	}
}

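/**
 * ioat_tx_submit - assign a cookie and hand a prepared chain to hardware
 * @tx: descriptor to submit
 *
 * Assigns the channel's next cookie, links the new chain onto the tail
 * of used_desc through the hardware next pointer, and issues an APPEND
 * once at least four descriptors are pending.
 */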
static dma_cookie_t
ioat_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
	struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
	int append = 0;
	dma_cookie_t cookie;
	struct ioat_desc_sw *group_start;

	group_start = list_entry(desc->async_tx.tx_list.next,
				 struct ioat_desc_sw, node);
	spin_lock_bh(&ioat_chan->desc_lock);
	/* cookie incr and addition to used_list must be atomic */
	cookie = ioat_chan->common.cookie;
	cookie++;
	if (cookie < 0)
		cookie = 1;
	ioat_chan->common.cookie = desc->async_tx.cookie = cookie;

	/* write address into NextDescriptor field of last desc in chain */
	to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
						group_start->async_tx.phys;
	list_splice_init(&desc->async_tx.tx_list, ioat_chan->used_desc.prev);

	ioat_chan->pending += desc->tx_cnt;
	if (ioat_chan->pending >= 4) {
		append = 1;
		ioat_chan->pending = 0;
	}
	spin_unlock_bh(&ioat_chan->desc_lock);

	if (append)
		writeb(IOAT_CHANCMD_APPEND,
		       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);

	return cookie;
}

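/**
 * ioat_dma_alloc_descriptor - allocate and initialize a descriptor pair
 * @ioat_chan: the channel the descriptor will be used on
 * @flags: allocation flags (GFP_KERNEL or GFP_ATOMIC)
 *
 * Allocates a hardware descriptor from the DMA-coherent pool together
 * with its software tracking structure, and wires up the async_tx
 * callbacks.
 */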
static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
					struct ioat_dma_chan *ioat_chan,
					gfp_t flags)
{
	struct ioat_dma_descriptor *desc;
	struct ioat_desc_sw *desc_sw;
	struct ioat_device *ioat_device;
	dma_addr_t phys;

	ioat_device = to_ioat_device(ioat_chan->common.device);
	desc = pci_pool_alloc(ioat_device->dma_pool, flags, &phys);
	if (unlikely(!desc))
		return NULL;

	desc_sw = kzalloc(sizeof(*desc_sw), flags);
	if (unlikely(!desc_sw)) {
		pci_pool_free(ioat_device->dma_pool, desc, phys);
		return NULL;
	}

	memset(desc, 0, sizeof(*desc));
	dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
	desc_sw->async_tx.tx_set_src = ioat_set_src;
	desc_sw->async_tx.tx_set_dest = ioat_set_dest;
	desc_sw->async_tx.tx_submit = ioat_tx_submit;
	INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
	desc_sw->hw = desc;
	desc_sw->async_tx.phys = phys;

	return desc_sw;
}

#define INITIAL_IOAT_DESC_COUNT 128

static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan);

/* returns the actual number of allocated descriptors */
static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *desc = NULL;
	u16 chanctrl;
	u32 chanerr;
	int i;
	LIST_HEAD(tmp_list);

	/*
	 * In-use bit automatically set by reading chanctrl
	 * If 0, we got it, if 1, someone else did
	 */
	chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
	if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE)
		return -EBUSY;

	/* Setup register to interrupt and write completion status on error */
	chanctrl = IOAT_CHANCTRL_CHANNEL_IN_USE |
		IOAT_CHANCTRL_ERR_INT_EN |
		IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
		IOAT_CHANCTRL_ERR_COMPLETION_EN;
	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);

	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	if (chanerr) {
		printk(KERN_ERR "IOAT: CHANERR = %x, clearing\n", chanerr);
		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	}

	/* Allocate descriptors */
	for (i = 0; i < INITIAL_IOAT_DESC_COUNT; i++) {
		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
		if (!desc) {
			printk(KERN_ERR
			       "IOAT: Only %d initial descriptors\n", i);
			break;
		}
		list_add_tail(&desc->node, &tmp_list);
	}
	spin_lock_bh(&ioat_chan->desc_lock);
	list_splice(&tmp_list, &ioat_chan->free_desc);
	spin_unlock_bh(&ioat_chan->desc_lock);

	/* allocate a completion writeback area */
	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
	ioat_chan->completion_virt =
		pci_pool_alloc(ioat_chan->device->completion_pool,
			       GFP_KERNEL,
			       &ioat_chan->completion_addr);
	memset(ioat_chan->completion_virt, 0,
	       sizeof(*ioat_chan->completion_virt));
	writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
	writel(((u64) ioat_chan->completion_addr) >> 32,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

	ioat_start_null_desc(ioat_chan);
	return i;
}

static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);

static void ioat_dma_free_chan_resources(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_device *ioat_device = to_ioat_device(chan->device);
	struct ioat_desc_sw *desc, *_desc;
	u16 chanctrl;
	int in_use_descs = 0;

	ioat_dma_memcpy_cleanup(ioat_chan);

	writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);

	spin_lock_bh(&ioat_chan->desc_lock);
	list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
		in_use_descs++;
		list_del(&desc->node);
		pci_pool_free(ioat_device->dma_pool, desc->hw,
			      desc->async_tx.phys);
		kfree(desc);
	}
	list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) {
		list_del(&desc->node);
		pci_pool_free(ioat_device->dma_pool, desc->hw,
			      desc->async_tx.phys);
		kfree(desc);
	}
	spin_unlock_bh(&ioat_chan->desc_lock);

	pci_pool_free(ioat_device->completion_pool,
		      ioat_chan->completion_virt,
		      ioat_chan->completion_addr);

	/* one is ok since we left it on there on purpose */
	if (in_use_descs > 1)
		printk(KERN_ERR "IOAT: Freeing %d in use descriptors!\n",
		       in_use_descs - 1);

	ioat_chan->last_completion = ioat_chan->completion_addr = 0;

	/* Tell hw the chan is free */
	chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
	chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE;
	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
}

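/**
 * ioat_dma_prep_memcpy - build a descriptor chain for a memcpy
 * @chan: the channel the transaction runs on
 * @len: total transfer length in bytes
 * @int_en: unused by this implementation
 *
 * Splits the copy into xfercap-sized hardware descriptors chained
 * through their next pointers; only the final descriptor carries the
 * completion-status control bit and the transaction cookie.
 */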
static struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *first, *prev, *new;
	LIST_HEAD(new_chain);
	u32 copy;
	size_t orig_len;
	int desc_count = 0;

	if (!len)
		return NULL;

	orig_len = len;

	first = NULL;
	prev = NULL;

	spin_lock_bh(&ioat_chan->desc_lock);
	while (len) {
		if (!list_empty(&ioat_chan->free_desc)) {
			new = to_ioat_desc(ioat_chan->free_desc.next);
			list_del(&new->node);
		} else {
			/* try to get another desc */
			new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
			/* will this ever happen? */
			/* TODO add upper limit on these */
			BUG_ON(!new);
		}

		copy = min((u32) len, ioat_chan->xfercap);

		new->hw->size = copy;
		new->hw->ctl = 0;
		new->async_tx.cookie = 0;
		new->async_tx.ack = 1;

		/* chain together the physical address list for the HW */
		if (!first)
			first = new;
		else
			prev->hw->next = (u64) new->async_tx.phys;

		prev = new;
		len -= copy;
		list_add_tail(&new->node, &new_chain);
		desc_count++;
	}

	list_splice(&new_chain, &new->async_tx.tx_list);

	new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
	new->hw->next = 0;
	new->tx_cnt = desc_count;
	new->async_tx.ack = 0; /* client is in control of this ack */
	new->async_tx.cookie = -EBUSY;

	pci_unmap_len_set(new, len, orig_len);
	spin_unlock_bh(&ioat_chan->desc_lock);

	return new ? &new->async_tx : NULL;
}

/**
 * ioat_dma_memcpy_issue_pending - push appended descriptors to hw
 * @chan: DMA channel handle
 */
static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	if (ioat_chan->pending != 0) {
		ioat_chan->pending = 0;
		writeb(IOAT_CHANCMD_APPEND,
		       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
	}
}

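/**
 * ioat_dma_memcpy_cleanup - reclaim finished descriptors
 * @chan: channel to clean
 *
 * Reads the completion writeback area to find the last descriptor the
 * hardware finished, unmaps and recycles everything up to that point,
 * and records the last completed cookie.
 */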
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
{
	unsigned long phys_complete;
	struct ioat_desc_sw *desc, *_desc;
	dma_cookie_t cookie = 0;

	prefetch(chan->completion_virt);

	if (!spin_trylock(&chan->cleanup_lock))
		return;

	/*
	 * The completion writeback can happen at any time,
	 * so reads by the driver need to be atomic operations.
	 * The descriptor physical addresses are limited to 32-bits
	 * when the CPU can only do a 32-bit mov.
	 */

#if (BITS_PER_LONG == 64)
	phys_complete =
	chan->completion_virt->full & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
	phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
#endif

	if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
		IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
		printk(KERN_ERR "IOAT: Channel halted, chanerr = %x\n",
		       readl(chan->reg_base + IOAT_CHANERR_OFFSET));

		/* TODO do something to salvage the situation */
	}

	if (phys_complete == chan->last_completion) {
		spin_unlock(&chan->cleanup_lock);
		return;
	}

	spin_lock_bh(&chan->desc_lock);
	list_for_each_entry_safe(desc, _desc, &chan->used_desc, node) {

		/*
		 * Incoming DMA requests may use multiple descriptors, due to
		 * exceeding xfercap, perhaps. If so, only the last one will
		 * have a cookie, and require unmapping.
		 */
		if (desc->async_tx.cookie) {
			cookie = desc->async_tx.cookie;

			/*
			 * yes we are unmapping both _page and _single alloc'd
			 * regions with unmap_page. Is this *really* that bad?
			 */
			pci_unmap_page(chan->device->pdev,
				       pci_unmap_addr(desc, dst),
				       pci_unmap_len(desc, len),
				       PCI_DMA_FROMDEVICE);
			pci_unmap_page(chan->device->pdev,
				       pci_unmap_addr(desc, src),
				       pci_unmap_len(desc, len),
				       PCI_DMA_TODEVICE);
		}

		if (desc->async_tx.phys != phys_complete) {
			/*
			 * a completed entry, but not the last, so cleanup
			 * if the client is done with the descriptor
			 */
			if (desc->async_tx.ack) {
				list_del(&desc->node);
				list_add_tail(&desc->node, &chan->free_desc);
			} else
				desc->async_tx.cookie = 0;
		} else {
			/*
			 * last used desc. Do not remove, so we can append
			 * from it, but don't look at it next time, either
			 */
			desc->async_tx.cookie = 0;

			/* TODO check status bits? */
			break;
		}
	}

	spin_unlock_bh(&chan->desc_lock);

	chan->last_completion = phys_complete;
	if (cookie != 0)
		chan->completed_cookie = cookie;

	spin_unlock(&chan->cleanup_lock);
}

static void ioat_dma_dependency_added(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	spin_lock_bh(&ioat_chan->desc_lock);
	if (ioat_chan->pending == 0) {
		spin_unlock_bh(&ioat_chan->desc_lock);
		ioat_dma_memcpy_cleanup(ioat_chan);
	} else
		spin_unlock_bh(&ioat_chan->desc_lock);
}

/**
 * ioat_dma_is_complete - poll the status of an IOAT DMA transaction
 * @chan: IOAT DMA channel handle
 * @cookie: DMA transaction identifier
 * @done: if not %NULL, updated with last completed transaction
 * @used: if not %NULL, updated with last used transaction
 */
static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
					    dma_cookie_t cookie,
					    dma_cookie_t *done,
					    dma_cookie_t *used)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	dma_cookie_t last_used;
	dma_cookie_t last_complete;
	enum dma_status ret;

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	ret = dma_async_is_complete(cookie, last_complete, last_used);
	if (ret == DMA_SUCCESS)
		return ret;

	ioat_dma_memcpy_cleanup(ioat_chan);

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	return dma_async_is_complete(cookie, last_complete, last_used);
}

/* PCI API */

static struct pci_device_id ioat_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
	{ PCI_DEVICE(PCI_VENDOR_ID_UNISYS,
		     PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
	{ 0, }
};

static struct pci_driver ioat_pci_driver = {
	.name		= "ioatdma",
	.id_table	= ioat_pci_tbl,
	.probe		= ioat_probe,
	.shutdown	= ioat_shutdown,
	.remove		= __devexit_p(ioat_remove),
};

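/**
 * ioat_do_interrupt - IRQ handler
 * @irq: interrupt number
 * @data: the owning ioat_device
 *
 * Checks the master-enable and status bits to decide whether the
 * interrupt is ours, logs the attention status, and writes intrctrl
 * back to acknowledge.
 */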
static irqreturn_t ioat_do_interrupt(int irq, void *data)
{
	struct ioat_device *instance = data;
	unsigned long attnstatus;
	u8 intrctrl;

	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);

	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
		return IRQ_NONE;

	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
		return IRQ_NONE;
	}

	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);

	printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus);

	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
	return IRQ_HANDLED;
}

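/**
 * ioat_start_null_desc - get a channel running with a no-op descriptor
 * @ioat_chan: channel to start
 *
 * Queues a single NULL descriptor, points the hardware chain address
 * registers at it, and issues START; real work is later appended
 * behind it.
 */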
static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *desc;

	spin_lock_bh(&ioat_chan->desc_lock);

	if (!list_empty(&ioat_chan->free_desc)) {
		desc = to_ioat_desc(ioat_chan->free_desc.next);
		list_del(&desc->node);
	} else {
		/* try to get another desc */
		spin_unlock_bh(&ioat_chan->desc_lock);
		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
		spin_lock_bh(&ioat_chan->desc_lock);
		/* will this ever happen? */
		BUG_ON(!desc);
	}

	desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
	desc->hw->next = 0;
	desc->async_tx.ack = 1;

	list_add_tail(&desc->node, &ioat_chan->used_desc);
	spin_unlock_bh(&ioat_chan->desc_lock);

	writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
	       ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW);
	writel(((u64) desc->async_tx.phys) >> 32,
	       ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);

	writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
}

/*
 * Perform an IOAT transaction to verify the HW works.
 */
#define IOAT_TEST_SIZE 2000

static int ioat_self_test(struct ioat_device *device)
{
	int i;
	u8 *src;
	u8 *dest;
	struct dma_chan *dma_chan;
	struct dma_async_tx_descriptor *tx;
	dma_addr_t addr;
	dma_cookie_t cookie;
	int err = 0;

	src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!src)
		return -ENOMEM;
	dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!dest) {
		kfree(src);
		return -ENOMEM;
	}

	/* Fill in src buffer */
	for (i = 0; i < IOAT_TEST_SIZE; i++)
		src[i] = (u8)i;

	/* Start copy, using first DMA channel */
	dma_chan = container_of(device->common.channels.next,
				struct dma_chan,
				device_node);
	if (ioat_dma_alloc_chan_resources(dma_chan) < 1) {
		err = -ENODEV;
		goto out;
	}

	tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
	if (!tx) {
		/* descriptor preparation failed; nothing to test with */
		err = -ENODEV;
		goto free_resources;
	}
	async_tx_ack(tx);
	addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
			      DMA_TO_DEVICE);
	ioat_set_src(addr, tx, 0);
	addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
			      DMA_FROM_DEVICE);
	ioat_set_dest(addr, tx, 0);
	cookie = ioat_tx_submit(tx);
	ioat_dma_memcpy_issue_pending(dma_chan);
	msleep(1);

	if (ioat_dma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
		printk(KERN_ERR
		       "ioatdma: Self-test copy timed out, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}
	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
		printk(KERN_ERR
		       "ioatdma: Self-test copy failed compare, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

free_resources:
	ioat_dma_free_chan_resources(dma_chan);
out:
	kfree(src);
	kfree(dest);
	return err;
}

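/**
 * ioat_probe - set up a newly discovered I/OAT device
 * @pdev: the PCI device
 * @ent: matching entry in ioat_pci_tbl
 *
 * Maps BAR 0, creates the descriptor and completion pools, requests
 * the (optionally MSI) interrupt, enumerates channels, runs a quick
 * copy self-test, and finally registers with the dmaengine core.
 */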
static int __devinit ioat_probe(struct pci_dev *pdev,
				const struct pci_device_id *ent)
{
	int err;
	unsigned long mmio_start, mmio_len;
	void __iomem *reg_base;
	struct ioat_device *device;

	err = pci_enable_device(pdev);
	if (err)
		goto err_enable_device;

	err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
	if (err)
		err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
	if (err)
		goto err_set_dma_mask;

	err = pci_request_regions(pdev, ioat_pci_driver.name);
	if (err)
		goto err_request_regions;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	reg_base = ioremap(mmio_start, mmio_len);
	if (!reg_base) {
		err = -ENOMEM;
		goto err_ioremap;
	}

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	/* DMA coherent memory pool for DMA descriptor allocations */
	device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
					   sizeof(struct ioat_dma_descriptor),
					   64, 0);
	if (!device->dma_pool) {
		err = -ENOMEM;
		goto err_dma_pool;
	}

	device->completion_pool = pci_pool_create("completion_pool", pdev,
						  sizeof(u64),
						  SMP_CACHE_BYTES,
						  SMP_CACHE_BYTES);
	if (!device->completion_pool) {
		err = -ENOMEM;
		goto err_completion_pool;
	}

	device->pdev = pdev;
	pci_set_drvdata(pdev, device);
#ifdef CONFIG_PCI_MSI
	if (pci_enable_msi(pdev) == 0)
		device->msi = 1;
	else
		device->msi = 0;
#endif
	err = request_irq(pdev->irq, &ioat_do_interrupt, IRQF_SHARED, "ioat",
			  device);
	if (err)
		goto err_irq;

	device->reg_base = reg_base;

	writeb(IOAT_INTRCTRL_MASTER_INT_EN,
	       device->reg_base + IOAT_INTRCTRL_OFFSET);
	pci_set_master(pdev);

	INIT_LIST_HEAD(&device->common.channels);
	enumerate_dma_channels(device);

	dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
	device->common.device_alloc_chan_resources =
						ioat_dma_alloc_chan_resources;
	device->common.device_free_chan_resources =
						ioat_dma_free_chan_resources;
	device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy;
	device->common.device_is_tx_complete = ioat_dma_is_complete;
	device->common.device_issue_pending = ioat_dma_memcpy_issue_pending;
	device->common.device_dependency_added = ioat_dma_dependency_added;
	device->common.dev = &pdev->dev;
	printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n",
	       device->common.chancnt);

	err = ioat_self_test(device);
	if (err)
		goto err_self_test;

	dma_async_device_register(&device->common);

	return 0;

err_self_test:
	/* the IRQ was requested successfully above, so release it here */
	free_irq(pdev->irq, device);
err_irq:
	pci_pool_destroy(device->completion_pool);
err_completion_pool:
	pci_pool_destroy(device->dma_pool);
err_dma_pool:
	kfree(device);
err_kzalloc:
	iounmap(reg_base);
err_ioremap:
	pci_release_regions(pdev);
err_request_regions:
err_set_dma_mask:
	pci_disable_device(pdev);
err_enable_device:
	printk(KERN_ERR "Intel(R) I/OAT DMA Engine initialization failed\n");

	return err;
}

static void ioat_shutdown(struct pci_dev *pdev)
{
	struct ioat_device *device;
	device = pci_get_drvdata(pdev);

	dma_async_device_unregister(&device->common);
}

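/**
 * ioat_remove - undo everything ioat_probe set up
 * @pdev: the PCI device being removed
 */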
static void __devexit ioat_remove(struct pci_dev *pdev)
{
	struct ioat_device *device;
	struct dma_chan *chan, *_chan;
	struct ioat_dma_chan *ioat_chan;

	device = pci_get_drvdata(pdev);
	dma_async_device_unregister(&device->common);

	free_irq(device->pdev->irq, device);
#ifdef CONFIG_PCI_MSI
	if (device->msi)
		pci_disable_msi(device->pdev);
#endif
	pci_pool_destroy(device->dma_pool);
	pci_pool_destroy(device->completion_pool);
	iounmap(device->reg_base);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	list_for_each_entry_safe(chan, _chan, &device->common.channels,
				 device_node) {
		ioat_chan = to_ioat_chan(chan);
		list_del(&chan->device_node);
		kfree(ioat_chan);
	}
	kfree(device);
}

/* MODULE API */
MODULE_VERSION("1.9");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Intel Corporation");

static int __init ioat_init_module(void)
{
	/* it's currently unsafe to unload this module */
	/* if forced, worst case is that rmmod hangs */
	__unsafe(THIS_MODULE);

	return pci_register_driver(&ioat_pci_driver);
}

module_init(ioat_init_module);

static void __exit ioat_exit_module(void)
{
	pci_unregister_driver(&ioat_pci_driver);
}

module_exit(ioat_exit_module);