dmaengine: ioatdma: move all the init routines
drivers/dma/ioat/dma.c
/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */

/*
 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
 * copy operations.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/prefetch.h>
#include "dma.h"
#include "registers.h"
#include "hw.h"

#include "../dmaengine.h"

/**
 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
{
        struct ioatdma_device *instance = data;
        struct ioatdma_chan *ioat_chan;
        unsigned long attnstatus;
        int bit;
        u8 intrctrl;

        intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);

        if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
                return IRQ_NONE;

        if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
                writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
                return IRQ_NONE;
        }

        attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
        for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) {
                ioat_chan = ioat_chan_by_index(instance, bit);
                if (test_bit(IOAT_RUN, &ioat_chan->state))
                        tasklet_schedule(&ioat_chan->cleanup_task);
        }

        writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
        return IRQ_HANDLED;
}

/**
 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
{
        struct ioatdma_chan *ioat_chan = data;

        if (test_bit(IOAT_RUN, &ioat_chan->state))
                tasklet_schedule(&ioat_chan->cleanup_task);

        return IRQ_HANDLED;
}

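/*
 * ioat_stop - quiesce a channel before teardown
 *
 * Clears IOAT_RUN so neither the interrupt handlers nor the cleanup tasklet
 * will re-arm work, synchronizes the channel's interrupt (per-channel MSI-X
 * vector or the shared pdev irq), flushes the timer and the tasklet, and
 * finally runs the device's cleanup_fn for one last pass.
 */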
void ioat_stop(struct ioatdma_chan *ioat_chan)
{
        struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
        struct pci_dev *pdev = ioat_dma->pdev;
        int chan_id = chan_num(ioat_chan);
        struct msix_entry *msix;

        /* 1/ stop irq from firing tasklets
         * 2/ stop the tasklet from re-arming irqs
         */
        clear_bit(IOAT_RUN, &ioat_chan->state);

        /* flush inflight interrupts */
        switch (ioat_dma->irq_mode) {
        case IOAT_MSIX:
                msix = &ioat_dma->msix_entries[chan_id];
                synchronize_irq(msix->vector);
                break;
        case IOAT_MSI:
        case IOAT_INTX:
                synchronize_irq(pdev->irq);
                break;
        default:
                break;
        }

        /* flush inflight timers */
        del_timer_sync(&ioat_chan->timer);

        /* flush inflight tasklet runs */
        tasklet_kill(&ioat_chan->cleanup_task);

        /* final cleanup now that everything is quiesced and can't re-arm */
        ioat_dma->cleanup_fn((unsigned long)&ioat_chan->dma_chan);
}

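/*
 * ioat_get_current_completion - read the last completed descriptor address
 *
 * Extracts the physical completion address from the channel's completion
 * writeback area; if the status indicates the channel halted, the CHANERR
 * register is read and logged.
 */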
dma_addr_t ioat_get_current_completion(struct ioatdma_chan *ioat_chan)
{
        dma_addr_t phys_complete;
        u64 completion;

        completion = *ioat_chan->completion;
        phys_complete = ioat_chansts_to_addr(completion);

        dev_dbg(to_dev(ioat_chan), "%s: phys_complete: %#llx\n", __func__,
                (unsigned long long) phys_complete);

        if (is_ioat_halted(completion)) {
                u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);

                dev_err(to_dev(ioat_chan), "Channel halted, chanerr = %x\n",
                        chanerr);

                /* TODO do something to salvage the situation */
        }

        return phys_complete;
}

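/*
 * ioat_cleanup_preamble - check whether there is new completion work
 *
 * Returns false when the hardware completion address still matches the last
 * recorded completion; otherwise clears IOAT_COMPLETION_ACK, re-arms the
 * completion timer and returns true.
 */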
bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan,
                           dma_addr_t *phys_complete)
{
        *phys_complete = ioat_get_current_completion(ioat_chan);
        if (*phys_complete == ioat_chan->last_completion)
                return false;
        clear_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
        mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);

        return true;
}

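/*
 * ioat_dma_tx_status - dmaengine tx_status hook
 *
 * Fast-path cookie check; if the transaction is not yet complete, run the
 * cleanup callback and re-check the cookie.
 */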
enum dma_status
ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
                   struct dma_tx_state *txstate)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
        enum dma_status ret;

        ret = dma_cookie_status(c, cookie, txstate);
        if (ret == DMA_COMPLETE)
                return ret;

        ioat_dma->cleanup_fn((unsigned long) c);

        return dma_cookie_status(c, cookie, txstate);
}

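/*
 * __ioat_issue_pending - publish prepared descriptors to the engine
 *
 * Advances 'issued' to 'head' and writes the updated descriptor count to
 * the DMACOUNT register.  Callers in this file (ioat_issue_pending,
 * ioat_update_pending, the null-descriptor and restart paths) hold
 * prep_lock.
 */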
void __ioat_issue_pending(struct ioatdma_chan *ioat_chan)
{
        ioat_chan->dmacount += ioat_ring_pending(ioat_chan);
        ioat_chan->issued = ioat_chan->head;
        writew(ioat_chan->dmacount,
               ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
        dev_dbg(to_dev(ioat_chan),
                "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
                __func__, ioat_chan->head, ioat_chan->tail,
                ioat_chan->issued, ioat_chan->dmacount);
}

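/*
 * ioat_issue_pending - dmaengine issue_pending hook
 *
 * Flushes any prepared-but-unissued descriptors to the hardware under
 * prep_lock.
 */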
void ioat_issue_pending(struct dma_chan *c)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);

        if (ioat_ring_pending(ioat_chan)) {
                spin_lock_bh(&ioat_chan->prep_lock);
                __ioat_issue_pending(ioat_chan);
                spin_unlock_bh(&ioat_chan->prep_lock);
        }
}

/**
 * ioat_update_pending - log pending descriptors
 * @ioat_chan: ioat channel
 *
 * Check if the number of unsubmitted descriptors has exceeded the
 * watermark.  Called with prep_lock held.
 */
static void ioat_update_pending(struct ioatdma_chan *ioat_chan)
{
        if (ioat_ring_pending(ioat_chan) > ioat_pending_level)
                __ioat_issue_pending(ioat_chan);
}

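/*
 * __ioat_start_null_desc - kick the channel with a NULL descriptor
 *
 * Builds an interrupt-generating NULL descriptor at the current head,
 * points the chain address register at it and issues it.  Used to start
 * (or restart) an otherwise empty ring.
 */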
static void __ioat_start_null_desc(struct ioatdma_chan *ioat_chan)
{
        struct ioat_ring_ent *desc;
        struct ioat_dma_descriptor *hw;

        if (ioat_ring_space(ioat_chan) < 1) {
                dev_err(to_dev(ioat_chan),
                        "Unable to start null desc - ring full\n");
                return;
        }

        dev_dbg(to_dev(ioat_chan),
                "%s: head: %#x tail: %#x issued: %#x\n",
                __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued);
        desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);

        hw = desc->hw;
        hw->ctl = 0;
        hw->ctl_f.null = 1;
        hw->ctl_f.int_en = 1;
        hw->ctl_f.compl_write = 1;
        /* set size to non-zero value (channel returns error when size is 0) */
        hw->size = NULL_DESC_BUFFER_SIZE;
        hw->src_addr = 0;
        hw->dst_addr = 0;
        async_tx_ack(&desc->txd);
        ioat_set_chainaddr(ioat_chan, desc->txd.phys);
        dump_desc_dbg(ioat_chan, desc);
        /* make sure descriptors are written before we submit */
        wmb();
        ioat_chan->head += 1;
        __ioat_issue_pending(ioat_chan);
}

void ioat_start_null_desc(struct ioatdma_chan *ioat_chan)
{
        spin_lock_bh(&ioat_chan->prep_lock);
        __ioat_start_null_desc(ioat_chan);
        spin_unlock_bh(&ioat_chan->prep_lock);
}

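/*
 * __ioat_restart_chan - restart the ring after an error or reset
 *
 * Rewinds 'issued' to 'tail', re-arms the completion timer and re-submits
 * the outstanding descriptors from the tail; if the ring is empty, a NULL
 * descriptor is started instead.
 */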
void __ioat_restart_chan(struct ioatdma_chan *ioat_chan)
{
        /* set the tail to be re-issued */
        ioat_chan->issued = ioat_chan->tail;
        ioat_chan->dmacount = 0;
        set_bit(IOAT_COMPLETION_PENDING, &ioat_chan->state);
        mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);

        dev_dbg(to_dev(ioat_chan),
                "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
                __func__, ioat_chan->head, ioat_chan->tail,
                ioat_chan->issued, ioat_chan->dmacount);

        if (ioat_ring_pending(ioat_chan)) {
                struct ioat_ring_ent *desc;

                desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
                ioat_set_chainaddr(ioat_chan, desc->txd.phys);
                __ioat_issue_pending(ioat_chan);
        } else
                __ioat_start_null_desc(ioat_chan);
}

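/*
 * ioat_quiesce - suspend the channel and wait for it to go quiescent
 *
 * Suspends the channel if it is active or idle and polls the status until
 * it leaves those states or @tmo expires (a zero @tmo waits indefinitely).
 * Returns 0 on success or -ETIMEDOUT.
 */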
int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo)
{
        unsigned long end = jiffies + tmo;
        int err = 0;
        u32 status;

        status = ioat_chansts(ioat_chan);
        if (is_ioat_active(status) || is_ioat_idle(status))
                ioat_suspend(ioat_chan);
        while (is_ioat_active(status) || is_ioat_idle(status)) {
                if (tmo && time_after(jiffies, end)) {
                        err = -ETIMEDOUT;
                        break;
                }
                status = ioat_chansts(ioat_chan);
                cpu_relax();
        }

        return err;
}

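/*
 * ioat_reset_sync - reset the channel and wait for the reset to complete
 *
 * Issues a channel reset and polls until the reset is no longer pending,
 * returning -ETIMEDOUT if the deadline passes first.
 */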
int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo)
{
        unsigned long end = jiffies + tmo;
        int err = 0;

        ioat_reset(ioat_chan);
        while (ioat_reset_pending(ioat_chan)) {
                if (end && time_after(jiffies, end)) {
                        err = -ETIMEDOUT;
                        break;
                }
                cpu_relax();
        }

        return err;
}

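/*
 * ioat_tx_submit_unlock - tx_submit hook for ring descriptors
 *
 * Assigns the cookie, arms the completion timer if the channel was idle,
 * advances 'head' by the number of descriptors produced, issues them once
 * the pending watermark is exceeded, and drops the prep_lock that was
 * taken by ioat_check_space_lock().
 */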
static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
{
        struct dma_chan *c = tx->chan;
        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        dma_cookie_t cookie;

        cookie = dma_cookie_assign(tx);
        dev_dbg(to_dev(ioat_chan), "%s: cookie: %d\n", __func__, cookie);

        if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
                mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);

        /* make descriptor updates visible before advancing ioat->head,
         * this is purposefully not smp_wmb() since we are also
         * publishing the descriptor updates to a dma device
         */
        wmb();

        ioat_chan->head += ioat_chan->produce;

        ioat_update_pending(ioat_chan);
        spin_unlock_bh(&ioat_chan->prep_lock);

        return cookie;
}

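/*
 * ioat_alloc_ring_ent - allocate one software ring entry
 *
 * Pairs a hardware descriptor from the device's dma_pool with a software
 * tracking structure from the ioat_cache slab, and initializes the embedded
 * dma_async_tx_descriptor with ioat_tx_submit_unlock as its tx_submit hook.
 */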
static struct ioat_ring_ent *
ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
{
        struct ioat_dma_descriptor *hw;
        struct ioat_ring_ent *desc;
        struct ioatdma_device *ioat_dma;
        dma_addr_t phys;

        ioat_dma = to_ioatdma_device(chan->device);
        hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys);
        if (!hw)
                return NULL;
        memset(hw, 0, sizeof(*hw));

        desc = kmem_cache_zalloc(ioat_cache, flags);
        if (!desc) {
                pci_pool_free(ioat_dma->dma_pool, hw, phys);
                return NULL;
        }

        dma_async_tx_descriptor_init(&desc->txd, chan);
        desc->txd.tx_submit = ioat_tx_submit_unlock;
        desc->hw = hw;
        desc->txd.phys = phys;
        return desc;
}

void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
{
        struct ioatdma_device *ioat_dma;

        ioat_dma = to_ioatdma_device(chan->device);
        pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys);
        kmem_cache_free(ioat_cache, desc);
}

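/*
 * ioat_alloc_ring - allocate a software ring of 2^order descriptors
 *
 * Allocates the descriptor pointer array and each ring entry, then chains
 * the hardware descriptors into a circular list via their 'next' pointers.
 */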
struct ioat_ring_ent **
ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
{
        struct ioat_ring_ent **ring;
        int descs = 1 << order;
        int i;

        if (order > ioat_get_max_alloc_order())
                return NULL;

        /* allocate the array to hold the software ring */
        ring = kcalloc(descs, sizeof(*ring), flags);
        if (!ring)
                return NULL;
        for (i = 0; i < descs; i++) {
                ring[i] = ioat_alloc_ring_ent(c, flags);
                if (!ring[i]) {
                        while (i--)
                                ioat_free_ring_ent(ring[i], c);
                        kfree(ring);
                        return NULL;
                }
                set_desc_id(ring[i], i);
        }

        /* link descs */
        for (i = 0; i < descs-1; i++) {
                struct ioat_ring_ent *next = ring[i+1];
                struct ioat_dma_descriptor *hw = ring[i]->hw;

                hw->next = next->txd.phys;
        }
        ring[i]->hw->next = ring[0]->txd.phys;

        return ring;
}

bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
{
        /* reshape differs from normal ring allocation in that we want
         * to allocate a new software ring while only
         * extending/truncating the hardware ring
         */
        struct dma_chan *c = &ioat_chan->dma_chan;
        const u32 curr_size = ioat_ring_size(ioat_chan);
        const u16 active = ioat_ring_active(ioat_chan);
        const u32 new_size = 1 << order;
        struct ioat_ring_ent **ring;
        u32 i;

        if (order > ioat_get_max_alloc_order())
                return false;

        /* double check that we have at least 1 free descriptor */
        if (active == curr_size)
                return false;

        /* when shrinking, verify that we can hold the current active
         * set in the new ring
         */
        if (active >= new_size)
                return false;

        /* allocate the array to hold the software ring */
        ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
        if (!ring)
                return false;

        /* allocate/trim descriptors as needed */
        if (new_size > curr_size) {
                /* copy current descriptors to the new ring */
                for (i = 0; i < curr_size; i++) {
                        u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
                        u16 new_idx = (ioat_chan->tail+i) & (new_size-1);

                        ring[new_idx] = ioat_chan->ring[curr_idx];
                        set_desc_id(ring[new_idx], new_idx);
                }

                /* add new descriptors to the ring */
                for (i = curr_size; i < new_size; i++) {
                        u16 new_idx = (ioat_chan->tail+i) & (new_size-1);

                        ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT);
                        if (!ring[new_idx]) {
                                while (i--) {
                                        u16 new_idx = (ioat_chan->tail+i) &
                                                       (new_size-1);

                                        ioat_free_ring_ent(ring[new_idx], c);
                                }
                                kfree(ring);
                                return false;
                        }
                        set_desc_id(ring[new_idx], new_idx);
                }

                /* hw link new descriptors */
                for (i = curr_size-1; i < new_size; i++) {
                        u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
                        struct ioat_ring_ent *next =
                                ring[(new_idx+1) & (new_size-1)];
                        struct ioat_dma_descriptor *hw = ring[new_idx]->hw;

                        hw->next = next->txd.phys;
                }
        } else {
                struct ioat_dma_descriptor *hw;
                struct ioat_ring_ent *next;

                /* copy current descriptors to the new ring, dropping the
                 * removed descriptors
                 */
                for (i = 0; i < new_size; i++) {
                        u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
                        u16 new_idx = (ioat_chan->tail+i) & (new_size-1);

                        ring[new_idx] = ioat_chan->ring[curr_idx];
                        set_desc_id(ring[new_idx], new_idx);
                }

                /* free deleted descriptors */
                for (i = new_size; i < curr_size; i++) {
                        struct ioat_ring_ent *ent;

                        ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i);
                        ioat_free_ring_ent(ent, c);
                }

                /* fix up hardware ring */
                hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw;
                next = ring[(ioat_chan->tail+new_size) & (new_size-1)];
                hw->next = next->txd.phys;
        }

        dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n",
                __func__, new_size);

        kfree(ioat_chan->ring);
        ioat_chan->ring = ring;
        ioat_chan->alloc_order = order;

        return true;
}

/**
 * ioat_check_space_lock - verify space and grab ring producer lock
 * @ioat_chan: ioat channel (ring) to operate on
 * @num_descs: allocation length
 */
int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
{
        bool retry;

retry:
        spin_lock_bh(&ioat_chan->prep_lock);
        /* never allow the last descriptor to be consumed, we need at
         * least one free at all times to allow for on-the-fly ring
         * resizing.
         */
        if (likely(ioat_ring_space(ioat_chan) > num_descs)) {
                dev_dbg(to_dev(ioat_chan), "%s: num_descs: %d (%x:%x:%x)\n",
                        __func__, num_descs, ioat_chan->head,
                        ioat_chan->tail, ioat_chan->issued);
                ioat_chan->produce = num_descs;
                return 0;  /* with ioat->prep_lock held */
        }
        retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
        spin_unlock_bh(&ioat_chan->prep_lock);

        /* is another cpu already trying to expand the ring? */
        if (retry)
                goto retry;

        spin_lock_bh(&ioat_chan->cleanup_lock);
        spin_lock_bh(&ioat_chan->prep_lock);
        retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1);
        clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
        spin_unlock_bh(&ioat_chan->prep_lock);
        spin_unlock_bh(&ioat_chan->cleanup_lock);

        /* if we were able to expand the ring retry the allocation */
        if (retry)
                goto retry;

        dev_dbg_ratelimited(to_dev(ioat_chan),
                            "%s: ring full! num_descs: %d (%x:%x:%x)\n",
                            __func__, num_descs, ioat_chan->head,
                            ioat_chan->tail, ioat_chan->issued);

        /* progress reclaim in the allocation failure case; we may be
         * called under bh_disabled so we need to trigger the timer
         * event directly
         */
        if (time_is_before_jiffies(ioat_chan->timer.expires)
            && timer_pending(&ioat_chan->timer)) {
                struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;

                mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
                ioat_dma->timer_fn((unsigned long)ioat_chan);
        }

        return -ENOMEM;
}

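/*
 * ioat_dma_prep_memcpy_lock - prepare a memcpy operation
 *
 * Splits the copy into as many descriptors as the channel's transfer cap
 * (xfercap_log) requires, fills them in place in the ring, and returns the
 * tx descriptor of the final segment with the interrupt/fence/completion
 * flags set.  The prep_lock taken by ioat_check_space_lock() remains held
 * until submission to guarantee in-order submission.
 */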
struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
                          dma_addr_t dma_src, size_t len, unsigned long flags)
{
        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        struct ioat_dma_descriptor *hw;
        struct ioat_ring_ent *desc;
        dma_addr_t dst = dma_dest;
        dma_addr_t src = dma_src;
        size_t total_len = len;
        int num_descs, idx, i;

        num_descs = ioat_xferlen_to_descs(ioat_chan, len);
        if (likely(num_descs) &&
            ioat_check_space_lock(ioat_chan, num_descs) == 0)
                idx = ioat_chan->head;
        else
                return NULL;
        i = 0;
        do {
                size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);

                desc = ioat_get_ring_ent(ioat_chan, idx + i);
                hw = desc->hw;

                hw->size = copy;
                hw->ctl = 0;
                hw->src_addr = src;
                hw->dst_addr = dst;

                len -= copy;
                dst += copy;
                src += copy;
                dump_desc_dbg(ioat_chan, desc);
        } while (++i < num_descs);

        desc->txd.flags = flags;
        desc->len = total_len;
        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
        hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
        hw->ctl_f.compl_write = 1;
        dump_desc_dbg(ioat_chan, desc);
        /* we leave the channel locked to ensure in order submission */

        return &desc->txd;
}