Commit | Line | Data |
---|---|---|
9b3452d1 SAS |
1 | #include <linux/dmaengine.h> |
2 | #include <linux/dma-mapping.h> | |
3 | #include <linux/platform_device.h> | |
4 | #include <linux/module.h> | |
5 | #include <linux/of.h> | |
6 | #include <linux/slab.h> | |
7 | #include <linux/of_dma.h> | |
8 | #include <linux/of_irq.h> | |
9 | #include <linux/dmapool.h> | |
10 | #include <linux/interrupt.h> | |
11 | #include <linux/of_address.h> | |
d6aafa2b | 12 | #include <linux/pm_runtime.h> |
9b3452d1 SAS |
13 | #include "dmaengine.h" |
14 | ||
15 | #define DESC_TYPE 27 | |
16 | #define DESC_TYPE_HOST 0x10 | |
17 | #define DESC_TYPE_TEARD 0x13 | |
18 | ||
19 | #define TD_DESC_IS_RX (1 << 16) | |
20 | #define TD_DESC_DMA_NUM 10 | |
21 | ||
22 | #define DESC_LENGTH_BITS_NUM 21 | |
23 | ||
24 | #define DESC_TYPE_USB (5 << 26) | |
25 | #define DESC_PD_COMPLETE (1 << 31) | |
26 | ||
27 | /* DMA engine */ | |
28 | #define DMA_TDFDQ 4 | |
29 | #define DMA_TXGCR(x) (0x800 + (x) * 0x20) | |
30 | #define DMA_RXGCR(x) (0x808 + (x) * 0x20) | |
31 | #define RXHPCRA0 4 | |
32 | ||
33 | #define GCR_CHAN_ENABLE (1 << 31) | |
34 | #define GCR_TEARDOWN (1 << 30) | |
35 | #define GCR_STARV_RETRY (1 << 24) | |
36 | #define GCR_DESC_TYPE_HOST (1 << 14) | |
37 | ||
38 | /* DMA scheduler */ | |
39 | #define DMA_SCHED_CTRL 0 | |
40 | #define DMA_SCHED_CTRL_EN (1 << 31) | |
41 | #define DMA_SCHED_WORD(x) ((x) * 4 + 0x800) | |
42 | ||
43 | #define SCHED_ENTRY0_CHAN(x) ((x) << 0) | |
44 | #define SCHED_ENTRY0_IS_RX (1 << 7) | |
45 | ||
46 | #define SCHED_ENTRY1_CHAN(x) ((x) << 8) | |
47 | #define SCHED_ENTRY1_IS_RX (1 << 15) | |
48 | ||
49 | #define SCHED_ENTRY2_CHAN(x) ((x) << 16) | |
50 | #define SCHED_ENTRY2_IS_RX (1 << 23) | |
51 | ||
52 | #define SCHED_ENTRY3_CHAN(x) ((x) << 24) | |
53 | #define SCHED_ENTRY3_IS_RX (1 << 31) | |
54 | ||
55 | /* Queue manager */ | |
56 | /* 4 KiB of memory for descriptors, 2 for each endpoint */ | |
57 | #define ALLOC_DECS_NUM 128 | |
58 | #define DESCS_AREAS 1 | |
59 | #define TOTAL_DESCS_NUM (ALLOC_DECS_NUM * DESCS_AREAS) | |
60 | #define QMGR_SCRATCH_SIZE (TOTAL_DESCS_NUM * 4) | |
61 | ||
62 | #define QMGR_LRAM0_BASE 0x80 | |
63 | #define QMGR_LRAM_SIZE 0x84 | |
64 | #define QMGR_LRAM1_BASE 0x88 | |
65 | #define QMGR_MEMBASE(x) (0x1000 + (x) * 0x10) | |
66 | #define QMGR_MEMCTRL(x) (0x1004 + (x) * 0x10) | |
67 | #define QMGR_MEMCTRL_IDX_SH 16 | |
68 | #define QMGR_MEMCTRL_DESC_SH 8 | |
69 | ||
70 | #define QMGR_NUM_PEND 5 | |
71 | #define QMGR_PEND(x) (0x90 + (x) * 4) | |
72 | ||
73 | #define QMGR_PENDING_SLOT_Q(x) (x / 32) | |
74 | #define QMGR_PENDING_BIT_Q(x) (x % 32) | |
75 | ||
76 | #define QMGR_QUEUE_A(n) (0x2000 + (n) * 0x10) | |
77 | #define QMGR_QUEUE_B(n) (0x2004 + (n) * 0x10) | |
78 | #define QMGR_QUEUE_C(n) (0x2008 + (n) * 0x10) | |
79 | #define QMGR_QUEUE_D(n) (0x200c + (n) * 0x10) | |
80 | ||
81 | /* Glue layer specific */ | |
82 | /* USBSS / USB AM335x */ | |
83 | #define USBSS_IRQ_STATUS 0x28 | |
84 | #define USBSS_IRQ_ENABLER 0x2c | |
85 | #define USBSS_IRQ_CLEARR 0x30 | |
86 | ||
87 | #define USBSS_IRQ_PD_COMP (1 << 2) | |
88 | ||
/* Per-channel state; each USB endpoint direction is one channel. */
struct cppi41_channel {
	struct dma_chan chan;			/* dmaengine channel handle */
	struct dma_async_tx_descriptor txd;
	struct cppi41_dd *cdd;			/* parent controller */
	struct cppi41_desc *desc;		/* this channel's descriptor (virt) */
	dma_addr_t desc_phys;			/* DMA address of @desc */
	void __iomem *gcr_reg;			/* channel's TX/RX global config reg */
	int is_tx;
	u32 residue;				/* bytes left, set by the IRQ handler */

	unsigned int q_num;			/* submit queue number */
	unsigned int q_comp_num;		/* completion queue number */
	unsigned int port_num;			/* USB port/endpoint number */

	unsigned td_retry;			/* teardown poll budget (set to 100) */
	unsigned td_queued:1;			/* teardown descriptor pushed */
	unsigned td_seen:1;			/* teardown descriptor popped back */
	unsigned td_desc_seen:1;		/* transfer descriptor reclaimed */
};
108 | ||
/*
 * One CPPI 4.1 host packet descriptor: eight 32-bit words, 32-byte
 * aligned so the queue manager can reuse the low five address bits for
 * the descriptor-size encoding (see push_desc_queue()).
 */
struct cppi41_desc {
	u32 pd0;
	u32 pd1;
	u32 pd2;
	u32 pd3;
	u32 pd4;
	u32 pd5;
	u32 pd6;
	u32 pd7;
} __aligned(32);
119 | ||
/* Submit/completion queue pair assigned to one channel. */
struct chan_queues {
	u16 submit;
	u16 complete;
};
124 | ||
/* Controller/driver state, one per probed device. */
struct cppi41_dd {
	struct dma_device ddev;

	/* backing store for the queue manager's linking RAM */
	void *qmgr_scratch;
	dma_addr_t scratch_phys;

	struct cppi41_desc *cd;			/* descriptor area (virt) */
	dma_addr_t descs_phys;			/* descriptor area (DMA) */
	u32 first_td_desc;			/* index of the teardown descriptor */
	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM]; /* desc slot -> owner */

	void __iomem *usbss_mem;		/* USBSS glue registers */
	void __iomem *ctrl_mem;			/* DMA controller registers */
	void __iomem *sched_mem;		/* DMA scheduler registers */
	void __iomem *qmgr_mem;			/* queue manager registers */
	unsigned int irq;
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;		/* queues used for teardown */
};
145 | ||
146 | #define FIST_COMPLETION_QUEUE 93 | |
147 | static struct chan_queues usb_queues_tx[] = { | |
148 | /* USB0 ENDP 1 */ | |
149 | [ 0] = { .submit = 32, .complete = 93}, | |
150 | [ 1] = { .submit = 34, .complete = 94}, | |
151 | [ 2] = { .submit = 36, .complete = 95}, | |
152 | [ 3] = { .submit = 38, .complete = 96}, | |
153 | [ 4] = { .submit = 40, .complete = 97}, | |
154 | [ 5] = { .submit = 42, .complete = 98}, | |
155 | [ 6] = { .submit = 44, .complete = 99}, | |
156 | [ 7] = { .submit = 46, .complete = 100}, | |
157 | [ 8] = { .submit = 48, .complete = 101}, | |
158 | [ 9] = { .submit = 50, .complete = 102}, | |
159 | [10] = { .submit = 52, .complete = 103}, | |
160 | [11] = { .submit = 54, .complete = 104}, | |
161 | [12] = { .submit = 56, .complete = 105}, | |
162 | [13] = { .submit = 58, .complete = 106}, | |
163 | [14] = { .submit = 60, .complete = 107}, | |
164 | ||
165 | /* USB1 ENDP1 */ | |
166 | [15] = { .submit = 62, .complete = 125}, | |
167 | [16] = { .submit = 64, .complete = 126}, | |
168 | [17] = { .submit = 66, .complete = 127}, | |
169 | [18] = { .submit = 68, .complete = 128}, | |
170 | [19] = { .submit = 70, .complete = 129}, | |
171 | [20] = { .submit = 72, .complete = 130}, | |
172 | [21] = { .submit = 74, .complete = 131}, | |
173 | [22] = { .submit = 76, .complete = 132}, | |
174 | [23] = { .submit = 78, .complete = 133}, | |
175 | [24] = { .submit = 80, .complete = 134}, | |
176 | [25] = { .submit = 82, .complete = 135}, | |
177 | [26] = { .submit = 84, .complete = 136}, | |
178 | [27] = { .submit = 86, .complete = 137}, | |
179 | [28] = { .submit = 88, .complete = 138}, | |
180 | [29] = { .submit = 90, .complete = 139}, | |
181 | }; | |
182 | ||
/* Submit/completion queue map for the RX channels, indexed by port. */
static const struct chan_queues usb_queues_rx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit =  1, .complete = 109},
	[ 1] = { .submit =  2, .complete = 110},
	[ 2] = { .submit =  3, .complete = 111},
	[ 3] = { .submit =  4, .complete = 112},
	[ 4] = { .submit =  5, .complete = 113},
	[ 5] = { .submit =  6, .complete = 114},
	[ 6] = { .submit =  7, .complete = 115},
	[ 7] = { .submit =  8, .complete = 116},
	[ 8] = { .submit =  9, .complete = 117},
	[ 9] = { .submit = 10, .complete = 118},
	[10] = { .submit = 11, .complete = 119},
	[11] = { .submit = 12, .complete = 120},
	[12] = { .submit = 13, .complete = 121},
	[13] = { .submit = 14, .complete = 122},
	[14] = { .submit = 15, .complete = 123},

	/* USB1 ENDP 1 */
	[15] = { .submit = 16, .complete = 141},
	[16] = { .submit = 17, .complete = 142},
	[17] = { .submit = 18, .complete = 143},
	[18] = { .submit = 19, .complete = 144},
	[19] = { .submit = 20, .complete = 145},
	[20] = { .submit = 21, .complete = 146},
	[21] = { .submit = 22, .complete = 147},
	[22] = { .submit = 23, .complete = 148},
	[23] = { .submit = 24, .complete = 149},
	[24] = { .submit = 25, .complete = 150},
	[25] = { .submit = 26, .complete = 151},
	[26] = { .submit = 27, .complete = 152},
	[27] = { .submit = 28, .complete = 153},
	[28] = { .submit = 29, .complete = 154},
	[29] = { .submit = 30, .complete = 155},
};
218 | ||
/* Per-SoC glue data, selected through the OF match table. */
struct cppi_glue_infos {
	irqreturn_t (*isr)(int irq, void *data);
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;		/* teardown descriptor queues */
};
225 | ||
226 | static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c) | |
227 | { | |
228 | return container_of(c, struct cppi41_channel, chan); | |
229 | } | |
230 | ||
/*
 * Map a completed descriptor's DMA address back to the channel that owns
 * it and clear that descriptor's busy slot.
 *
 * Returns NULL if @desc lies outside our descriptor memory.
 */
static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
{
	struct cppi41_channel *c;
	u32 descs_size;
	u32 desc_num;

	descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;

	/* reject addresses outside the descriptor area */
	if (!((desc >= cdd->descs_phys) &&
			(desc < (cdd->descs_phys + descs_size)))) {
		return NULL;
	}

	desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
	BUG_ON(desc_num >= ALLOC_DECS_NUM);
	c = cdd->chan_busy[desc_num];
	/* the slot is free again once the completion has been claimed */
	cdd->chan_busy[desc_num] = NULL;
	return c;
}
250 | ||
251 | static void cppi_writel(u32 val, void *__iomem *mem) | |
252 | { | |
253 | __raw_writel(val, mem); | |
254 | } | |
255 | ||
256 | static u32 cppi_readl(void *__iomem *mem) | |
257 | { | |
258 | return __raw_readl(mem); | |
259 | } | |
260 | ||
/*
 * Extract the transfer-length field from a packet descriptor word.
 * NOTE(review): the mask covers DESC_LENGTH_BITS_NUM + 1 = 22 bits -
 * confirm against the CPPI 4.1 descriptor layout that the "+ 1" is
 * intended (i.e. the field occupies bits 0..21 inclusive).
 */
static u32 pd_trans_len(u32 val)
{
	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
}
265 | ||
266 | static irqreturn_t cppi41_irq(int irq, void *data) | |
267 | { | |
268 | struct cppi41_dd *cdd = data; | |
269 | struct cppi41_channel *c; | |
270 | u32 status; | |
271 | int i; | |
272 | ||
273 | status = cppi_readl(cdd->usbss_mem + USBSS_IRQ_STATUS); | |
274 | if (!(status & USBSS_IRQ_PD_COMP)) | |
275 | return IRQ_NONE; | |
276 | cppi_writel(status, cdd->usbss_mem + USBSS_IRQ_STATUS); | |
277 | ||
278 | for (i = QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE); i < QMGR_NUM_PEND; | |
279 | i++) { | |
280 | u32 val; | |
281 | u32 q_num; | |
282 | ||
283 | val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i)); | |
284 | if (i == QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE) && val) { | |
285 | u32 mask; | |
286 | /* set corresponding bit for completetion Q 93 */ | |
287 | mask = 1 << QMGR_PENDING_BIT_Q(FIST_COMPLETION_QUEUE); | |
288 | /* not set all bits for queues less than Q 93 */ | |
289 | mask--; | |
290 | /* now invert and keep only Q 93+ set */ | |
291 | val &= ~mask; | |
292 | } | |
293 | ||
294 | if (val) | |
295 | __iormb(); | |
296 | ||
297 | while (val) { | |
298 | u32 desc; | |
299 | ||
300 | q_num = __fls(val); | |
301 | val &= ~(1 << q_num); | |
302 | q_num += 32 * i; | |
303 | desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(q_num)); | |
304 | desc &= ~0x1f; | |
305 | c = desc_to_chan(cdd, desc); | |
306 | if (WARN_ON(!c)) { | |
307 | pr_err("%s() q %d desc %08x\n", __func__, | |
308 | q_num, desc); | |
309 | continue; | |
310 | } | |
311 | c->residue = pd_trans_len(c->desc->pd6) - | |
312 | pd_trans_len(c->desc->pd0); | |
313 | ||
314 | dma_cookie_complete(&c->txd); | |
315 | c->txd.callback(c->txd.callback_param); | |
316 | } | |
317 | } | |
318 | return IRQ_HANDLED; | |
319 | } | |
320 | ||
321 | static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx) | |
322 | { | |
323 | dma_cookie_t cookie; | |
324 | ||
325 | cookie = dma_cookie_assign(tx); | |
326 | ||
327 | return cookie; | |
328 | } | |
329 | ||
330 | static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) | |
331 | { | |
332 | struct cppi41_channel *c = to_cpp41_chan(chan); | |
333 | ||
334 | dma_cookie_init(chan); | |
335 | dma_async_tx_descriptor_init(&c->txd, chan); | |
336 | c->txd.tx_submit = cppi41_tx_submit; | |
337 | ||
338 | if (!c->is_tx) | |
339 | cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0); | |
340 | ||
341 | return 0; | |
342 | } | |
343 | ||
/* Nothing to release: channels and descriptors are allocated at probe. */
static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
{
}
347 | ||
/*
 * Report cookie status; on success also report the residue that the
 * interrupt handler computed for this channel.
 */
static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
	dma_cookie_t cookie, struct dma_tx_state *txstate)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	enum dma_status ret;

	/* lock */
	ret = dma_cookie_status(chan, cookie, txstate);
	if (txstate && ret == DMA_SUCCESS)
		txstate->residue = c->residue;
	/* unlock */

	return ret;
}
362 | ||
/*
 * Hand the channel's descriptor to the hardware: record the channel in
 * chan_busy[] (so the IRQ handler can map the completion back), then
 * write the descriptor address into the submit queue's D register with
 * the size encoded as (sizeof(desc) - 24) / 4 in the low bits.
 */
static void push_desc_queue(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	u32 reg;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = c;

	reg = (sizeof(struct cppi41_desc) - 24) / 4;
	reg |= desc_phys;
	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
}
379 | ||
/* Enable the channel, then push the prepared descriptor to hardware. */
static void cppi41_dma_issue_pending(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	u32 reg;

	c->residue = 0;

	reg = GCR_CHAN_ENABLE;
	if (!c->is_tx) {
		/* RX additionally needs starvation retry, host-type
		 * descriptors and the completion queue number */
		reg |= GCR_STARV_RETRY;
		reg |= GCR_DESC_TYPE_HOST;
		reg |= c->q_comp_num;
	}

	cppi_writel(reg, c->gcr_reg);

	/*
	 * We don't use writel() but __raw_writel() so we have to make sure
	 * that the DMA descriptor in coherent memory made to the main memory
	 * before starting the dma engine.
	 */
	__iowmb();
	push_desc_queue(c);
}
404 | ||
405 | static u32 get_host_pd0(u32 length) | |
406 | { | |
407 | u32 reg; | |
408 | ||
409 | reg = DESC_TYPE_HOST << DESC_TYPE; | |
410 | reg |= length; | |
411 | ||
412 | return reg; | |
413 | } | |
414 | ||
415 | static u32 get_host_pd1(struct cppi41_channel *c) | |
416 | { | |
417 | u32 reg; | |
418 | ||
419 | reg = 0; | |
420 | ||
421 | return reg; | |
422 | } | |
423 | ||
424 | static u32 get_host_pd2(struct cppi41_channel *c) | |
425 | { | |
426 | u32 reg; | |
427 | ||
428 | reg = DESC_TYPE_USB; | |
429 | reg |= c->q_comp_num; | |
430 | ||
431 | return reg; | |
432 | } | |
433 | ||
434 | static u32 get_host_pd3(u32 length) | |
435 | { | |
436 | u32 reg; | |
437 | ||
438 | /* PD3 = packet size */ | |
439 | reg = length; | |
440 | ||
441 | return reg; | |
442 | } | |
443 | ||
444 | static u32 get_host_pd6(u32 length) | |
445 | { | |
446 | u32 reg; | |
447 | ||
448 | /* PD6 buffer size */ | |
449 | reg = DESC_PD_COMPLETE; | |
450 | reg |= length; | |
451 | ||
452 | return reg; | |
453 | } | |
454 | ||
455 | static u32 get_host_pd4_or_7(u32 addr) | |
456 | { | |
457 | u32 reg; | |
458 | ||
459 | reg = addr; | |
460 | ||
461 | return reg; | |
462 | } | |
463 | ||
464 | static u32 get_host_pd5(void) | |
465 | { | |
466 | u32 reg; | |
467 | ||
468 | reg = 0; | |
469 | ||
470 | return reg; | |
471 | } | |
472 | ||
473 | static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( | |
474 | struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len, | |
475 | enum dma_transfer_direction dir, unsigned long tx_flags, void *context) | |
476 | { | |
477 | struct cppi41_channel *c = to_cpp41_chan(chan); | |
478 | struct cppi41_desc *d; | |
479 | struct scatterlist *sg; | |
480 | unsigned int i; | |
481 | unsigned int num; | |
482 | ||
483 | num = 0; | |
484 | d = c->desc; | |
485 | for_each_sg(sgl, sg, sg_len, i) { | |
486 | u32 addr; | |
487 | u32 len; | |
488 | ||
489 | /* We need to use more than one desc once musb supports sg */ | |
490 | BUG_ON(num > 0); | |
491 | addr = lower_32_bits(sg_dma_address(sg)); | |
492 | len = sg_dma_len(sg); | |
493 | ||
494 | d->pd0 = get_host_pd0(len); | |
495 | d->pd1 = get_host_pd1(c); | |
496 | d->pd2 = get_host_pd2(c); | |
497 | d->pd3 = get_host_pd3(len); | |
498 | d->pd4 = get_host_pd4_or_7(addr); | |
499 | d->pd5 = get_host_pd5(); | |
500 | d->pd6 = get_host_pd6(len); | |
501 | d->pd7 = get_host_pd4_or_7(addr); | |
502 | ||
503 | d++; | |
504 | } | |
505 | ||
506 | return &c->txd; | |
507 | } | |
508 | ||
/* Slave config is accepted but nothing is programmable per-channel yet. */
static int cpp41_cfg_chan(struct cppi41_channel *c,
	struct dma_slave_config *cfg)
{
	return 0;
}
514 | ||
515 | static void cppi41_compute_td_desc(struct cppi41_desc *d) | |
516 | { | |
517 | d->pd0 = DESC_TYPE_TEARD << DESC_TYPE; | |
518 | } | |
519 | ||
520 | static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num) | |
521 | { | |
522 | u32 desc; | |
523 | ||
524 | desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num)); | |
525 | desc &= ~0x1f; | |
526 | return desc; | |
527 | } | |
528 | ||
/*
 * Tear down a channel.  This is a small state machine advanced by
 * repeated calls; the caller must retry while -EAGAIN is returned:
 *
 *  1. td_queued:    a teardown descriptor is pushed to the teardown
 *                   submit queue and the channel's teardown bit is set.
 *  2. td_seen:      the teardown descriptor came back on the completion
 *                   queue (cdd->td_queue.complete for TX, the channel's
 *                   own completion queue for RX) and is sanity-checked.
 *  3. td_desc_seen: the in-flight transfer descriptor itself has been
 *                   reclaimed from the completion queue.
 *
 * Once everything is seen the flags are reset, the channel register is
 * cleared and 0 is returned.
 */
static int cppi41_tear_down_chan(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	struct cppi41_desc *td;
	u32 reg;
	u32 desc_phys;
	u32 td_desc_phys;

	/* the teardown descriptor lives right after the channel descs */
	td = cdd->cd;
	td += cdd->first_td_desc;

	td_desc_phys = cdd->descs_phys;
	td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);

	if (!c->td_queued) {
		cppi41_compute_td_desc(td);
		__iowmb();

		reg = (sizeof(struct cppi41_desc) - 24) / 4;
		reg |= td_desc_phys;
		cppi_writel(reg, cdd->qmgr_mem +
				QMGR_QUEUE_D(cdd->td_queue.submit));

		reg = GCR_CHAN_ENABLE;
		if (!c->is_tx) {
			reg |= GCR_STARV_RETRY;
			reg |= GCR_DESC_TYPE_HOST;
			reg |= c->q_comp_num;
		}
		reg |= GCR_TEARDOWN;
		cppi_writel(reg, c->gcr_reg);
		c->td_queued = 1;
		c->td_retry = 100;
	}

	if (!c->td_seen) {
		unsigned td_comp_queue;

		if (c->is_tx)
			td_comp_queue = cdd->td_queue.complete;
		else
			td_comp_queue = c->q_comp_num;

		desc_phys = cppi41_pop_desc(cdd, td_comp_queue);
		if (desc_phys) {
			__iormb();

			if (desc_phys == td_desc_phys) {
				u32 pd0;
				pd0 = td->pd0;
				WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
				WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
				WARN_ON((pd0 & 0x1f) != c->port_num);
			} else {
				/* popped a descriptor that is neither the
				 * teardown descriptor - unexpected */
				WARN_ON_ONCE(1);
			}
			c->td_seen = 1;
		}
	}
	if (!c->td_desc_seen) {
		desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
		if (desc_phys) {
			__iormb();
			WARN_ON(c->desc_phys != desc_phys);
			c->td_desc_seen = 1;
		}
	}
	c->td_retry--;
	/*
	 * If the TX descriptor / channel is in use, the caller needs to poke
	 * the TD bit multiple times. After that the hardware releases the
	 * transfer descriptor followed by TD descriptor. Waiting seems not to
	 * cause any difference.
	 * RX seems to be thrown out right away. However once the TearDown
	 * descriptor gets through we are done. If we have seen the transfer
	 * descriptor before the TD we fetch it from enqueue, it has to be
	 * there waiting for us.
	 */
	if (!c->td_seen && c->td_retry)
		return -EAGAIN;

	WARN_ON(!c->td_retry);
	if (!c->td_desc_seen) {
		desc_phys = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
		WARN_ON(!desc_phys);
	}

	c->td_queued = 0;
	c->td_seen = 0;
	c->td_desc_seen = 0;
	cppi_writel(0, c->gcr_reg);
	return 0;
}
622 | ||
/*
 * DMA_TERMINATE_ALL handler: run the teardown state machine (the caller
 * must repeat the call while -EAGAIN is returned), then release the
 * channel's busy descriptor slot.
 */
static int cppi41_stop_chan(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	int ret;

	ret = cppi41_tear_down_chan(c);
	if (ret)
		return ret;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(!cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = NULL;

	return 0;
}
642 | ||
643 | static int cppi41_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, | |
644 | unsigned long arg) | |
645 | { | |
646 | struct cppi41_channel *c = to_cpp41_chan(chan); | |
647 | int ret; | |
648 | ||
649 | switch (cmd) { | |
650 | case DMA_SLAVE_CONFIG: | |
651 | ret = cpp41_cfg_chan(c, (struct dma_slave_config *) arg); | |
652 | break; | |
653 | ||
654 | case DMA_TERMINATE_ALL: | |
655 | ret = cppi41_stop_chan(chan); | |
656 | break; | |
657 | ||
658 | default: | |
659 | ret = -ENXIO; | |
660 | break; | |
661 | } | |
662 | return ret; | |
663 | } | |
664 | ||
665 | static void cleanup_chans(struct cppi41_dd *cdd) | |
666 | { | |
667 | while (!list_empty(&cdd->ddev.channels)) { | |
668 | struct cppi41_channel *cchan; | |
669 | ||
670 | cchan = list_first_entry(&cdd->ddev.channels, | |
671 | struct cppi41_channel, chan.device_node); | |
672 | list_del(&cchan->chan.device_node); | |
673 | kfree(cchan); | |
674 | } | |
675 | } | |
676 | ||
/*
 * Allocate one cppi41_channel per hardware channel and direction: even
 * indices become RX channels, odd ones TX, and two adjacent indices share
 * one port number.  The descriptor slot after the last channel's is
 * reserved for teardown (first_td_desc).
 *
 * Returns 0 on success, the of_property_read_u32() error code, or
 * -ENOMEM (after freeing the partially built list).
 */
static int cppi41_add_chans(struct platform_device *pdev, struct cppi41_dd *cdd)
{
	struct cppi41_channel *cchan;
	int i;
	int ret;
	u32 n_chans;

	ret = of_property_read_u32(pdev->dev.of_node, "#dma-channels",
			&n_chans);
	if (ret)
		return ret;
	/*
	 * The channels can only be used as TX or as RX. So we add twice
	 * that much dma channels because USB can only do RX or TX.
	 */
	n_chans *= 2;

	for (i = 0; i < n_chans; i++) {
		cchan = kzalloc(sizeof(*cchan), GFP_KERNEL);
		if (!cchan)
			goto err;

		cchan->cdd = cdd;
		if (i & 1) {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
			cchan->is_tx = 1;
		} else {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
			cchan->is_tx = 0;
		}
		cchan->port_num = i >> 1;
		cchan->desc = &cdd->cd[i];
		cchan->desc_phys = cdd->descs_phys;
		cchan->desc_phys += i * sizeof(struct cppi41_desc);
		cchan->chan.device = &cdd->ddev;
		list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
	}
	cdd->first_td_desc = n_chans;

	return 0;
err:
	cleanup_chans(cdd);
	return -ENOMEM;
}
721 | ||
/*
 * Undo init_descs(): clear the queue-manager memory-region registers and
 * free the coherent descriptor area.
 */
static void purge_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
{
	unsigned int mem_decs;
	int i;

	mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);

	for (i = 0; i < DESCS_AREAS; i++) {

		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		dma_free_coherent(&pdev->dev, mem_decs, cdd->cd,
				cdd->descs_phys);
	}
}
738 | ||
/* Stop the DMA scheduler by clearing its control register. */
static void disable_sched(struct cppi41_dd *cdd)
{
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
}
743 | ||
744 | static void deinit_cpii41(struct platform_device *pdev, struct cppi41_dd *cdd) | |
745 | { | |
746 | disable_sched(cdd); | |
747 | ||
748 | purge_descs(pdev, cdd); | |
749 | ||
750 | cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE); | |
751 | cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE); | |
752 | dma_free_coherent(&pdev->dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch, | |
753 | cdd->scratch_phys); | |
754 | } | |
755 | ||
/*
 * Allocate the coherent descriptor memory and register it with the queue
 * manager.  MEMCTRL encodes the starting descriptor index, log2 of the
 * descriptor size minus 5, and log2 of the descriptor count minus 5 -
 * hence the BUILD_BUG_ONs enforcing power-of-two size >= 32 and count
 * >= 32.
 */
static int init_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
{
	unsigned int desc_size;
	unsigned int mem_decs;
	int i;
	u32 reg;
	u32 idx;

	BUILD_BUG_ON(sizeof(struct cppi41_desc) &
			(sizeof(struct cppi41_desc) - 1));
	BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
	BUILD_BUG_ON(ALLOC_DECS_NUM < 32);

	desc_size = sizeof(struct cppi41_desc);
	mem_decs = ALLOC_DECS_NUM * desc_size;

	idx = 0;
	for (i = 0; i < DESCS_AREAS; i++) {

		reg = idx << QMGR_MEMCTRL_IDX_SH;
		reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
		reg |= ilog2(ALLOC_DECS_NUM) - 5;

		BUILD_BUG_ON(DESCS_AREAS != 1);
		cdd->cd = dma_alloc_coherent(&pdev->dev, mem_decs,
				&cdd->descs_phys, GFP_KERNEL);
		if (!cdd->cd)
			return -ENOMEM;

		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		idx += ALLOC_DECS_NUM;
	}
	return 0;
}
792 | ||
/*
 * Program the scheduler word table: each 32-bit word schedules two
 * channels, TX (entries 0/2) and RX (entries 1/3), for 15 * 2 channels
 * in total, then enable the scheduler with the last entry index in the
 * control register.
 */
static void init_sched(struct cppi41_dd *cdd)
{
	unsigned ch;
	unsigned word;
	u32 reg;

	word = 0;
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
	for (ch = 0; ch < 15 * 2; ch += 2) {

		reg = SCHED_ENTRY0_CHAN(ch);
		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;

		reg |= SCHED_ENTRY2_CHAN(ch + 1);
		reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
		word++;
	}
	/* last entry index = 15 * 2 * 2 - 1, plus the enable bit */
	reg = 15 * 2 * 2 - 1;
	reg |= DMA_SCHED_CTRL_EN;
	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
}
815 | ||
/*
 * Bring the engine up: hand the linking-RAM scratch area to the queue
 * manager, set up descriptor memory, program the teardown free-descriptor
 * queue and start the scheduler.
 */
static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
{
	int ret;

	/* LRAM_SIZE is a 14-bit field */
	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
	cdd->qmgr_scratch = dma_alloc_coherent(&pdev->dev, QMGR_SCRATCH_SIZE,
			&cdd->scratch_phys, GFP_KERNEL);
	if (!cdd->qmgr_scratch)
		return -ENOMEM;

	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	ret = init_descs(pdev, cdd);
	if (ret)
		goto err_td;

	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
	init_sched(cdd);
	return 0;
err_td:
	deinit_cpii41(pdev, cdd);
	return ret;
}
841 | ||
842 | static struct platform_driver cpp41_dma_driver; | |
843 | /* | |
844 | * The param format is: | |
845 | * X Y | |
846 | * X: Port | |
847 | * Y: 0 = RX else TX | |
848 | */ | |
849 | #define INFO_PORT 0 | |
850 | #define INFO_IS_TX 1 | |
851 | ||
852 | static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param) | |
853 | { | |
854 | struct cppi41_channel *cchan; | |
855 | struct cppi41_dd *cdd; | |
856 | const struct chan_queues *queues; | |
857 | u32 *num = param; | |
858 | ||
859 | if (chan->device->dev->driver != &cpp41_dma_driver.driver) | |
860 | return false; | |
861 | ||
862 | cchan = to_cpp41_chan(chan); | |
863 | ||
864 | if (cchan->port_num != num[INFO_PORT]) | |
865 | return false; | |
866 | ||
867 | if (cchan->is_tx && !num[INFO_IS_TX]) | |
868 | return false; | |
869 | cdd = cchan->cdd; | |
870 | if (cchan->is_tx) | |
871 | queues = cdd->queues_tx; | |
872 | else | |
873 | queues = cdd->queues_rx; | |
874 | ||
875 | BUILD_BUG_ON(ARRAY_SIZE(usb_queues_rx) != ARRAY_SIZE(usb_queues_tx)); | |
876 | if (WARN_ON(cchan->port_num > ARRAY_SIZE(usb_queues_rx))) | |
877 | return false; | |
878 | ||
879 | cchan->q_num = queues[cchan->port_num].submit; | |
880 | cchan->q_comp_num = queues[cchan->port_num].complete; | |
881 | return true; | |
882 | } | |
883 | ||
/* Filter info handed to the OF DMA translation; dma_cap is filled in
 * from the device's capability mask at probe time. */
static struct of_dma_filter_info cpp41_dma_info = {
	.filter_fn = cpp41_dma_filter_fn,
};
887 | ||
888 | static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec, | |
889 | struct of_dma *ofdma) | |
890 | { | |
891 | int count = dma_spec->args_count; | |
892 | struct of_dma_filter_info *info = ofdma->of_dma_data; | |
893 | ||
894 | if (!info || !info->filter_fn) | |
895 | return NULL; | |
896 | ||
897 | if (count != 2) | |
898 | return NULL; | |
899 | ||
900 | return dma_request_channel(info->dma_cap, info->filter_fn, | |
901 | &dma_spec->args[0]); | |
902 | } | |
903 | ||
/* AM335x USB glue: completion ISR, queue maps and the teardown queues. */
static const struct cppi_glue_infos usb_infos = {
	.isr = cppi41_irq,
	.queues_rx = usb_queues_rx,
	.queues_tx = usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
};

static const struct of_device_id cppi41_dma_ids[] = {
	{ .compatible = "ti,am3359-cppi41", .data = &usb_infos},
	{},
};
MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
916 | ||
917 | static const struct cppi_glue_infos *get_glue_info(struct platform_device *pdev) | |
918 | { | |
919 | const struct of_device_id *of_id; | |
920 | ||
921 | of_id = of_match_node(cppi41_dma_ids, pdev->dev.of_node); | |
922 | if (!of_id) | |
923 | return NULL; | |
924 | return of_id->data; | |
925 | } | |
926 | ||
927 | static int cppi41_dma_probe(struct platform_device *pdev) | |
928 | { | |
929 | struct cppi41_dd *cdd; | |
930 | const struct cppi_glue_infos *glue_info; | |
931 | int irq; | |
932 | int ret; | |
933 | ||
934 | glue_info = get_glue_info(pdev); | |
935 | if (!glue_info) | |
936 | return -EINVAL; | |
937 | ||
938 | cdd = kzalloc(sizeof(*cdd), GFP_KERNEL); | |
939 | if (!cdd) | |
940 | return -ENOMEM; | |
941 | ||
942 | dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask); | |
943 | cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources; | |
944 | cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources; | |
945 | cdd->ddev.device_tx_status = cppi41_dma_tx_status; | |
946 | cdd->ddev.device_issue_pending = cppi41_dma_issue_pending; | |
947 | cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg; | |
948 | cdd->ddev.device_control = cppi41_dma_control; | |
949 | cdd->ddev.dev = &pdev->dev; | |
950 | INIT_LIST_HEAD(&cdd->ddev.channels); | |
951 | cpp41_dma_info.dma_cap = cdd->ddev.cap_mask; | |
952 | ||
953 | cdd->usbss_mem = of_iomap(pdev->dev.of_node, 0); | |
954 | cdd->ctrl_mem = of_iomap(pdev->dev.of_node, 1); | |
955 | cdd->sched_mem = of_iomap(pdev->dev.of_node, 2); | |
956 | cdd->qmgr_mem = of_iomap(pdev->dev.of_node, 3); | |
957 | ||
958 | if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem || | |
959 | !cdd->qmgr_mem) { | |
960 | ret = -ENXIO; | |
961 | goto err_remap; | |
962 | } | |
963 | ||
d6aafa2b SAS |
964 | pm_runtime_enable(&pdev->dev); |
965 | ret = pm_runtime_get_sync(&pdev->dev); | |
966 | if (ret) | |
967 | goto err_get_sync; | |
968 | ||
9b3452d1 SAS |
969 | cdd->queues_rx = glue_info->queues_rx; |
970 | cdd->queues_tx = glue_info->queues_tx; | |
971 | cdd->td_queue = glue_info->td_queue; | |
972 | ||
973 | ret = init_cppi41(pdev, cdd); | |
974 | if (ret) | |
975 | goto err_init_cppi; | |
976 | ||
977 | ret = cppi41_add_chans(pdev, cdd); | |
978 | if (ret) | |
979 | goto err_chans; | |
980 | ||
981 | irq = irq_of_parse_and_map(pdev->dev.of_node, 0); | |
982 | if (!irq) | |
983 | goto err_irq; | |
984 | ||
985 | cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER); | |
986 | ||
987 | ret = request_irq(irq, glue_info->isr, IRQF_SHARED, | |
988 | dev_name(&pdev->dev), cdd); | |
989 | if (ret) | |
990 | goto err_irq; | |
991 | cdd->irq = irq; | |
992 | ||
993 | ret = dma_async_device_register(&cdd->ddev); | |
994 | if (ret) | |
995 | goto err_dma_reg; | |
996 | ||
997 | ret = of_dma_controller_register(pdev->dev.of_node, | |
998 | cppi41_dma_xlate, &cpp41_dma_info); | |
999 | if (ret) | |
1000 | goto err_of; | |
1001 | ||
1002 | platform_set_drvdata(pdev, cdd); | |
1003 | return 0; | |
1004 | err_of: | |
1005 | dma_async_device_unregister(&cdd->ddev); | |
1006 | err_dma_reg: | |
1007 | free_irq(irq, cdd); | |
1008 | err_irq: | |
1009 | cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR); | |
1010 | cleanup_chans(cdd); | |
1011 | err_chans: | |
1012 | deinit_cpii41(pdev, cdd); | |
1013 | err_init_cppi: | |
d6aafa2b SAS |
1014 | pm_runtime_put(&pdev->dev); |
1015 | err_get_sync: | |
1016 | pm_runtime_disable(&pdev->dev); | |
9b3452d1 SAS |
1017 | iounmap(cdd->usbss_mem); |
1018 | iounmap(cdd->ctrl_mem); | |
1019 | iounmap(cdd->sched_mem); | |
1020 | iounmap(cdd->qmgr_mem); | |
1021 | err_remap: | |
1022 | kfree(cdd); | |
1023 | return ret; | |
1024 | } | |
1025 | ||
/* Unwind everything probe set up, in reverse order. */
static int cppi41_dma_remove(struct platform_device *pdev)
{
	struct cppi41_dd *cdd = platform_get_drvdata(pdev);

	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&cdd->ddev);

	cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
	free_irq(cdd->irq, cdd);
	cleanup_chans(cdd);
	deinit_cpii41(pdev, cdd);
	iounmap(cdd->usbss_mem);
	iounmap(cdd->ctrl_mem);
	iounmap(cdd->sched_mem);
	iounmap(cdd->qmgr_mem);
	pm_runtime_put(&pdev->dev);
	pm_runtime_disable(&pdev->dev);
	kfree(cdd);
	return 0;
}
1046 | ||
/* Platform driver glue; matched via device tree (cppi41_dma_ids). */
static struct platform_driver cpp41_dma_driver = {
	.probe = cppi41_dma_probe,
	.remove = cppi41_dma_remove,
	.driver = {
		.name = "cppi41-dma-engine",
		.owner = THIS_MODULE,
		.of_match_table = of_match_ptr(cppi41_dma_ids),
	},
};

module_platform_driver(cpp41_dma_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");