Commit | Line | Data |
---|---|---|
77241056 | 1 | /* |
05d6ac1d | 2 | * Copyright(c) 2015, 2016 Intel Corporation. |
77241056 MM |
3 | * |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
77241056 MM |
9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of version 2 of the GNU General Public License as | |
11 | * published by the Free Software Foundation. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, but | |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * General Public License for more details. | |
17 | * | |
18 | * BSD LICENSE | |
19 | * | |
77241056 MM |
20 | * Redistribution and use in source and binary forms, with or without |
21 | * modification, are permitted provided that the following conditions | |
22 | * are met: | |
23 | * | |
24 | * - Redistributions of source code must retain the above copyright | |
25 | * notice, this list of conditions and the following disclaimer. | |
26 | * - Redistributions in binary form must reproduce the above copyright | |
27 | * notice, this list of conditions and the following disclaimer in | |
28 | * the documentation and/or other materials provided with the | |
29 | * distribution. | |
30 | * - Neither the name of Intel Corporation nor the names of its | |
31 | * contributors may be used to endorse or promote products derived | |
32 | * from this software without specific prior written permission. | |
33 | * | |
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
35 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
36 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
37 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
38 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
39 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
40 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
41 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
42 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
43 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
44 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
45 | * | |
46 | */ | |
47 | ||
48 | #include "hfi.h" | |
49 | ||
50 | /* additive distance between non-SOP and SOP space */ | |
51 | #define SOP_DISTANCE (TXE_PIO_SIZE / 2) | |
8638b77f | 52 | #define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1) |
77241056 | 53 | /* number of QUADWORDs in a block */ |
8638b77f | 54 | #define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64)) |
77241056 MM |
55 | |
/**
 * pio_copy - copy data block to MMIO space
 * @dd: hfi1 device data (unused here; kept for the exported interface)
 * @pbuf: a number of blocks allocated within a PIO send context
 * @pbc: PBC to send
 * @from: source, must be 8 byte aligned
 * @count: number of DWORD (32-bit) quantities to copy from source
 *
 * Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
 * Must always write full BLOCK_SIZE bytes blocks.  The first block must
 * be written to the corresponding SOP=1 address.
 *
 * Known:
 * o pbuf->start always starts on a block boundary
 * o pbuf can wrap only at a block boundary
 */
void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
	      const void *from, size_t count)
{
	void __iomem *dest = pbuf->start + SOP_DISTANCE;
	void __iomem *send = dest + PIO_BLOCK_SIZE;
	void __iomem *dend;			/* 8-byte data end */

	/* write the PBC */
	writeq(pbc, dest);
	dest += sizeof(u64);

	/* calculate where the QWORD data ends - in SOP=1 space */
	dend = dest + ((count >> 1) * sizeof(u64));

	if (dend < send) {
		/*
		 * all QWORD data is within the SOP block, does *not*
		 * reach the end of the SOP block
		 */

		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/*
		 * No boundary checks are needed here:
		 * 0. We're not on the SOP block boundary
		 * 1. The possible DWORD dangle will still be within
		 *    the SOP block
		 * 2. We cannot wrap except on a block boundary.
		 */
	} else {
		/* QWORD data extends _to_ or beyond the SOP block */

		/* write 8-byte SOP chunk data */
		while (dest < send) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/* drop out of the SOP range */
		dest -= SOP_DISTANCE;
		dend -= SOP_DISTANCE;

		/*
		 * If the wrap comes before or matches the data end,
		 * copy until the wrap, then wrap.
		 *
		 * If the data ends at the end of the SOP above and
		 * the buffer wraps, then pbuf->end == dend == dest
		 * and nothing will get written, but we will wrap in
		 * case there is a dangling DWORD.
		 */
		if (pbuf->end <= dend) {
			while (dest < pbuf->end) {
				writeq(*(u64 *)from, dest);
				from += sizeof(u64);
				dest += sizeof(u64);
			}

			dest -= pbuf->size;
			dend -= pbuf->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
	}
	/* at this point we have wrapped if we are going to wrap */

	/* write dangling u32, if any; pad the upper DWORD with zero */
	if (count & 1) {
		union mix val;

		val.val64 = 0;
		val.val32[0] = *(u32 *)from;
		writeq(val.val64, dest);
		dest += sizeof(u64);
	}
	/*
	 * fill in rest of block, no need to check pbuf->end
	 * as we only wrap on a block boundary
	 */
	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
		writeq(0, dest);
		dest += sizeof(u64);
	}

	/* finished with this buffer */
	this_cpu_dec(*pbuf->sc->buffers_allocated);
	preempt_enable();
}
167 | ||
168 | /* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */ | |
169 | #define USE_SHIFTS 1 | |
170 | #ifdef USE_SHIFTS | |
171 | /* | |
172 | * Handle carry bytes using shifts and masks. | |
173 | * | |
 * NOTE: the value of the unused portion of carry is expected to always be zero.
175 | */ | |
176 | ||
177 | /* | |
178 | * "zero" shift - bit shift used to zero out upper bytes. Input is | |
179 | * the count of LSB bytes to preserve. | |
180 | */ | |
8638b77f | 181 | #define zshift(x) (8 * (8 - (x))) |
77241056 MM |
182 | |
183 | /* | |
184 | * "merge" shift - bit shift used to merge with carry bytes. Input is | |
185 | * the LSB byte count to move beyond. | |
186 | */ | |
187 | #define mshift(x) (8 * (x)) | |
188 | ||
189 | /* | |
190 | * Read nbytes bytes from "from" and return them in the LSB bytes | |
191 | * of pbuf->carry. Other bytes are zeroed. Any previous value | |
192 | * pbuf->carry is lost. | |
193 | * | |
194 | * NOTES: | |
195 | * o do not read from from if nbytes is zero | |
196 | * o from may _not_ be u64 aligned | |
197 | * o nbytes must not span a QW boundary | |
198 | */ | |
199 | static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, | |
17fb4f29 | 200 | unsigned int nbytes) |
77241056 MM |
201 | { |
202 | unsigned long off; | |
203 | ||
204 | if (nbytes == 0) { | |
205 | pbuf->carry.val64 = 0; | |
206 | } else { | |
207 | /* align our pointer */ | |
208 | off = (unsigned long)from & 0x7; | |
209 | from = (void *)((unsigned long)from & ~0x7l); | |
210 | pbuf->carry.val64 = ((*(u64 *)from) | |
211 | << zshift(nbytes + off))/* zero upper bytes */ | |
212 | >> zshift(nbytes); /* place at bottom */ | |
213 | } | |
214 | pbuf->carry_bytes = nbytes; | |
215 | } | |
216 | ||
/*
 * Read nbytes bytes from "from" and put them at the next significant bytes
 * of pbuf->carry.  Unused bytes are zeroed.  It is expected that the extra
 * read does not overfill carry.
 *
 * NOTES:
 * o from may _not_ be u64 aligned
 * o nbytes may span a QW boundary
 * o carry_bytes + nbytes must not exceed 8 (carry is a single QW)
 */
static inline void read_extra_bytes(struct pio_buf *pbuf,
				    const void *from, unsigned int nbytes)
{
	unsigned long off = (unsigned long)from & 0x7;
	unsigned int room, xbytes;

	/* align our pointer */
	from = (void *)((unsigned long)from & ~0x7l);

	/* check count first - don't read anything if count is zero */
	while (nbytes) {
		/* find the number of bytes in this u64 */
		room = 8 - off;	/* this u64 has room for this many bytes */
		xbytes = min(room, nbytes);

		/*
		 * shift down to zero lower bytes, shift up to zero upper
		 * bytes, shift back down to move into place
		 */
		pbuf->carry.val64 |= (((*(u64 *)from)
					>> mshift(off))
					<< zshift(xbytes))
					>> zshift(xbytes + pbuf->carry_bytes);
		/* subsequent QWs are read from their start */
		off = 0;
		pbuf->carry_bytes += xbytes;
		nbytes -= xbytes;
		from += sizeof(u64);
	}
}
255 | ||
256 | /* | |
257 | * Zero extra bytes from the end of pbuf->carry. | |
258 | * | |
259 | * NOTES: | |
260 | * o zbytes <= old_bytes | |
261 | */ | |
262 | static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) | |
263 | { | |
264 | unsigned int remaining; | |
265 | ||
266 | if (zbytes == 0) /* nothing to do */ | |
267 | return; | |
268 | ||
269 | remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */ | |
270 | ||
271 | /* NOTE: zshift only guaranteed to work if remaining != 0 */ | |
272 | if (remaining) | |
273 | pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining)) | |
274 | >> zshift(remaining); | |
275 | else | |
276 | pbuf->carry.val64 = 0; | |
277 | pbuf->carry_bytes = remaining; | |
278 | } | |
279 | ||
/*
 * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
 * Put the unused part of the next 8 bytes of src into the LSB bytes of
 * pbuf->carry with the upper bytes zeroed.
 *
 * NOTES:
 * o result must keep unused bytes zeroed
 * o src must be u64 aligned
 * o requires 0 < carry_bytes < 8: zshift(0) expands to a shift by 64,
 *   which is undefined behavior for a u64
 */
static inline void merge_write8(
	struct pio_buf *pbuf,
	void __iomem *dest,
	const void *src)
{
	u64 new, temp;

	new = *(u64 *)src;
	/* low carry bytes + leading src bytes form one full QW */
	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
	writeq(temp, dest);
	/* leftover high src bytes become the new carry (upper bytes zero) */
	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
301 | ||
/*
 * Write a quad word using all bytes of carry.  Caller guarantees carry
 * holds a full 8 valid bytes.
 */
static inline void carry8_write8(union mix carry, void __iomem *dest)
{
	writeq(carry.val64, dest);
}
309 | ||
310 | /* | |
311 | * Write a quad word using all the valid bytes of carry. If carry | |
312 | * has zero valid bytes, nothing is written. | |
313 | * Returns 0 on nothing written, non-zero on quad word written. | |
314 | */ | |
315 | static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) | |
316 | { | |
317 | if (pbuf->carry_bytes) { | |
318 | /* unused bytes are always kept zeroed, so just write */ | |
319 | writeq(pbuf->carry.val64, dest); | |
320 | return 1; | |
321 | } | |
322 | ||
323 | return 0; | |
324 | } | |
325 | ||
326 | #else /* USE_SHIFTS */ | |
327 | /* | |
328 | * Handle carry bytes using byte copies. | |
329 | * | |
 * NOTE: the value of the unused portion of carry is left uninitialized.
331 | */ | |
332 | ||
/*
 * Jump copy - no-loop copy for < 8 bytes.  Each case deliberately falls
 * through to the next, copying exactly n bytes total.
 */
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
{
	switch (n) {
	case 7:
		*dest++ = *src++;
		/* fall through */
	case 6:
		*dest++ = *src++;
		/* fall through */
	case 5:
		*dest++ = *src++;
		/* fall through */
	case 4:
		*dest++ = *src++;
		/* fall through */
	case 3:
		*dest++ = *src++;
		/* fall through */
	case 2:
		*dest++ = *src++;
		/* fall through */
	case 1:
		*dest++ = *src++;
	}
}
355 | ||
/*
 * Read nbytes from "from" and place them in the low bytes
 * of pbuf->carry.  Other bytes are left as-is.  Any previous
 * value in pbuf->carry is lost.
 *
 * NOTES:
 * o do not read from "from" if nbytes is zero (jcopy copies nothing then)
 * o from may _not_ be u64 aligned.
 */
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
				  unsigned int nbytes)
{
	jcopy(&pbuf->carry.val8[0], from, nbytes);
	pbuf->carry_bytes = nbytes;
}
371 | ||
/*
 * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
 * It is expected that the extra read does not overfill carry
 * (carry_bytes + nbytes <= 8).
 *
 * NOTES:
 * o from may _not_ be u64 aligned
 * o nbytes may span a QW boundary
 */
static inline void read_extra_bytes(struct pio_buf *pbuf,
				    const void *from, unsigned int nbytes)
{
	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
	pbuf->carry_bytes += nbytes;
}
386 | ||
/*
 * Zero extra bytes from the end of pbuf->carry.
 *
 * In this variant we do not care about the value of unused bytes in
 * carry, so just reduce the byte count; no masking is needed.
 *
 * NOTES:
 * o zbytes <= old_bytes
 */
static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
{
	pbuf->carry_bytes -= zbytes;
}
400 | ||
/*
 * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
 * Put the unused part of the next 8 bytes of src into the low bytes of
 * pbuf->carry.
 *
 * NOTE(review): dest lacks the __iomem annotation the USE_SHIFTS variant
 * carries - harmless while this branch is compiled out, but sparse would
 * flag it if USE_SHIFTS were ever disabled; confirm before enabling.
 */
static inline void merge_write8(
	struct pio_buf *pbuf,
	void *dest,
	const void *src)
{
	u32 remainder = 8 - pbuf->carry_bytes;

	/* top up carry to a full QW with the leading bytes of src */
	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
	writeq(pbuf->carry.val64, dest);
	/* the trailing bytes of src become the new carry */
	jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
}
417 | ||
/*
 * Write a quad word using all bytes of carry.  Caller guarantees carry
 * holds a full 8 valid bytes.
 */
static inline void carry8_write8(union mix carry, void *dest)
{
	writeq(carry.val64, dest);
}
425 | ||
/*
 * Write a quad word using all the valid bytes of carry.  If carry
 * has zero valid bytes, nothing is written.
 * Returns 0 on nothing written, non-zero on quad word written.
 */
static inline int carry_write8(struct pio_buf *pbuf, void *dest)
{
	if (pbuf->carry_bytes) {
		u64 zero = 0;

		/* zero-fill the unused tail so a full QW can be written */
		jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
		      8 - pbuf->carry_bytes);
		writeq(pbuf->carry.val64, dest);
		return 1;
	}

	return 0;
}
444 | #endif /* USE_SHIFTS */ | |
445 | ||
/*
 * Segmented PIO Copy - start
 *
 * Start a PIO copy: write the PBC and all whole QWs of the first
 * segment, saving any dangling (< 8) tail bytes in pbuf->carry.
 *
 * @pbuf: destination buffer
 * @pbc: the PBC for the PIO buffer
 * @from: data source, QWORD aligned
 * @nbytes: bytes to copy
 */
void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
			const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + SOP_DISTANCE;
	void __iomem *send = dest + PIO_BLOCK_SIZE;
	void __iomem *dend;			/* 8-byte data end */

	writeq(pbc, dest);
	dest += sizeof(u64);

	/* calculate where the QWORD data ends - in SOP=1 space */
	dend = dest + ((nbytes >> 3) * sizeof(u64));

	if (dend < send) {
		/*
		 * all QWORD data is within the SOP block, does *not*
		 * reach the end of the SOP block
		 */

		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/*
		 * No boundary checks are needed here:
		 * 0. We're not on the SOP block boundary
		 * 1. The possible DWORD dangle will still be within
		 *    the SOP block
		 * 2. We cannot wrap except on a block boundary.
		 */
	} else {
		/* QWORD data extends _to_ or beyond the SOP block */

		/* write 8-byte SOP chunk data */
		while (dest < send) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/* drop out of the SOP range */
		dest -= SOP_DISTANCE;
		dend -= SOP_DISTANCE;

		/*
		 * If the wrap comes before or matches the data end,
		 * copy until the wrap, then wrap.
		 *
		 * If the data ends at the end of the SOP above and
		 * the buffer wraps, then pbuf->end == dend == dest
		 * and nothing will get written, but we will wrap in
		 * case there is a dangling DWORD.
		 */
		if (pbuf->end <= dend) {
			while (dest < pbuf->end) {
				writeq(*(u64 *)from, dest);
				from += sizeof(u64);
				dest += sizeof(u64);
			}

			dest -= pbuf->size;
			dend -= pbuf->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
	}
	/* at this point we have wrapped if we are going to wrap */

	/* ...but it doesn't matter as we're done writing */

	/* save dangling bytes, if any */
	read_low_bytes(pbuf, from, nbytes & 0x7);

	/* account for the PBC QW plus all full data QWs written */
	pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
}
536 | ||
/*
 * Mid copy helper, "mixed case" - source is 64-bit aligned but carry
 * bytes are non-zero.
 *
 * Whole u64s must be written to the chip, so bytes must be manually merged.
 *
 * @pbuf: destination buffer
 * @from: data source, is QWORD aligned.
 * @nbytes: bytes to copy
 *
 * Must handle nbytes < 8.
 */
static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
	void __iomem *dend;			/* 8-byte data end */
	/* whole QWs producible from old carry plus the new bytes */
	unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
	/* bytes that will remain in carry afterwards */
	unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;

	/* calculate 8-byte data end */
	dend = dest + (qw_to_write * sizeof(u64));

	if (pbuf->qw_written < PIO_BLOCK_QWS) {
		/*
		 * Still within SOP block.  We don't need to check for
		 * wrap because we are still in the first block and
		 * can only wrap on block boundaries.
		 */
		void __iomem *send;		/* SOP end */
		void __iomem *xend;

		/*
		 * calculate the end of data or end of block, whichever
		 * comes first
		 */
		send = pbuf->start + PIO_BLOCK_SIZE;
		xend = min(send, dend);

		/* shift up to SOP=1 space */
		dest += SOP_DISTANCE;
		xend += SOP_DISTANCE;

		/* write 8-byte chunk data */
		while (dest < xend) {
			merge_write8(pbuf, dest, from);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		/* shift down to SOP=0 space */
		dest -= SOP_DISTANCE;
	}
	/*
	 * At this point dest could be (either, both, or neither):
	 * - at dend
	 * - at the wrap
	 */

	/*
	 * If the wrap comes before or matches the data end,
	 * copy until the wrap, then wrap.
	 *
	 * If dest is at the wrap, we will fall into the if,
	 * not do the loop, when wrap.
	 *
	 * If the data ends at the end of the SOP above and
	 * the buffer wraps, then pbuf->end == dend == dest
	 * and nothing will get written.
	 */
	if (pbuf->end <= dend) {
		while (dest < pbuf->end) {
			merge_write8(pbuf, dest, from);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		dest -= pbuf->size;
		dend -= pbuf->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
	while (dest < dend) {
		merge_write8(pbuf, dest, from);
		from += sizeof(u64);
		dest += sizeof(u64);
	}

	/* adjust carry to hold exactly bytes_left valid bytes */
	if (pbuf->carry_bytes < bytes_left) {
		/* need to read more */
		read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
	} else {
		/* remove invalid bytes */
		zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
	}

	pbuf->qw_written += qw_to_write;
}
635 | ||
/*
 * Mid copy helper, "straight case" - source pointer is 64-bit aligned
 * with no carry bytes.
 *
 * @pbuf: destination buffer
 * @from: data source, is QWORD aligned
 * @nbytes: bytes to copy
 *
 * Must handle nbytes < 8.
 */
static void mid_copy_straight(struct pio_buf *pbuf,
			      const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
	void __iomem *dend;			/* 8-byte data end */

	/* calculate 8-byte data end */
	dend = dest + ((nbytes >> 3) * sizeof(u64));

	if (pbuf->qw_written < PIO_BLOCK_QWS) {
		/*
		 * Still within SOP block.  We don't need to check for
		 * wrap because we are still in the first block and
		 * can only wrap on block boundaries.
		 */
		void __iomem *send;		/* SOP end */
		void __iomem *xend;

		/*
		 * calculate the end of data or end of block, whichever
		 * comes first
		 */
		send = pbuf->start + PIO_BLOCK_SIZE;
		xend = min(send, dend);

		/* shift up to SOP=1 space */
		dest += SOP_DISTANCE;
		xend += SOP_DISTANCE;

		/* write 8-byte chunk data */
		while (dest < xend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		/* shift down to SOP=0 space */
		dest -= SOP_DISTANCE;
	}
	/*
	 * At this point dest could be (either, both, or neither):
	 * - at dend
	 * - at the wrap
	 */

	/*
	 * If the wrap comes before or matches the data end,
	 * copy until the wrap, then wrap.
	 *
	 * If dest is at the wrap, we will fall into the if,
	 * not do the loop, when wrap.
	 *
	 * If the data ends at the end of the SOP above and
	 * the buffer wraps, then pbuf->end == dend == dest
	 * and nothing will get written.
	 */
	if (pbuf->end <= dend) {
		while (dest < pbuf->end) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		dest -= pbuf->size;
		dend -= pbuf->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
	while (dest < dend) {
		writeq(*(u64 *)from, dest);
		from += sizeof(u64);
		dest += sizeof(u64);
	}

	/* we know carry_bytes was zero on entry to this routine */
	read_low_bytes(pbuf, from, nbytes & 0x7);

	pbuf->qw_written += nbytes >> 3;
}
725 | ||
/*
 * Segmented PIO Copy - middle
 *
 * Must handle any aligned tail and any aligned source with any byte count.
 *
 * @pbuf: a number of blocks allocated within a PIO send context
 * @from: data source
 * @nbytes: number of bytes to copy
 */
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
	unsigned long from_align = (unsigned long)from & 0x7;

	if (pbuf->carry_bytes + nbytes < 8) {
		/* not enough bytes to fill a QW */
		read_extra_bytes(pbuf, from, nbytes);
		return;
	}

	if (from_align) {
		/* misaligned source pointer - align it */
		unsigned long to_align;

		/* bytes to read to align "from" */
		to_align = 8 - from_align;

		/*
		 * In the advance-to-alignment logic below, we do not need
		 * to check if we are using more than nbytes.  This is because
		 * if we are here, we already know that carry+nbytes will
		 * fill at least one QW.
		 */
		if (pbuf->carry_bytes + to_align < 8) {
			/* not enough align bytes to fill a QW */
			read_extra_bytes(pbuf, from, to_align);
			from += to_align;
			nbytes -= to_align;
		} else {
			/* bytes to fill carry */
			unsigned long to_fill = 8 - pbuf->carry_bytes;
			/* bytes left over to be read */
			unsigned long extra = to_align - to_fill;
			void __iomem *dest;

			/* fill carry... */
			read_extra_bytes(pbuf, from, to_fill);
			from += to_fill;
			nbytes -= to_fill;
			/* may not be enough valid bytes left to align */
			if (extra > nbytes)
				extra = nbytes;

			/* ...now write carry */
			dest = pbuf->start + (pbuf->qw_written * sizeof(u64));

			/*
			 * The two checks immediately below cannot both be
			 * true, hence the else.  If we have wrapped, we
			 * cannot still be within the first block.
			 * Conversely, if we are still in the first block, we
			 * cannot have wrapped.  We do the wrap check first
			 * as that is more likely.
			 */
			/* adjust if we've wrapped */
			if (dest >= pbuf->end)
				dest -= pbuf->size;
			/* jump to SOP range if within the first block */
			else if (pbuf->qw_written < PIO_BLOCK_QWS)
				dest += SOP_DISTANCE;

			carry8_write8(pbuf->carry, dest);
			pbuf->qw_written++;

			/* read any extra bytes to do final alignment */
			/* this will overwrite anything in pbuf->carry */
			read_low_bytes(pbuf, from, extra);
			from += extra;
			nbytes -= extra;
			/*
			 * If no bytes are left, return early - we are done.
			 * NOTE: This short-circuit is *required* because
			 * "extra" may have been reduced in size and "from"
			 * is not aligned, as required when leaving this
			 * if block.
			 */
			if (nbytes == 0)
				return;
		}

		/* at this point, from is QW aligned */
	}

	if (pbuf->carry_bytes)
		mid_copy_mix(pbuf, from, nbytes);
	else
		mid_copy_straight(pbuf, from, nbytes);
}
823 | ||
/*
 * Segmented PIO Copy - end
 *
 * Write any remainder (in pbuf->carry) and finish writing the whole block.
 *
 * @pbuf: a number of blocks allocated within a PIO send context
 */
void seg_pio_copy_end(struct pio_buf *pbuf)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));

	/*
	 * The two checks immediately below cannot both be true, hence the
	 * else.  If we have wrapped, we cannot still be within the first
	 * block.  Conversely, if we are still in the first block, we
	 * cannot have wrapped.  We do the wrap check first as that is
	 * more likely.
	 */
	/* adjust if we have wrapped */
	if (dest >= pbuf->end)
		dest -= pbuf->size;
	/* jump to the SOP range if within the first block */
	else if (pbuf->qw_written < PIO_BLOCK_QWS)
		dest += SOP_DISTANCE;

	/* write final bytes, if any */
	if (carry_write8(pbuf, dest)) {
		dest += sizeof(u64);
		/*
		 * NOTE: We do not need to recalculate whether dest needs
		 * SOP_DISTANCE or not.
		 *
		 * If we are in the first block and the dangle write
		 * keeps us in the same block, dest will need
		 * to retain SOP_DISTANCE in the loop below.
		 *
		 * If we are in the first block and the dangle write pushes
		 * us to the next block, then loop below will not run
		 * and dest is not used.  Hence we do not need to update
		 * it.
		 *
		 * If we are past the first block, then SOP_DISTANCE
		 * was never added, so there is nothing to do.
		 */
	}

	/* fill in rest of block */
	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
		writeq(0, dest);
		dest += sizeof(u64);
	}

	/* finished with this buffer */
	this_cpu_dec(*pbuf->sc->buffers_allocated);
	preempt_enable();
}