Commit | Line | Data |
---|---|---|
77241056 MM |
1 | /* |
2 | * | |
3 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
4 | * redistributing this file, you may do so under either license. | |
5 | * | |
6 | * GPL LICENSE SUMMARY | |
7 | * | |
8 | * Copyright(c) 2015 Intel Corporation. | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of version 2 of the GNU General Public License as | |
12 | * published by the Free Software Foundation. | |
13 | * | |
14 | * This program is distributed in the hope that it will be useful, but | |
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * General Public License for more details. | |
18 | * | |
19 | * BSD LICENSE | |
20 | * | |
21 | * Copyright(c) 2015 Intel Corporation. | |
22 | * | |
23 | * Redistribution and use in source and binary forms, with or without | |
24 | * modification, are permitted provided that the following conditions | |
25 | * are met: | |
26 | * | |
27 | * - Redistributions of source code must retain the above copyright | |
28 | * notice, this list of conditions and the following disclaimer. | |
29 | * - Redistributions in binary form must reproduce the above copyright | |
30 | * notice, this list of conditions and the following disclaimer in | |
31 | * the documentation and/or other materials provided with the | |
32 | * distribution. | |
33 | * - Neither the name of Intel Corporation nor the names of its | |
34 | * contributors may be used to endorse or promote products derived | |
35 | * from this software without specific prior written permission. | |
36 | * | |
37 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
38 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
39 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
40 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
41 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
44 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
45 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
46 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
47 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
48 | * | |
49 | */ | |
50 | ||
51 | #include "hfi.h" | |
52 | ||
/* additive distance between non-SOP and SOP space */
#define SOP_DISTANCE (TXE_PIO_SIZE / 2)
/* byte-offset mask within a block; assumes PIO_BLOCK_SIZE is a power of 2 */
#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE-1)
/* number of QUADWORDs in a block */
#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE/sizeof(u64))
58 | ||
/**
 * pio_copy - copy data block to MMIO space
 * @dd: hfi1 device data (not referenced by this routine)
 * @pbuf: a number of blocks allocated within a PIO send context
 * @pbc: PBC to send
 * @from: source, must be 8 byte aligned
 * @count: number of DWORD (32-bit) quantities to copy from source
 *
 * Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
 * Must always write full BLOCK_SIZE bytes blocks.  The first block must
 * be written to the corresponding SOP=1 address.
 *
 * Known:
 * o pbuf->start always starts on a block boundary
 * o pbuf can wrap only at a block boundary
 */
void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
	      const void *from, size_t count)
{
	void __iomem *dest = pbuf->start + SOP_DISTANCE;
	void __iomem *send = dest + PIO_BLOCK_SIZE;
	void __iomem *dend;			/* 8-byte data end */

	/* write the PBC */
	writeq(pbc, dest);
	dest += sizeof(u64);

	/* calculate where the QWORD data ends - in SOP=1 space */
	/* count is in DWORDs: count>>1 whole QWs, count&1 dangling DWORD */
	dend = dest + ((count>>1) * sizeof(u64));

	if (dend < send) {
		/* all QWORD data is within the SOP block, does *not*
		   reach the end of the SOP block */

		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/*
		 * No boundary checks are needed here:
		 * 0. We're not on the SOP block boundary
		 * 1. The possible DWORD dangle will still be within
		 *    the SOP block
		 * 2. We cannot wrap except on a block boundary.
		 */
	} else {
		/* QWORD data extends _to_ or beyond the SOP block */

		/* write 8-byte SOP chunk data */
		while (dest < send) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/* drop out of the SOP range */
		dest -= SOP_DISTANCE;
		dend -= SOP_DISTANCE;

		/*
		 * If the wrap comes before or matches the data end,
		 * copy until the wrap, then wrap.
		 *
		 * If the data ends at the end of the SOP above and
		 * the buffer wraps, then pbuf->end == dend == dest
		 * and nothing will get written, but we will wrap in
		 * case there is a dangling DWORD.
		 */
		if (pbuf->end <= dend) {
			while (dest < pbuf->end) {
				writeq(*(u64 *)from, dest);
				from += sizeof(u64);
				dest += sizeof(u64);
			}

			dest -= pbuf->size;
			dend -= pbuf->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
	}
	/* at this point we have wrapped if we are going to wrap */

	/* write dangling u32, if any */
	if (count & 1) {
		union mix val;

		/* pad the dangling DWORD out to a full, zero-filled QW */
		val.val64 = 0;
		val.val32[0] = *(u32 *)from;
		writeq(val.val64, dest);
		dest += sizeof(u64);
	}
	/* fill in rest of block, no need to check pbuf->end
	   as we only wrap on a block boundary */
	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
		writeq(0, dest);
		dest += sizeof(u64);
	}

	/* finished with this buffer */
	atomic_dec(&pbuf->sc->buffers_allocated);
}
165 | ||
166 | /* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */ | |
167 | #define USE_SHIFTS 1 | |
168 | #ifdef USE_SHIFTS | |
169 | /* | |
170 | * Handle carry bytes using shifts and masks. | |
171 | * | |
172 | * NOTE: the value the unused portion of carry is expected to always be zero. | |
173 | */ | |
174 | ||
/*
 * "zero" shift - bit shift used to zero out upper bytes.  Input is
 * the count of LSB bytes to preserve.
 *
 * NOTE: only valid for inputs 1..8; an input of 0 would expand to a
 * 64-bit shift of a u64, which is undefined behavior.
 */
#define zshift(x) (8 * (8-(x)))

/*
 * "merge" shift - bit shift used to merge with carry bytes.  Input is
 * the LSB byte count to move beyond.
 *
 * NOTE: only valid for inputs 0..7; an input of 8 would expand to a
 * 64-bit shift of a u64, which is undefined behavior.
 */
#define mshift(x) (8 * (x))
186 | ||
187 | /* | |
188 | * Read nbytes bytes from "from" and return them in the LSB bytes | |
189 | * of pbuf->carry. Other bytes are zeroed. Any previous value | |
190 | * pbuf->carry is lost. | |
191 | * | |
192 | * NOTES: | |
193 | * o do not read from from if nbytes is zero | |
194 | * o from may _not_ be u64 aligned | |
195 | * o nbytes must not span a QW boundary | |
196 | */ | |
197 | static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, | |
198 | unsigned int nbytes) | |
199 | { | |
200 | unsigned long off; | |
201 | ||
202 | if (nbytes == 0) { | |
203 | pbuf->carry.val64 = 0; | |
204 | } else { | |
205 | /* align our pointer */ | |
206 | off = (unsigned long)from & 0x7; | |
207 | from = (void *)((unsigned long)from & ~0x7l); | |
208 | pbuf->carry.val64 = ((*(u64 *)from) | |
209 | << zshift(nbytes + off))/* zero upper bytes */ | |
210 | >> zshift(nbytes); /* place at bottom */ | |
211 | } | |
212 | pbuf->carry_bytes = nbytes; | |
213 | } | |
214 | ||
215 | /* | |
216 | * Read nbytes bytes from "from" and put them at the next significant bytes | |
217 | * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra | |
218 | * read does not overfill carry. | |
219 | * | |
220 | * NOTES: | |
221 | * o from may _not_ be u64 aligned | |
222 | * o nbytes may span a QW boundary | |
223 | */ | |
224 | static inline void read_extra_bytes(struct pio_buf *pbuf, | |
225 | const void *from, unsigned int nbytes) | |
226 | { | |
227 | unsigned long off = (unsigned long)from & 0x7; | |
228 | unsigned int room, xbytes; | |
229 | ||
230 | /* align our pointer */ | |
231 | from = (void *)((unsigned long)from & ~0x7l); | |
232 | ||
233 | /* check count first - don't read anything if count is zero */ | |
234 | while (nbytes) { | |
235 | /* find the number of bytes in this u64 */ | |
236 | room = 8 - off; /* this u64 has room for this many bytes */ | |
237 | xbytes = nbytes > room ? room : nbytes; | |
238 | ||
239 | /* | |
240 | * shift down to zero lower bytes, shift up to zero upper | |
241 | * bytes, shift back down to move into place | |
242 | */ | |
243 | pbuf->carry.val64 |= (((*(u64 *)from) | |
244 | >> mshift(off)) | |
245 | << zshift(xbytes)) | |
246 | >> zshift(xbytes+pbuf->carry_bytes); | |
247 | off = 0; | |
248 | pbuf->carry_bytes += xbytes; | |
249 | nbytes -= xbytes; | |
250 | from += sizeof(u64); | |
251 | } | |
252 | } | |
253 | ||
254 | /* | |
255 | * Zero extra bytes from the end of pbuf->carry. | |
256 | * | |
257 | * NOTES: | |
258 | * o zbytes <= old_bytes | |
259 | */ | |
260 | static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) | |
261 | { | |
262 | unsigned int remaining; | |
263 | ||
264 | if (zbytes == 0) /* nothing to do */ | |
265 | return; | |
266 | ||
267 | remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */ | |
268 | ||
269 | /* NOTE: zshift only guaranteed to work if remaining != 0 */ | |
270 | if (remaining) | |
271 | pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining)) | |
272 | >> zshift(remaining); | |
273 | else | |
274 | pbuf->carry.val64 = 0; | |
275 | pbuf->carry_bytes = remaining; | |
276 | } | |
277 | ||
/*
 * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
 * Put the unused part of the next 8 bytes of src into the LSB bytes of
 * pbuf->carry with the upper bytes zeroed..
 *
 * NOTES:
 * o result must keep unused bytes zeroed
 * o src must be u64 aligned
 * o caller must guarantee 0 < pbuf->carry_bytes < 8: at 0 or 8 the
 *   zshift()/mshift() expansions below become 64-bit shifts of a u64,
 *   which is undefined behavior (mid_copy_mix() only runs with a
 *   non-zero carry, and the merge itself keeps carry_bytes < 8)
 */
static inline void merge_write8(
	struct pio_buf *pbuf,
	void __iomem *dest,
	const void *src)
{
	u64 new, temp;

	new = *(u64 *)src;
	/* low bytes from carry, remaining bytes from src */
	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
	writeq(temp, dest);
	/* the src bytes not written become the new carry */
	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
299 | ||
/*
 * Write a quad word using all bytes of carry.
 *
 * Carry is passed by value; it is not modified, so the caller's carry
 * bookkeeping (e.g. overwriting it afterward) is unaffected.
 */
static inline void carry8_write8(union mix carry, void __iomem *dest)
{
	writeq(carry.val64, dest);
}
307 | ||
308 | /* | |
309 | * Write a quad word using all the valid bytes of carry. If carry | |
310 | * has zero valid bytes, nothing is written. | |
311 | * Returns 0 on nothing written, non-zero on quad word written. | |
312 | */ | |
313 | static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) | |
314 | { | |
315 | if (pbuf->carry_bytes) { | |
316 | /* unused bytes are always kept zeroed, so just write */ | |
317 | writeq(pbuf->carry.val64, dest); | |
318 | return 1; | |
319 | } | |
320 | ||
321 | return 0; | |
322 | } | |
323 | ||
324 | #else /* USE_SHIFTS */ | |
325 | /* | |
326 | * Handle carry bytes using byte copies. | |
327 | * | |
328 | * NOTE: the value the unused portion of carry is left uninitialized. | |
329 | */ | |
330 | ||
/*
 * Jump copy - no-loop copy for < 8 bytes.
 *
 * Relies on deliberate switch fall-through: entering at case n copies
 * exactly n bytes; n == 0 copies nothing.
 */
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
{
	switch (n) {
	case 7:
		*dest++ = *src++;
		/* fall through */
	case 6:
		*dest++ = *src++;
		/* fall through */
	case 5:
		*dest++ = *src++;
		/* fall through */
	case 4:
		*dest++ = *src++;
		/* fall through */
	case 3:
		*dest++ = *src++;
		/* fall through */
	case 2:
		*dest++ = *src++;
		/* fall through */
	case 1:
		*dest++ = *src++;
	}
}
353 | ||
/*
 * Read nbytes from "from" and place them in the low bytes
 * of pbuf->carry.  Other bytes are left as-is.  Any previous
 * value in pbuf->carry is lost.
 *
 * NOTES:
 * o do not read from "from" if nbytes is zero (jcopy with n == 0
 *   copies nothing, satisfying this)
 * o "from" may _not_ be u64 aligned.
 */
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
							unsigned int nbytes)
{
	jcopy(&pbuf->carry.val8[0], from, nbytes);
	pbuf->carry_bytes = nbytes;
}
369 | ||
/*
 * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
 * It is expected that the extra read does not overfill carry.
 *
 * NOTES:
 * o "from" may _not_ be u64 aligned
 * o nbytes may span a QW boundary
 * o unlike the USE_SHIFTS variant, the unused tail of carry is left
 *   uninitialized here
 */
static inline void read_extra_bytes(struct pio_buf *pbuf,
					const void *from, unsigned int nbytes)
{
	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
	pbuf->carry_bytes += nbytes;
}
384 | ||
/*
 * Zero extra bytes from the end of pbuf->carry.
 *
 * We do not care about the value of unused bytes in carry, so just
 * reduce the byte count.
 *
 * NOTES:
 * o zbytes <= old_bytes
 */
static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
{
	pbuf->carry_bytes -= zbytes;
}
398 | ||
399 | /* | |
400 | * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. | |
401 | * Put the unused part of the next 8 bytes of src into the low bytes of | |
402 | * pbuf->carry. | |
403 | */ | |
404 | static inline void merge_write8( | |
405 | struct pio_buf *pbuf, | |
406 | void *dest, | |
407 | const void *src) | |
408 | { | |
409 | u32 remainder = 8 - pbuf->carry_bytes; | |
410 | ||
411 | jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder); | |
412 | writeq(pbuf->carry.val64, dest); | |
413 | jcopy(&pbuf->carry.val8[0], src+remainder, pbuf->carry_bytes); | |
414 | } | |
415 | ||
416 | /* | |
417 | * Write a quad word using all bytes of carry. | |
418 | */ | |
419 | static inline void carry8_write8(union mix carry, void *dest) | |
420 | { | |
421 | writeq(carry.val64, dest); | |
422 | } | |
423 | ||
424 | /* | |
425 | * Write a quad word using all the valid bytes of carry. If carry | |
426 | * has zero valid bytes, nothing is written. | |
427 | * Returns 0 on nothing written, non-zero on quad word written. | |
428 | */ | |
429 | static inline int carry_write8(struct pio_buf *pbuf, void *dest) | |
430 | { | |
431 | if (pbuf->carry_bytes) { | |
432 | u64 zero = 0; | |
433 | ||
434 | jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero, | |
435 | 8 - pbuf->carry_bytes); | |
436 | writeq(pbuf->carry.val64, dest); | |
437 | return 1; | |
438 | } | |
439 | ||
440 | return 0; | |
441 | } | |
442 | #endif /* USE_SHIFTS */ | |
443 | ||
/*
 * Segmented PIO Copy - start
 *
 * Start a PIO copy: write the PBC, copy all whole QWs of the first
 * segment, and park any trailing 1-7 bytes in pbuf->carry for a later
 * seg_pio_copy_mid()/seg_pio_copy_end() call.
 *
 * @pbuf: destination buffer
 * @pbc: the PBC for the PIO buffer
 * @from: data source, QWORD aligned
 * @nbytes: bytes to copy
 */
void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
				const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + SOP_DISTANCE;
	void __iomem *send = dest + PIO_BLOCK_SIZE;
	void __iomem *dend;			/* 8-byte data end */

	writeq(pbc, dest);
	dest += sizeof(u64);

	/* calculate where the QWORD data ends - in SOP=1 space */
	dend = dest + ((nbytes>>3) * sizeof(u64));

	if (dend < send) {
		/* all QWORD data is within the SOP block, does *not*
		   reach the end of the SOP block */

		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/*
		 * No boundary checks are needed here:
		 * 0. We're not on the SOP block boundary
		 * 1. The possible DWORD dangle will still be within
		 *    the SOP block
		 * 2. We cannot wrap except on a block boundary.
		 */
	} else {
		/* QWORD data extends _to_ or beyond the SOP block */

		/* write 8-byte SOP chunk data */
		while (dest < send) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/* drop out of the SOP range */
		dest -= SOP_DISTANCE;
		dend -= SOP_DISTANCE;

		/*
		 * If the wrap comes before or matches the data end,
		 * copy until the wrap, then wrap.
		 *
		 * If the data ends at the end of the SOP above and
		 * the buffer wraps, then pbuf->end == dend == dest
		 * and nothing will get written, but we will wrap in
		 * case there is a dangling DWORD.
		 */
		if (pbuf->end <= dend) {
			while (dest < pbuf->end) {
				writeq(*(u64 *)from, dest);
				from += sizeof(u64);
				dest += sizeof(u64);
			}

			dest -= pbuf->size;
			dend -= pbuf->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
	}
	/* at this point we have wrapped if we are going to wrap */

	/* ...but it doesn't matter as we're done writing */

	/* save dangling bytes, if any */
	read_low_bytes(pbuf, from, nbytes & 0x7);

	/* qw_written tracks device-side progress; carry bytes are not
	   counted until they are flushed */
	pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
}
532 | ||
/*
 * Mid copy helper, "mixed case" - source is 64-bit aligned but carry
 * bytes are non-zero.
 *
 * Whole u64s must be written to the chip, so bytes must be manually merged.
 *
 * @pbuf: destination buffer
 * @from: data source, is QWORD aligned.
 * @nbytes: bytes to copy
 *
 * Must handle nbytes < 8.
 *
 * NOTE: caller (seg_pio_copy_mid) only invokes this with
 * pbuf->carry_bytes != 0, which merge_write8() requires.
 */
static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
	void __iomem *dend;			/* 8-byte data end */
	/* whole QWs available once carry and new bytes are combined */
	unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
	/* bytes that will remain in carry afterward */
	unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;

	/* calculate 8-byte data end */
	dend = dest + (qw_to_write * sizeof(u64));

	if (pbuf->qw_written < PIO_BLOCK_QWS) {
		/*
		 * Still within SOP block.  We don't need to check for
		 * wrap because we are still in the first block and
		 * can only wrap on block boundaries.
		 */
		void __iomem *send;		/* SOP end */
		void __iomem *xend;

		/* calculate the end of data or end of block, whichever
		   comes first */
		send = pbuf->start + PIO_BLOCK_SIZE;
		xend = send < dend ? send : dend;

		/* shift up to SOP=1 space */
		dest += SOP_DISTANCE;
		xend += SOP_DISTANCE;

		/* write 8-byte chunk data */
		while (dest < xend) {
			merge_write8(pbuf, dest, from);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		/* shift down to SOP=0 space */
		dest -= SOP_DISTANCE;
	}
	/*
	 * At this point dest could be (either, both, or neither):
	 * - at dend
	 * - at the wrap
	 */

	/*
	 * If the wrap comes before or matches the data end,
	 * copy until the wrap, then wrap.
	 *
	 * If dest is at the wrap, we will fall into the if,
	 * not do the loop, when wrap.
	 *
	 * If the data ends at the end of the SOP above and
	 * the buffer wraps, then pbuf->end == dend == dest
	 * and nothing will get written.
	 */
	if (pbuf->end <= dend) {
		while (dest < pbuf->end) {
			merge_write8(pbuf, dest, from);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		dest -= pbuf->size;
		dend -= pbuf->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
	while (dest < dend) {
		merge_write8(pbuf, dest, from);
		from += sizeof(u64);
		dest += sizeof(u64);
	}

	/* adjust carry */
	if (pbuf->carry_bytes < bytes_left) {
		/* need to read more */
		read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
	} else {
		/* remove invalid bytes */
		zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
	}

	pbuf->qw_written += qw_to_write;
}
629 | ||
/*
 * Mid copy helper, "straight case" - source pointer is 64-bit aligned
 * with no carry bytes.
 *
 * @pbuf: destination buffer
 * @from: data source, is QWORD aligned
 * @nbytes: bytes to copy
 *
 * Must handle nbytes < 8.
 */
static void mid_copy_straight(struct pio_buf *pbuf,
						const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
	void __iomem *dend;			/* 8-byte data end */

	/* calculate 8-byte data end */
	dend = dest + ((nbytes>>3) * sizeof(u64));

	if (pbuf->qw_written < PIO_BLOCK_QWS) {
		/*
		 * Still within SOP block.  We don't need to check for
		 * wrap because we are still in the first block and
		 * can only wrap on block boundaries.
		 */
		void __iomem *send;		/* SOP end */
		void __iomem *xend;

		/* calculate the end of data or end of block, whichever
		   comes first */
		send = pbuf->start + PIO_BLOCK_SIZE;
		xend = send < dend ? send : dend;

		/* shift up to SOP=1 space */
		dest += SOP_DISTANCE;
		xend += SOP_DISTANCE;

		/* write 8-byte chunk data */
		while (dest < xend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		/* shift down to SOP=0 space */
		dest -= SOP_DISTANCE;
	}
	/*
	 * At this point dest could be (either, both, or neither):
	 * - at dend
	 * - at the wrap
	 */

	/*
	 * If the wrap comes before or matches the data end,
	 * copy until the wrap, then wrap.
	 *
	 * If dest is at the wrap, we will fall into the if,
	 * not do the loop, when wrap.
	 *
	 * If the data ends at the end of the SOP above and
	 * the buffer wraps, then pbuf->end == dend == dest
	 * and nothing will get written.
	 */
	if (pbuf->end <= dend) {
		while (dest < pbuf->end) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		dest -= pbuf->size;
		dend -= pbuf->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
	while (dest < dend) {
		writeq(*(u64 *)from, dest);
		from += sizeof(u64);
		dest += sizeof(u64);
	}

	/* we know carry_bytes was zero on entry to this routine */
	/* stash the 0-7 trailing bytes for the next call */
	read_low_bytes(pbuf, from, nbytes & 0x7);

	pbuf->qw_written += nbytes>>3;
}
717 | ||
/*
 * Segmented PIO Copy - middle
 *
 * Must handle any aligned tail and any aligned source with any byte count.
 *
 * Accumulates sub-QW leftovers in pbuf->carry; aligns a misaligned
 * source before handing off to the mix/straight helpers.
 *
 * @pbuf: a number of blocks allocated within a PIO send context
 * @from: data source
 * @nbytes: number of bytes to copy
 */
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
	unsigned long from_align = (unsigned long)from & 0x7;

	if (pbuf->carry_bytes + nbytes < 8) {
		/* not enough bytes to fill a QW */
		read_extra_bytes(pbuf, from, nbytes);
		return;
	}

	if (from_align) {
		/* misaligned source pointer - align it */
		unsigned long to_align;

		/* bytes to read to align "from" */
		to_align = 8 - from_align;

		/*
		 * In the advance-to-alignment logic below, we do not need
		 * to check if we are using more than nbytes.  This is because
		 * if we are here, we already know that carry+nbytes will
		 * fill at least one QW.
		 */
		if (pbuf->carry_bytes + to_align < 8) {
			/* not enough align bytes to fill a QW */
			read_extra_bytes(pbuf, from, to_align);
			from += to_align;
			nbytes -= to_align;
		} else {
			/* bytes to fill carry */
			unsigned long to_fill = 8 - pbuf->carry_bytes;
			/* bytes left over to be read */
			unsigned long extra = to_align - to_fill;
			void __iomem *dest;

			/* fill carry... */
			read_extra_bytes(pbuf, from, to_fill);
			from += to_fill;
			nbytes -= to_fill;

			/* ...now write carry */
			dest = pbuf->start + (pbuf->qw_written * sizeof(u64));

			/*
			 * The two checks immediately below cannot both be
			 * true, hence the else.  If we have wrapped, we
			 * cannot still be within the first block.
			 * Conversely, if we are still in the first block, we
			 * cannot have wrapped.  We do the wrap check first
			 * as that is more likely.
			 */
			/* adjust if we've wrapped */
			if (dest >= pbuf->end)
				dest -= pbuf->size;
			/* jump to SOP range if within the first block */
			else if (pbuf->qw_written < PIO_BLOCK_QWS)
				dest += SOP_DISTANCE;

			carry8_write8(pbuf->carry, dest);
			pbuf->qw_written++;

			/* read any extra bytes to do final alignment */
			/* this will overwrite anything in pbuf->carry */
			read_low_bytes(pbuf, from, extra);
			from += extra;
			nbytes -= extra;
		}

		/* at this point, from is QW aligned */
	}

	if (pbuf->carry_bytes)
		mid_copy_mix(pbuf, from, nbytes);
	else
		mid_copy_straight(pbuf, from, nbytes);
}
803 | ||
/*
 * Segmented PIO Copy - end
 *
 * Write any remainder (in pbuf->carry) and finish writing the whole block,
 * padding with zero QWs to the next block boundary, then release the
 * buffer allocation.
 *
 * @pbuf: a number of blocks allocated within a PIO send context
 */
void seg_pio_copy_end(struct pio_buf *pbuf)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));

	/*
	 * The two checks immediately below cannot both be true, hence the
	 * else.  If we have wrapped, we cannot still be within the first
	 * block.  Conversely, if we are still in the first block, we
	 * cannot have wrapped.  We do the wrap check first as that is
	 * more likely.
	 */
	/* adjust if we have wrapped */
	if (dest >= pbuf->end)
		dest -= pbuf->size;
	/* jump to the SOP range if within the first block */
	else if (pbuf->qw_written < PIO_BLOCK_QWS)
		dest += SOP_DISTANCE;

	/* write final bytes, if any */
	if (carry_write8(pbuf, dest)) {
		dest += sizeof(u64);
		/*
		 * NOTE: We do not need to recalculate whether dest needs
		 * SOP_DISTANCE or not.
		 *
		 * If we are in the first block and the dangle write
		 * keeps us in the same block, dest will need
		 * to retain SOP_DISTANCE in the loop below.
		 *
		 * If we are in the first block and the dangle write pushes
		 * us to the next block, then loop below will not run
		 * and dest is not used.  Hence we do not need to update
		 * it.
		 *
		 * If we are past the first block, then SOP_DISTANCE
		 * was never added, so there is nothing to do.
		 */
	}

	/* fill in rest of block */
	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
		writeq(0, dest);
		dest += sizeof(u64);
	}

	/* finished with this buffer */
	atomic_dec(&pbuf->sc->buffers_allocated);
}