Commit | Line | Data |
---|---|---|
45dfc1a0 HS |
1 | /* |
2 | * Freescale GPMI NAND Flash Driver | |
3 | * | |
4 | * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. | |
5 | * Copyright (C) 2008 Embedded Alley Solutions, Inc. | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License along | |
18 | * with this program; if not, write to the Free Software Foundation, Inc., | |
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
20 | */ | |
45dfc1a0 HS |
21 | #include <linux/delay.h> |
22 | #include <linux/clk.h> | |
df877fb3 | 23 | #include <linux/slab.h> |
45dfc1a0 HS |
24 | |
25 | #include "gpmi-nand.h" | |
26 | #include "gpmi-regs.h" | |
27 | #include "bch-regs.h" | |
28 | ||
513d57e1 | 29 | static struct timing_threshod timing_default_threshold = { |
45dfc1a0 HS |
30 | .max_data_setup_cycles = (BM_GPMI_TIMING0_DATA_SETUP >> |
31 | BP_GPMI_TIMING0_DATA_SETUP), | |
32 | .internal_data_setup_in_ns = 0, | |
33 | .max_sample_delay_factor = (BM_GPMI_CTRL1_RDN_DELAY >> | |
34 | BP_GPMI_CTRL1_RDN_DELAY), | |
35 | .max_dll_clock_period_in_ns = 32, | |
36 | .max_dll_delay_in_ns = 16, | |
37 | }; | |
38 | ||
4aa6ae3e HS |
39 | #define MXS_SET_ADDR 0x4 |
40 | #define MXS_CLR_ADDR 0x8 | |
45dfc1a0 HS |
41 | /* |
42 | * Clear the bit and poll it cleared. This is usually called with | |
43 | * a reset address and mask being either SFTRST(bit 31) or CLKGATE | |
44 | * (bit 30). | |
45 | */ | |
46 | static int clear_poll_bit(void __iomem *addr, u32 mask) | |
47 | { | |
48 | int timeout = 0x400; | |
49 | ||
50 | /* clear the bit */ | |
4aa6ae3e | 51 | writel(mask, addr + MXS_CLR_ADDR); |
45dfc1a0 HS |
52 | |
53 | /* | |
54 | * SFTRST needs 3 GPMI clocks to settle, the reference manual | |
55 | * recommends to wait 1us. | |
56 | */ | |
57 | udelay(1); | |
58 | ||
59 | /* poll the bit becoming clear */ | |
60 | while ((readl(addr) & mask) && --timeout) | |
61 | /* nothing */; | |
62 | ||
63 | return !timeout; | |
64 | } | |
65 | ||
66 | #define MODULE_CLKGATE (1 << 30) | |
67 | #define MODULE_SFTRST (1 << 31) | |
68 | /* | |
69 | * The current mxs_reset_block() will do two things: | |
70 | * [1] enable the module. | |
71 | * [2] reset the module. | |
72 | * | |
9398d1ce HS |
73 | * In most of the cases, it's ok. |
74 | * But in MX23, there is a hardware bug in the BCH block (see erratum #2847). | |
45dfc1a0 HS |
75 | * If you try to soft reset the BCH block, it becomes unusable until |
76 | * the next hard reset. This case occurs in the NAND boot mode. When the board | |
77 | * boots by NAND, the ROM of the chip will initialize the BCH blocks itself. | |
78 | * So If the driver tries to reset the BCH again, the BCH will not work anymore. | |
9398d1ce HS |
79 | * You will see a DMA timeout in this case. The bug has been fixed |
80 | * in the following chips, such as MX28. | |
45dfc1a0 HS |
81 | * |
82 | * To avoid this bug, just add a new parameter `just_enable` for | |
83 | * the mxs_reset_block(), and rewrite it here. | |
84 | */ | |
9398d1ce | 85 | static int gpmi_reset_block(void __iomem *reset_addr, bool just_enable) |
45dfc1a0 HS |
86 | { |
87 | int ret; | |
88 | int timeout = 0x400; | |
89 | ||
90 | /* clear and poll SFTRST */ | |
91 | ret = clear_poll_bit(reset_addr, MODULE_SFTRST); | |
92 | if (unlikely(ret)) | |
93 | goto error; | |
94 | ||
95 | /* clear CLKGATE */ | |
4aa6ae3e | 96 | writel(MODULE_CLKGATE, reset_addr + MXS_CLR_ADDR); |
45dfc1a0 HS |
97 | |
98 | if (!just_enable) { | |
99 | /* set SFTRST to reset the block */ | |
4aa6ae3e | 100 | writel(MODULE_SFTRST, reset_addr + MXS_SET_ADDR); |
45dfc1a0 HS |
101 | udelay(1); |
102 | ||
103 | /* poll CLKGATE becoming set */ | |
104 | while ((!(readl(reset_addr) & MODULE_CLKGATE)) && --timeout) | |
105 | /* nothing */; | |
106 | if (unlikely(!timeout)) | |
107 | goto error; | |
108 | } | |
109 | ||
110 | /* clear and poll SFTRST */ | |
111 | ret = clear_poll_bit(reset_addr, MODULE_SFTRST); | |
112 | if (unlikely(ret)) | |
113 | goto error; | |
114 | ||
115 | /* clear and poll CLKGATE */ | |
116 | ret = clear_poll_bit(reset_addr, MODULE_CLKGATE); | |
117 | if (unlikely(ret)) | |
118 | goto error; | |
119 | ||
120 | return 0; | |
121 | ||
122 | error: | |
123 | pr_err("%s(%p): module reset timeout\n", __func__, reset_addr); | |
124 | return -ETIMEDOUT; | |
125 | } | |
126 | ||
ff506172 HS |
127 | static int __gpmi_enable_clk(struct gpmi_nand_data *this, bool v) |
128 | { | |
129 | struct clk *clk; | |
130 | int ret; | |
131 | int i; | |
132 | ||
133 | for (i = 0; i < GPMI_CLK_MAX; i++) { | |
134 | clk = this->resources.clock[i]; | |
135 | if (!clk) | |
136 | break; | |
137 | ||
138 | if (v) { | |
139 | ret = clk_prepare_enable(clk); | |
140 | if (ret) | |
141 | goto err_clk; | |
142 | } else { | |
143 | clk_disable_unprepare(clk); | |
144 | } | |
145 | } | |
146 | return 0; | |
147 | ||
148 | err_clk: | |
149 | for (; i > 0; i--) | |
150 | clk_disable_unprepare(this->resources.clock[i - 1]); | |
151 | return ret; | |
152 | } | |
153 | ||
154 | #define gpmi_enable_clk(x) __gpmi_enable_clk(x, true) | |
155 | #define gpmi_disable_clk(x) __gpmi_enable_clk(x, false) | |
156 | ||
45dfc1a0 HS |
157 | int gpmi_init(struct gpmi_nand_data *this) |
158 | { | |
159 | struct resources *r = &this->resources; | |
160 | int ret; | |
161 | ||
ff506172 | 162 | ret = gpmi_enable_clk(this); |
45dfc1a0 HS |
163 | if (ret) |
164 | goto err_out; | |
165 | ret = gpmi_reset_block(r->gpmi_regs, false); | |
166 | if (ret) | |
167 | goto err_out; | |
168 | ||
6f2a6a52 WS |
169 | /* |
170 | * Reset BCH here, too. We got failures otherwise :( | |
171 | * See later BCH reset for explanation of MX23 handling | |
172 | */ | |
173 | ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); | |
174 | if (ret) | |
175 | goto err_out; | |
176 | ||
177 | ||
45dfc1a0 HS |
178 | /* Choose NAND mode. */ |
179 | writel(BM_GPMI_CTRL1_GPMI_MODE, r->gpmi_regs + HW_GPMI_CTRL1_CLR); | |
180 | ||
181 | /* Set the IRQ polarity. */ | |
182 | writel(BM_GPMI_CTRL1_ATA_IRQRDY_POLARITY, | |
183 | r->gpmi_regs + HW_GPMI_CTRL1_SET); | |
184 | ||
185 | /* Disable Write-Protection. */ | |
186 | writel(BM_GPMI_CTRL1_DEV_RESET, r->gpmi_regs + HW_GPMI_CTRL1_SET); | |
187 | ||
188 | /* Select BCH ECC. */ | |
189 | writel(BM_GPMI_CTRL1_BCH_MODE, r->gpmi_regs + HW_GPMI_CTRL1_SET); | |
190 | ||
d159d8b7 HS |
191 | /* |
192 | * Decouple the chip select from dma channel. We use dma0 for all | |
193 | * the chips. | |
194 | */ | |
195 | writel(BM_GPMI_CTRL1_DECOUPLE_CS, r->gpmi_regs + HW_GPMI_CTRL1_SET); | |
196 | ||
ff506172 | 197 | gpmi_disable_clk(this); |
45dfc1a0 HS |
198 | return 0; |
199 | err_out: | |
200 | return ret; | |
201 | } | |
202 | ||
203 | /* This function is very useful. It is called only when the bug occur. */ | |
204 | void gpmi_dump_info(struct gpmi_nand_data *this) | |
205 | { | |
206 | struct resources *r = &this->resources; | |
207 | struct bch_geometry *geo = &this->bch_geometry; | |
208 | u32 reg; | |
209 | int i; | |
210 | ||
da40c16a | 211 | dev_err(this->dev, "Show GPMI registers :\n"); |
45dfc1a0 HS |
212 | for (i = 0; i <= HW_GPMI_DEBUG / 0x10 + 1; i++) { |
213 | reg = readl(r->gpmi_regs + i * 0x10); | |
da40c16a | 214 | dev_err(this->dev, "offset 0x%.3x : 0x%.8x\n", i * 0x10, reg); |
45dfc1a0 HS |
215 | } |
216 | ||
217 | /* start to print out the BCH info */ | |
da40c16a | 218 | dev_err(this->dev, "Show BCH registers :\n"); |
f7226893 HS |
219 | for (i = 0; i <= HW_BCH_VERSION / 0x10 + 1; i++) { |
220 | reg = readl(r->bch_regs + i * 0x10); | |
da40c16a | 221 | dev_err(this->dev, "offset 0x%.3x : 0x%.8x\n", i * 0x10, reg); |
f7226893 | 222 | } |
da40c16a HS |
223 | dev_err(this->dev, "BCH Geometry :\n" |
224 | "GF length : %u\n" | |
225 | "ECC Strength : %u\n" | |
226 | "Page Size in Bytes : %u\n" | |
227 | "Metadata Size in Bytes : %u\n" | |
228 | "ECC Chunk Size in Bytes: %u\n" | |
229 | "ECC Chunk Count : %u\n" | |
230 | "Payload Size in Bytes : %u\n" | |
231 | "Auxiliary Size in Bytes: %u\n" | |
232 | "Auxiliary Status Offset: %u\n" | |
233 | "Block Mark Byte Offset : %u\n" | |
234 | "Block Mark Bit Offset : %u\n", | |
235 | geo->gf_len, | |
236 | geo->ecc_strength, | |
237 | geo->page_size, | |
238 | geo->metadata_size, | |
239 | geo->ecc_chunk_size, | |
240 | geo->ecc_chunk_count, | |
241 | geo->payload_size, | |
242 | geo->auxiliary_size, | |
243 | geo->auxiliary_status_offset, | |
244 | geo->block_mark_byte_offset, | |
245 | geo->block_mark_bit_offset); | |
45dfc1a0 HS |
246 | } |
247 | ||
248 | /* Configures the geometry for BCH. */ | |
249 | int bch_set_geometry(struct gpmi_nand_data *this) | |
250 | { | |
251 | struct resources *r = &this->resources; | |
252 | struct bch_geometry *bch_geo = &this->bch_geometry; | |
253 | unsigned int block_count; | |
254 | unsigned int block_size; | |
255 | unsigned int metadata_size; | |
256 | unsigned int ecc_strength; | |
257 | unsigned int page_size; | |
9ff16f08 | 258 | unsigned int gf_len; |
45dfc1a0 HS |
259 | int ret; |
260 | ||
261 | if (common_nfc_set_geometry(this)) | |
262 | return !0; | |
263 | ||
264 | block_count = bch_geo->ecc_chunk_count - 1; | |
265 | block_size = bch_geo->ecc_chunk_size; | |
266 | metadata_size = bch_geo->metadata_size; | |
267 | ecc_strength = bch_geo->ecc_strength >> 1; | |
268 | page_size = bch_geo->page_size; | |
9ff16f08 | 269 | gf_len = bch_geo->gf_len; |
45dfc1a0 | 270 | |
ff506172 | 271 | ret = gpmi_enable_clk(this); |
45dfc1a0 HS |
272 | if (ret) |
273 | goto err_out; | |
274 | ||
9398d1ce HS |
275 | /* |
276 | * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this | |
277 | * chip, otherwise it will lock up. So we skip resetting BCH on the MX23. | |
278 | * On the other hand, the MX28 needs the reset, because one case has been | |
279 | * seen where the BCH produced ECC errors constantly after 10000 | |
76c930be FE |
280 | * consecutive reboots. The latter case has not been seen on the MX23 |
281 | * yet, still we don't know if it could happen there as well. | |
9398d1ce HS |
282 | */ |
283 | ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); | |
45dfc1a0 HS |
284 | if (ret) |
285 | goto err_out; | |
286 | ||
287 | /* Configure layout 0. */ | |
288 | writel(BF_BCH_FLASH0LAYOUT0_NBLOCKS(block_count) | |
289 | | BF_BCH_FLASH0LAYOUT0_META_SIZE(metadata_size) | |
9013bb40 | 290 | | BF_BCH_FLASH0LAYOUT0_ECC0(ecc_strength, this) |
9ff16f08 | 291 | | BF_BCH_FLASH0LAYOUT0_GF(gf_len, this) |
9013bb40 | 292 | | BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(block_size, this), |
45dfc1a0 HS |
293 | r->bch_regs + HW_BCH_FLASH0LAYOUT0); |
294 | ||
295 | writel(BF_BCH_FLASH0LAYOUT1_PAGE_SIZE(page_size) | |
9013bb40 | 296 | | BF_BCH_FLASH0LAYOUT1_ECCN(ecc_strength, this) |
9ff16f08 | 297 | | BF_BCH_FLASH0LAYOUT1_GF(gf_len, this) |
9013bb40 | 298 | | BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this), |
45dfc1a0 HS |
299 | r->bch_regs + HW_BCH_FLASH0LAYOUT1); |
300 | ||
301 | /* Set *all* chip selects to use layout 0. */ | |
302 | writel(0, r->bch_regs + HW_BCH_LAYOUTSELECT); | |
303 | ||
304 | /* Enable interrupts. */ | |
305 | writel(BM_BCH_CTRL_COMPLETE_IRQ_EN, | |
306 | r->bch_regs + HW_BCH_CTRL_SET); | |
307 | ||
ff506172 | 308 | gpmi_disable_clk(this); |
45dfc1a0 HS |
309 | return 0; |
310 | err_out: | |
311 | return ret; | |
312 | } | |
313 | ||
/*
 * ns_to_cycles - convert a time in nanoseconds to clock cycles
 * @time:   duration in nanoseconds
 * @period: clock period in nanoseconds; must be non-zero
 * @min:    lower bound applied to the result
 *
 * Rounds @time up to a whole number of periods and returns that count, or
 * @min if the count is smaller. The round-up is computed as quotient plus
 * "remainder is non-zero" so that it cannot wrap for large @time, unlike
 * (time + period - 1) / period.
 */
static unsigned int ns_to_cycles(unsigned int time,
			unsigned int period, unsigned int min)
{
	unsigned int k;

	k = time / period;
	if (time % period)
		k++;

	return k > min ? k : min;
}
323 | ||
e10db1f0 HS |
324 | #define DEF_MIN_PROP_DELAY 5 |
325 | #define DEF_MAX_PROP_DELAY 9 | |
45dfc1a0 HS |
326 | /* Apply timing to current hardware conditions. */ |
327 | static int gpmi_nfc_compute_hardware_timing(struct gpmi_nand_data *this, | |
328 | struct gpmi_nfc_hardware_timing *hw) | |
329 | { | |
45dfc1a0 | 330 | struct timing_threshod *nfc = &timing_default_threshold; |
ae70ba2d | 331 | struct resources *r = &this->resources; |
45dfc1a0 HS |
332 | struct nand_chip *nand = &this->nand; |
333 | struct nand_timing target = this->timing; | |
334 | bool improved_timing_is_available; | |
335 | unsigned long clock_frequency_in_hz; | |
336 | unsigned int clock_period_in_ns; | |
337 | bool dll_use_half_periods; | |
338 | unsigned int dll_delay_shift; | |
339 | unsigned int max_sample_delay_in_ns; | |
340 | unsigned int address_setup_in_cycles; | |
341 | unsigned int data_setup_in_ns; | |
342 | unsigned int data_setup_in_cycles; | |
343 | unsigned int data_hold_in_cycles; | |
344 | int ideal_sample_delay_in_ns; | |
345 | unsigned int sample_delay_factor; | |
346 | int tEYE; | |
e10db1f0 HS |
347 | unsigned int min_prop_delay_in_ns = DEF_MIN_PROP_DELAY; |
348 | unsigned int max_prop_delay_in_ns = DEF_MAX_PROP_DELAY; | |
45dfc1a0 HS |
349 | |
350 | /* | |
351 | * If there are multiple chips, we need to relax the timings to allow | |
352 | * for signal distortion due to higher capacitance. | |
353 | */ | |
354 | if (nand->numchips > 2) { | |
355 | target.data_setup_in_ns += 10; | |
356 | target.data_hold_in_ns += 10; | |
357 | target.address_setup_in_ns += 10; | |
358 | } else if (nand->numchips > 1) { | |
359 | target.data_setup_in_ns += 5; | |
360 | target.data_hold_in_ns += 5; | |
361 | target.address_setup_in_ns += 5; | |
362 | } | |
363 | ||
364 | /* Check if improved timing information is available. */ | |
365 | improved_timing_is_available = | |
366 | (target.tREA_in_ns >= 0) && | |
367 | (target.tRLOH_in_ns >= 0) && | |
76c930be | 368 | (target.tRHOH_in_ns >= 0); |
45dfc1a0 HS |
369 | |
370 | /* Inspect the clock. */ | |
ae70ba2d | 371 | nfc->clock_frequency_in_hz = clk_get_rate(r->clock[0]); |
45dfc1a0 | 372 | clock_frequency_in_hz = nfc->clock_frequency_in_hz; |
ae70ba2d | 373 | clock_period_in_ns = NSEC_PER_SEC / clock_frequency_in_hz; |
45dfc1a0 HS |
374 | |
375 | /* | |
376 | * The NFC quantizes setup and hold parameters in terms of clock cycles. | |
377 | * Here, we quantize the setup and hold timing parameters to the | |
378 | * next-highest clock period to make sure we apply at least the | |
379 | * specified times. | |
380 | * | |
381 | * For data setup and data hold, the hardware interprets a value of zero | |
382 | * as the largest possible delay. This is not what's intended by a zero | |
383 | * in the input parameter, so we impose a minimum of one cycle. | |
384 | */ | |
385 | data_setup_in_cycles = ns_to_cycles(target.data_setup_in_ns, | |
386 | clock_period_in_ns, 1); | |
387 | data_hold_in_cycles = ns_to_cycles(target.data_hold_in_ns, | |
388 | clock_period_in_ns, 1); | |
389 | address_setup_in_cycles = ns_to_cycles(target.address_setup_in_ns, | |
390 | clock_period_in_ns, 0); | |
391 | ||
392 | /* | |
393 | * The clock's period affects the sample delay in a number of ways: | |
394 | * | |
395 | * (1) The NFC HAL tells us the maximum clock period the sample delay | |
396 | * DLL can tolerate. If the clock period is greater than half that | |
397 | * maximum, we must configure the DLL to be driven by half periods. | |
398 | * | |
399 | * (2) We need to convert from an ideal sample delay, in ns, to a | |
400 | * "sample delay factor," which the NFC uses. This factor depends on | |
401 | * whether we're driving the DLL with full or half periods. | |
402 | * Paraphrasing the reference manual: | |
403 | * | |
404 | * AD = SDF x 0.125 x RP | |
405 | * | |
406 | * where: | |
407 | * | |
408 | * AD is the applied delay, in ns. | |
409 | * SDF is the sample delay factor, which is dimensionless. | |
410 | * RP is the reference period, in ns, which is a full clock period | |
411 | * if the DLL is being driven by full periods, or half that if | |
412 | * the DLL is being driven by half periods. | |
413 | * | |
414 | * Let's re-arrange this in a way that's more useful to us: | |
415 | * | |
416 | * 8 | |
417 | * SDF = AD x ---- | |
418 | * RP | |
419 | * | |
420 | * The reference period is either the clock period or half that, so this | |
421 | * is: | |
422 | * | |
423 | * 8 AD x DDF | |
424 | * SDF = AD x ----- = -------- | |
425 | * f x P P | |
426 | * | |
427 | * where: | |
428 | * | |
429 | * f is 1 or 1/2, depending on how we're driving the DLL. | |
430 | * P is the clock period. | |
431 | * DDF is the DLL Delay Factor, a dimensionless value that | |
432 | * incorporates all the constants in the conversion. | |
433 | * | |
434 | * DDF will be either 8 or 16, both of which are powers of two. We can | |
435 | * reduce the cost of this conversion by using bit shifts instead of | |
436 | * multiplication or division. Thus: | |
437 | * | |
438 | * AD << DDS | |
439 | * SDF = --------- | |
440 | * P | |
441 | * | |
442 | * or | |
443 | * | |
444 | * AD = (SDF >> DDS) x P | |
445 | * | |
446 | * where: | |
447 | * | |
448 | * DDS is the DLL Delay Shift, the logarithm to base 2 of the DDF. | |
449 | */ | |
450 | if (clock_period_in_ns > (nfc->max_dll_clock_period_in_ns >> 1)) { | |
451 | dll_use_half_periods = true; | |
452 | dll_delay_shift = 3 + 1; | |
453 | } else { | |
454 | dll_use_half_periods = false; | |
455 | dll_delay_shift = 3; | |
456 | } | |
457 | ||
458 | /* | |
459 | * Compute the maximum sample delay the NFC allows, under current | |
460 | * conditions. If the clock is running too slowly, no sample delay is | |
461 | * possible. | |
462 | */ | |
463 | if (clock_period_in_ns > nfc->max_dll_clock_period_in_ns) | |
464 | max_sample_delay_in_ns = 0; | |
465 | else { | |
466 | /* | |
467 | * Compute the delay implied by the largest sample delay factor | |
468 | * the NFC allows. | |
469 | */ | |
470 | max_sample_delay_in_ns = | |
471 | (nfc->max_sample_delay_factor * clock_period_in_ns) >> | |
472 | dll_delay_shift; | |
473 | ||
474 | /* | |
475 | * Check if the implied sample delay larger than the NFC | |
476 | * actually allows. | |
477 | */ | |
478 | if (max_sample_delay_in_ns > nfc->max_dll_delay_in_ns) | |
479 | max_sample_delay_in_ns = nfc->max_dll_delay_in_ns; | |
480 | } | |
481 | ||
482 | /* | |
483 | * Check if improved timing information is available. If not, we have to | |
484 | * use a less-sophisticated algorithm. | |
485 | */ | |
486 | if (!improved_timing_is_available) { | |
487 | /* | |
488 | * Fold the read setup time required by the NFC into the ideal | |
489 | * sample delay. | |
490 | */ | |
491 | ideal_sample_delay_in_ns = target.gpmi_sample_delay_in_ns + | |
492 | nfc->internal_data_setup_in_ns; | |
493 | ||
494 | /* | |
495 | * The ideal sample delay may be greater than the maximum | |
496 | * allowed by the NFC. If so, we can trade off sample delay time | |
497 | * for more data setup time. | |
498 | * | |
499 | * In each iteration of the following loop, we add a cycle to | |
500 | * the data setup time and subtract a corresponding amount from | |
501 | * the sample delay until we've satisified the constraints or | |
502 | * can't do any better. | |
503 | */ | |
504 | while ((ideal_sample_delay_in_ns > max_sample_delay_in_ns) && | |
505 | (data_setup_in_cycles < nfc->max_data_setup_cycles)) { | |
506 | ||
507 | data_setup_in_cycles++; | |
508 | ideal_sample_delay_in_ns -= clock_period_in_ns; | |
509 | ||
510 | if (ideal_sample_delay_in_ns < 0) | |
511 | ideal_sample_delay_in_ns = 0; | |
512 | ||
513 | } | |
514 | ||
515 | /* | |
516 | * Compute the sample delay factor that corresponds most closely | |
517 | * to the ideal sample delay. If the result is too large for the | |
518 | * NFC, use the maximum value. | |
519 | * | |
520 | * Notice that we use the ns_to_cycles function to compute the | |
521 | * sample delay factor. We do this because the form of the | |
522 | * computation is the same as that for calculating cycles. | |
523 | */ | |
524 | sample_delay_factor = | |
525 | ns_to_cycles( | |
526 | ideal_sample_delay_in_ns << dll_delay_shift, | |
527 | clock_period_in_ns, 0); | |
528 | ||
529 | if (sample_delay_factor > nfc->max_sample_delay_factor) | |
530 | sample_delay_factor = nfc->max_sample_delay_factor; | |
531 | ||
532 | /* Skip to the part where we return our results. */ | |
533 | goto return_results; | |
534 | } | |
535 | ||
536 | /* | |
537 | * If control arrives here, we have more detailed timing information, | |
538 | * so we can use a better algorithm. | |
539 | */ | |
540 | ||
541 | /* | |
542 | * Fold the read setup time required by the NFC into the maximum | |
543 | * propagation delay. | |
544 | */ | |
545 | max_prop_delay_in_ns += nfc->internal_data_setup_in_ns; | |
546 | ||
547 | /* | |
548 | * Earlier, we computed the number of clock cycles required to satisfy | |
549 | * the data setup time. Now, we need to know the actual nanoseconds. | |
550 | */ | |
551 | data_setup_in_ns = clock_period_in_ns * data_setup_in_cycles; | |
552 | ||
553 | /* | |
554 | * Compute tEYE, the width of the data eye when reading from the NAND | |
555 | * Flash. The eye width is fundamentally determined by the data setup | |
556 | * time, perturbed by propagation delays and some characteristics of the | |
557 | * NAND Flash device. | |
558 | * | |
559 | * start of the eye = max_prop_delay + tREA | |
560 | * end of the eye = min_prop_delay + tRHOH + data_setup | |
561 | */ | |
562 | tEYE = (int)min_prop_delay_in_ns + (int)target.tRHOH_in_ns + | |
563 | (int)data_setup_in_ns; | |
564 | ||
565 | tEYE -= (int)max_prop_delay_in_ns + (int)target.tREA_in_ns; | |
566 | ||
567 | /* | |
568 | * The eye must be open. If it's not, we can try to open it by | |
569 | * increasing its main forcer, the data setup time. | |
570 | * | |
571 | * In each iteration of the following loop, we increase the data setup | |
572 | * time by a single clock cycle. We do this until either the eye is | |
573 | * open or we run into NFC limits. | |
574 | */ | |
575 | while ((tEYE <= 0) && | |
576 | (data_setup_in_cycles < nfc->max_data_setup_cycles)) { | |
577 | /* Give a cycle to data setup. */ | |
578 | data_setup_in_cycles++; | |
579 | /* Synchronize the data setup time with the cycles. */ | |
580 | data_setup_in_ns += clock_period_in_ns; | |
581 | /* Adjust tEYE accordingly. */ | |
582 | tEYE += clock_period_in_ns; | |
583 | } | |
584 | ||
585 | /* | |
586 | * When control arrives here, the eye is open. The ideal time to sample | |
587 | * the data is in the center of the eye: | |
588 | * | |
589 | * end of the eye + start of the eye | |
590 | * --------------------------------- - data_setup | |
591 | * 2 | |
592 | * | |
593 | * After some algebra, this simplifies to the code immediately below. | |
594 | */ | |
595 | ideal_sample_delay_in_ns = | |
596 | ((int)max_prop_delay_in_ns + | |
597 | (int)target.tREA_in_ns + | |
598 | (int)min_prop_delay_in_ns + | |
599 | (int)target.tRHOH_in_ns - | |
600 | (int)data_setup_in_ns) >> 1; | |
601 | ||
602 | /* | |
603 | * The following figure illustrates some aspects of a NAND Flash read: | |
604 | * | |
605 | * | |
606 | * __ _____________________________________ | |
607 | * RDN \_________________/ | |
608 | * | |
609 | * <---- tEYE -----> | |
610 | * /-----------------\ | |
611 | * Read Data ----------------------------< >--------- | |
612 | * \-----------------/ | |
613 | * ^ ^ ^ ^ | |
614 | * | | | | | |
615 | * |<--Data Setup -->|<--Delay Time -->| | | |
616 | * | | | | | |
617 | * | | | | |
618 | * | |<-- Quantized Delay Time -->| | |
619 | * | | | | |
620 | * | |
621 | * | |
622 | * We have some issues we must now address: | |
623 | * | |
624 | * (1) The *ideal* sample delay time must not be negative. If it is, we | |
625 | * jam it to zero. | |
626 | * | |
627 | * (2) The *ideal* sample delay time must not be greater than that | |
628 | * allowed by the NFC. If it is, we can increase the data setup | |
629 | * time, which will reduce the delay between the end of the data | |
630 | * setup and the center of the eye. It will also make the eye | |
631 | * larger, which might help with the next issue... | |
632 | * | |
633 | * (3) The *quantized* sample delay time must not fall either before the | |
634 | * eye opens or after it closes (the latter is the problem | |
635 | * illustrated in the above figure). | |
636 | */ | |
637 | ||
638 | /* Jam a negative ideal sample delay to zero. */ | |
639 | if (ideal_sample_delay_in_ns < 0) | |
640 | ideal_sample_delay_in_ns = 0; | |
641 | ||
642 | /* | |
643 | * Extend the data setup as needed to reduce the ideal sample delay | |
644 | * below the maximum permitted by the NFC. | |
645 | */ | |
646 | while ((ideal_sample_delay_in_ns > max_sample_delay_in_ns) && | |
647 | (data_setup_in_cycles < nfc->max_data_setup_cycles)) { | |
648 | ||
649 | /* Give a cycle to data setup. */ | |
650 | data_setup_in_cycles++; | |
651 | /* Synchronize the data setup time with the cycles. */ | |
652 | data_setup_in_ns += clock_period_in_ns; | |
653 | /* Adjust tEYE accordingly. */ | |
654 | tEYE += clock_period_in_ns; | |
655 | ||
656 | /* | |
657 | * Decrease the ideal sample delay by one half cycle, to keep it | |
658 | * in the middle of the eye. | |
659 | */ | |
660 | ideal_sample_delay_in_ns -= (clock_period_in_ns >> 1); | |
661 | ||
662 | /* Jam a negative ideal sample delay to zero. */ | |
663 | if (ideal_sample_delay_in_ns < 0) | |
664 | ideal_sample_delay_in_ns = 0; | |
665 | } | |
666 | ||
667 | /* | |
668 | * Compute the sample delay factor that corresponds to the ideal sample | |
669 | * delay. If the result is too large, then use the maximum allowed | |
670 | * value. | |
671 | * | |
672 | * Notice that we use the ns_to_cycles function to compute the sample | |
673 | * delay factor. We do this because the form of the computation is the | |
674 | * same as that for calculating cycles. | |
675 | */ | |
676 | sample_delay_factor = | |
677 | ns_to_cycles(ideal_sample_delay_in_ns << dll_delay_shift, | |
678 | clock_period_in_ns, 0); | |
679 | ||
680 | if (sample_delay_factor > nfc->max_sample_delay_factor) | |
681 | sample_delay_factor = nfc->max_sample_delay_factor; | |
682 | ||
683 | /* | |
684 | * These macros conveniently encapsulate a computation we'll use to | |
685 | * continuously evaluate whether or not the data sample delay is inside | |
686 | * the eye. | |
687 | */ | |
688 | #define IDEAL_DELAY ((int) ideal_sample_delay_in_ns) | |
689 | ||
690 | #define QUANTIZED_DELAY \ | |
691 | ((int) ((sample_delay_factor * clock_period_in_ns) >> \ | |
692 | dll_delay_shift)) | |
693 | ||
694 | #define DELAY_ERROR (abs(QUANTIZED_DELAY - IDEAL_DELAY)) | |
695 | ||
696 | #define SAMPLE_IS_NOT_WITHIN_THE_EYE (DELAY_ERROR > (tEYE >> 1)) | |
697 | ||
698 | /* | |
699 | * While the quantized sample time falls outside the eye, reduce the | |
700 | * sample delay or extend the data setup to move the sampling point back | |
701 | * toward the eye. Do not allow the number of data setup cycles to | |
702 | * exceed the maximum allowed by the NFC. | |
703 | */ | |
704 | while (SAMPLE_IS_NOT_WITHIN_THE_EYE && | |
705 | (data_setup_in_cycles < nfc->max_data_setup_cycles)) { | |
706 | /* | |
707 | * If control arrives here, the quantized sample delay falls | |
708 | * outside the eye. Check if it's before the eye opens, or after | |
709 | * the eye closes. | |
710 | */ | |
711 | if (QUANTIZED_DELAY > IDEAL_DELAY) { | |
712 | /* | |
713 | * If control arrives here, the quantized sample delay | |
714 | * falls after the eye closes. Decrease the quantized | |
715 | * delay time and then go back to re-evaluate. | |
716 | */ | |
717 | if (sample_delay_factor != 0) | |
718 | sample_delay_factor--; | |
719 | continue; | |
720 | } | |
721 | ||
722 | /* | |
723 | * If control arrives here, the quantized sample delay falls | |
724 | * before the eye opens. Shift the sample point by increasing | |
725 | * data setup time. This will also make the eye larger. | |
726 | */ | |
727 | ||
728 | /* Give a cycle to data setup. */ | |
729 | data_setup_in_cycles++; | |
730 | /* Synchronize the data setup time with the cycles. */ | |
731 | data_setup_in_ns += clock_period_in_ns; | |
732 | /* Adjust tEYE accordingly. */ | |
733 | tEYE += clock_period_in_ns; | |
734 | ||
735 | /* | |
736 | * Decrease the ideal sample delay by one half cycle, to keep it | |
737 | * in the middle of the eye. | |
738 | */ | |
739 | ideal_sample_delay_in_ns -= (clock_period_in_ns >> 1); | |
740 | ||
741 | /* ...and one less period for the delay time. */ | |
742 | ideal_sample_delay_in_ns -= clock_period_in_ns; | |
743 | ||
744 | /* Jam a negative ideal sample delay to zero. */ | |
745 | if (ideal_sample_delay_in_ns < 0) | |
746 | ideal_sample_delay_in_ns = 0; | |
747 | ||
748 | /* | |
749 | * We have a new ideal sample delay, so re-compute the quantized | |
750 | * delay. | |
751 | */ | |
752 | sample_delay_factor = | |
753 | ns_to_cycles( | |
754 | ideal_sample_delay_in_ns << dll_delay_shift, | |
755 | clock_period_in_ns, 0); | |
756 | ||
757 | if (sample_delay_factor > nfc->max_sample_delay_factor) | |
758 | sample_delay_factor = nfc->max_sample_delay_factor; | |
759 | } | |
760 | ||
761 | /* Control arrives here when we're ready to return our results. */ | |
762 | return_results: | |
763 | hw->data_setup_in_cycles = data_setup_in_cycles; | |
764 | hw->data_hold_in_cycles = data_hold_in_cycles; | |
765 | hw->address_setup_in_cycles = address_setup_in_cycles; | |
766 | hw->use_half_periods = dll_use_half_periods; | |
767 | hw->sample_delay_factor = sample_delay_factor; | |
ddab3838 | 768 | hw->device_busy_timeout = GPMI_DEFAULT_BUSY_TIMEOUT; |
d37e02d8 | 769 | hw->wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_4_TO_8NS; |
45dfc1a0 HS |
770 | |
771 | /* Return success. */ | |
772 | return 0; | |
773 | } | |
774 | ||
995fbbf5 HS |
775 | /* |
776 | * <1> Firstly, we should know what the GPMI-clock means. | |
777 | * The GPMI-clock is the internal clock in the gpmi nand controller. | |
778 | * If you set 100MHz to gpmi nand controller, the GPMI-clock's period | |
779 | * is 10ns. Mark the GPMI-clock's period as GPMI-clock-period. | |
780 | * | |
781 | * <2> Secondly, we should know what's the frequency on the nand chip pins. | |
782 | * The frequency on the nand chip pins is derived from the GPMI-clock. | |
783 | * We can get it from the following equation: | |
784 | * | |
785 | * F = G / (DS + DH) | |
786 | * | |
787 | * F : the frequency on the nand chip pins. | |
788 | * G : the GPMI clock, such as 100MHz. | |
789 | * DS : GPMI_HW_GPMI_TIMING0:DATA_SETUP | |
790 | * DH : GPMI_HW_GPMI_TIMING0:DATA_HOLD | |
791 | * | |
792 | * <3> Thirdly, when the frequency on the nand chip pins is above 33MHz, | |
793 | * the nand EDO(extended Data Out) timing could be applied. | |
794 | * The GPMI implements a feedback read strobe to sample the read data. | |
795 | * The feedback read strobe can be delayed to support the nand EDO timing | |
796 | * where the read strobe may deassert before the read data is valid, and | |
797 | * read data is valid for some time after read strobe. | |
798 | * | |
799 | * The following figure illustrates some aspects of a NAND Flash read: | |
800 | * | |
801 | * |<---tREA---->| | |
802 | * | | | |
803 | * | | | | |
804 | * |<--tRP-->| | | |
805 | * | | | | |
806 | * __ ___|__________________________________ | |
807 | * RDN \________/ | | |
808 | * | | |
809 | * /---------\ | |
810 | * Read Data --------------< >--------- | |
811 | * \---------/ | |
812 | * | | | |
813 | * |<-D->| | |
814 | * FeedbackRDN ________ ____________ | |
815 | * \___________/ | |
816 | * | |
817 | * D stands for delay, set in the HW_GPMI_CTRL1:RDN_DELAY. | |
818 | * | |
819 | * | |
820 | * <4> Now, we begin to describe how to compute the right RDN_DELAY. | |
821 | * | |
822 | * 4.1) From the aspect of the nand chip pins: | |
823 | * Delay = (tREA + C - tRP) {1} | |
824 | * | |
825 | * tREA : the maximum read access time. From the ONFI nand standards, | |
826 | * we know that tREA is 16ns in mode 5, tREA is 20ns in mode 4. |
827 | * Please check it in : www.onfi.org | |
828 | * C : a constant used to adjust the delay. The default is 4. |
829 | * tRP : the read pulse width. | |
830 | * Specified by the HW_GPMI_TIMING0:DATA_SETUP: | |
831 | * tRP = (GPMI-clock-period) * DATA_SETUP | |
832 | * | |
833 | * 4.2) From the aspect of the GPMI nand controller: | |
834 | * Delay = RDN_DELAY * 0.125 * RP {2} | |
835 | * | |
836 | * RP : the DLL reference period. | |
837 | * if (GPMI-clock-period > DLL_THRESHOLD) |
838 | * RP = GPMI-clock-period / 2; |
839 | * else |
840 | * RP = GPMI-clock-period; |
841 | * |
842 | * Set the HW_GPMI_CTRL1:HALF_PERIOD if GPMI-clock-period |
843 | * is greater than DLL_THRESHOLD. In other SOCs, the DLL_THRESHOLD |
844 | * is 16ns, but in mx6q, we use 12ns. |
845 | * | |
846 | * 4.3) since {1} equals {2}, we get: | |
847 | * | |
848 | * (tREA + 4 - tRP) * 8 | |
849 | * RDN_DELAY = --------------------- {3} | |
850 | * RP | |
851 | * | |
852 | * 4.4) We only support the fastest asynchronous mode of ONFI nand. | |
853 | * For some ONFI nand, the mode 4 is the fastest mode; | |
854 | * while for some ONFI nand, the mode 5 is the fastest mode. | |
855 | * So we only support the mode 4 and mode 5. There is no need to |
856 | * support other modes. |
857 | */ | |
858 | static void gpmi_compute_edo_timing(struct gpmi_nand_data *this, | |
859 | struct gpmi_nfc_hardware_timing *hw) | |
860 | { | |
861 | struct resources *r = &this->resources; | |
862 | unsigned long rate = clk_get_rate(r->clock[0]); | |
863 | int mode = this->timing_mode; | |
6189cccb | 864 | int dll_threshold = this->devdata->max_chain_delay; |
995fbbf5 HS |
865 | unsigned long delay; |
866 | unsigned long clk_period; | |
867 | int t_rea; | |
868 | int c = 4; | |
869 | int t_rp; | |
870 | int rp; | |
871 | ||
872 | /* | |
873 | * [1] for GPMI_HW_GPMI_TIMING0: | |
874 | * The async mode requires 40MHz for mode 4, 50MHz for mode 5. | |
875 | * The GPMI can support 100MHz at most. So if we want to | |
876 | * get the 40MHz or 50MHz, we have to set DS=1, DH=1. | |
877 | * Set the ADDRESS_SETUP to 0 in mode 4. | |
878 | */ | |
879 | hw->data_setup_in_cycles = 1; | |
880 | hw->data_hold_in_cycles = 1; | |
881 | hw->address_setup_in_cycles = ((mode == 5) ? 1 : 0); | |
882 | ||
883 | /* [2] for GPMI_HW_GPMI_TIMING1 */ | |
884 | hw->device_busy_timeout = 0x9000; | |
885 | ||
886 | /* [3] for GPMI_HW_GPMI_CTRL1 */ | |
887 | hw->wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY; | |
995fbbf5 HS |
888 | |
889 | /* | |
890 | * Enlarge 10 times for the numerator and denominator in {3}. | |
891 | * This make us to get more accurate result. | |
892 | */ | |
893 | clk_period = NSEC_PER_SEC / (rate / 10); | |
894 | dll_threshold *= 10; | |
895 | t_rea = ((mode == 5) ? 16 : 20) * 10; | |
896 | c *= 10; | |
897 | ||
898 | t_rp = clk_period * 1; /* DATA_SETUP is 1 */ | |
899 | ||
900 | if (clk_period > dll_threshold) { | |
901 | hw->use_half_periods = 1; | |
902 | rp = clk_period / 2; | |
903 | } else { | |
904 | hw->use_half_periods = 0; | |
905 | rp = clk_period; | |
906 | } | |
907 | ||
908 | /* | |
909 | * Multiply the numerator with 10, we could do a round off: | |
910 | * 7.8 round up to 8; 7.4 round down to 7. | |
911 | */ | |
912 | delay = (((t_rea + c - t_rp) * 8) * 10) / rp; | |
913 | delay = (delay + 5) / 10; | |
914 | ||
915 | hw->sample_delay_factor = delay; | |
916 | } | |
917 | ||
918 | static int enable_edo_mode(struct gpmi_nand_data *this, int mode) | |
919 | { | |
920 | struct resources *r = &this->resources; | |
921 | struct nand_chip *nand = &this->nand; | |
2a690b25 | 922 | struct mtd_info *mtd = nand_to_mtd(nand); |
df877fb3 | 923 | uint8_t *feature; |
995fbbf5 HS |
924 | unsigned long rate; |
925 | int ret; | |
926 | ||
df877fb3 HS |
927 | feature = kzalloc(ONFI_SUBFEATURE_PARAM_LEN, GFP_KERNEL); |
928 | if (!feature) | |
929 | return -ENOMEM; | |
930 | ||
995fbbf5 HS |
931 | nand->select_chip(mtd, 0); |
932 | ||
933 | /* [1] send SET FEATURE commond to NAND */ | |
934 | feature[0] = mode; | |
935 | ret = nand->onfi_set_features(mtd, nand, | |
936 | ONFI_FEATURE_ADDR_TIMING_MODE, feature); | |
937 | if (ret) | |
938 | goto err_out; | |
939 | ||
940 | /* [2] send GET FEATURE command to double-check the timing mode */ | |
941 | memset(feature, 0, ONFI_SUBFEATURE_PARAM_LEN); | |
942 | ret = nand->onfi_get_features(mtd, nand, | |
943 | ONFI_FEATURE_ADDR_TIMING_MODE, feature); | |
944 | if (ret || feature[0] != mode) | |
945 | goto err_out; | |
946 | ||
947 | nand->select_chip(mtd, -1); | |
948 | ||
949 | /* [3] set the main IO clock, 100MHz for mode 5, 80MHz for mode 4. */ | |
950 | rate = (mode == 5) ? 100000000 : 80000000; | |
951 | clk_set_rate(r->clock[0], rate); | |
952 | ||
9c95f11b HS |
953 | /* Let the gpmi_begin() re-compute the timing again. */ |
954 | this->flags &= ~GPMI_TIMING_INIT_OK; | |
955 | ||
995fbbf5 HS |
956 | this->flags |= GPMI_ASYNC_EDO_ENABLED; |
957 | this->timing_mode = mode; | |
df877fb3 | 958 | kfree(feature); |
995fbbf5 HS |
959 | dev_info(this->dev, "enable the asynchronous EDO mode %d\n", mode); |
960 | return 0; | |
961 | ||
962 | err_out: | |
963 | nand->select_chip(mtd, -1); | |
df877fb3 | 964 | kfree(feature); |
995fbbf5 HS |
965 | dev_err(this->dev, "mode:%d ,failed in set feature.\n", mode); |
966 | return -EINVAL; | |
967 | } | |
968 | ||
969 | int gpmi_extra_init(struct gpmi_nand_data *this) | |
970 | { | |
971 | struct nand_chip *chip = &this->nand; | |
972 | ||
973 | /* Enable the asynchronous EDO feature. */ | |
91f5498e | 974 | if (GPMI_IS_MX6(this) && chip->onfi_version) { |
995fbbf5 HS |
975 | int mode = onfi_get_async_timing_mode(chip); |
976 | ||
977 | /* We only support the timing mode 4 and mode 5. */ | |
978 | if (mode & ONFI_TIMING_MODE_5) | |
979 | mode = 5; | |
980 | else if (mode & ONFI_TIMING_MODE_4) | |
981 | mode = 4; | |
982 | else | |
983 | return 0; | |
984 | ||
985 | return enable_edo_mode(this, mode); | |
986 | } | |
987 | return 0; | |
988 | } | |
989 | ||
45dfc1a0 HS |
990 | /* Begin the I/O */ |
991 | void gpmi_begin(struct gpmi_nand_data *this) | |
992 | { | |
993 | struct resources *r = &this->resources; | |
513d57e1 | 994 | void __iomem *gpmi_regs = r->gpmi_regs; |
45dfc1a0 HS |
995 | unsigned int clock_period_in_ns; |
996 | uint32_t reg; | |
997 | unsigned int dll_wait_time_in_us; | |
998 | struct gpmi_nfc_hardware_timing hw; | |
999 | int ret; | |
1000 | ||
1001 | /* Enable the clock. */ | |
ff506172 | 1002 | ret = gpmi_enable_clk(this); |
45dfc1a0 | 1003 | if (ret) { |
da40c16a | 1004 | dev_err(this->dev, "We failed in enable the clk\n"); |
45dfc1a0 HS |
1005 | goto err_out; |
1006 | } | |
1007 | ||
9c95f11b HS |
1008 | /* Only initialize the timing once */ |
1009 | if (this->flags & GPMI_TIMING_INIT_OK) | |
1010 | return; | |
1011 | this->flags |= GPMI_TIMING_INIT_OK; | |
1012 | ||
995fbbf5 HS |
1013 | if (this->flags & GPMI_ASYNC_EDO_ENABLED) |
1014 | gpmi_compute_edo_timing(this, &hw); | |
1015 | else | |
1016 | gpmi_nfc_compute_hardware_timing(this, &hw); | |
45dfc1a0 | 1017 | |
ddab3838 | 1018 | /* [1] Set HW_GPMI_TIMING0 */ |
45dfc1a0 HS |
1019 | reg = BF_GPMI_TIMING0_ADDRESS_SETUP(hw.address_setup_in_cycles) | |
1020 | BF_GPMI_TIMING0_DATA_HOLD(hw.data_hold_in_cycles) | | |
76c930be | 1021 | BF_GPMI_TIMING0_DATA_SETUP(hw.data_setup_in_cycles); |
45dfc1a0 HS |
1022 | |
1023 | writel(reg, gpmi_regs + HW_GPMI_TIMING0); | |
1024 | ||
ddab3838 HS |
1025 | /* [2] Set HW_GPMI_TIMING1 */ |
1026 | writel(BF_GPMI_TIMING1_BUSY_TIMEOUT(hw.device_busy_timeout), | |
1027 | gpmi_regs + HW_GPMI_TIMING1); | |
1028 | ||
1029 | /* [3] The following code is to set the HW_GPMI_CTRL1. */ | |
1030 | ||
d37e02d8 HS |
1031 | /* Set the WRN_DLY_SEL */ |
1032 | writel(BM_GPMI_CTRL1_WRN_DLY_SEL, gpmi_regs + HW_GPMI_CTRL1_CLR); | |
1033 | writel(BF_GPMI_CTRL1_WRN_DLY_SEL(hw.wrn_dly_sel), | |
1034 | gpmi_regs + HW_GPMI_CTRL1_SET); | |
1035 | ||
ddab3838 | 1036 | /* DLL_ENABLE must be set to 0 when setting RDN_DELAY or HALF_PERIOD. */ |
45dfc1a0 HS |
1037 | writel(BM_GPMI_CTRL1_DLL_ENABLE, gpmi_regs + HW_GPMI_CTRL1_CLR); |
1038 | ||
1039 | /* Clear out the DLL control fields. */ | |
c50d35a9 HS |
1040 | reg = BM_GPMI_CTRL1_RDN_DELAY | BM_GPMI_CTRL1_HALF_PERIOD; |
1041 | writel(reg, gpmi_regs + HW_GPMI_CTRL1_CLR); | |
45dfc1a0 HS |
1042 | |
1043 | /* If no sample delay is called for, return immediately. */ | |
1044 | if (!hw.sample_delay_factor) | |
1045 | return; | |
1046 | ||
c50d35a9 HS |
1047 | /* Set RDN_DELAY or HALF_PERIOD. */ |
1048 | reg = ((hw.use_half_periods) ? BM_GPMI_CTRL1_HALF_PERIOD : 0) | |
1049 | | BF_GPMI_CTRL1_RDN_DELAY(hw.sample_delay_factor); | |
45dfc1a0 | 1050 | |
c50d35a9 | 1051 | writel(reg, gpmi_regs + HW_GPMI_CTRL1_SET); |
45dfc1a0 | 1052 | |
c50d35a9 | 1053 | /* At last, we enable the DLL. */ |
45dfc1a0 HS |
1054 | writel(BM_GPMI_CTRL1_DLL_ENABLE, gpmi_regs + HW_GPMI_CTRL1_SET); |
1055 | ||
1056 | /* | |
1057 | * After we enable the GPMI DLL, we have to wait 64 clock cycles before | |
c50d35a9 HS |
1058 | * we can use the GPMI. Calculate the amount of time we need to wait, |
1059 | * in microseconds. | |
45dfc1a0 | 1060 | */ |
ae70ba2d | 1061 | clock_period_in_ns = NSEC_PER_SEC / clk_get_rate(r->clock[0]); |
45dfc1a0 HS |
1062 | dll_wait_time_in_us = (clock_period_in_ns * 64) / 1000; |
1063 | ||
1064 | if (!dll_wait_time_in_us) | |
1065 | dll_wait_time_in_us = 1; | |
1066 | ||
1067 | /* Wait for the DLL to settle. */ | |
1068 | udelay(dll_wait_time_in_us); | |
1069 | ||
1070 | err_out: | |
1071 | return; | |
1072 | } | |
1073 | ||
/* End the I/O: counterpart of gpmi_begin(), disables the GPMI clock. */
void gpmi_end(struct gpmi_nand_data *this)
{
	gpmi_disable_clk(this);
}
1078 | ||
1079 | /* Clears a BCH interrupt. */ | |
1080 | void gpmi_clear_bch(struct gpmi_nand_data *this) | |
1081 | { | |
1082 | struct resources *r = &this->resources; | |
1083 | writel(BM_BCH_CTRL_COMPLETE_IRQ, r->bch_regs + HW_BCH_CTRL_CLR); | |
1084 | } | |
1085 | ||
1086 | /* Returns the Ready/Busy status of the given chip. */ | |
1087 | int gpmi_is_ready(struct gpmi_nand_data *this, unsigned chip) | |
1088 | { | |
1089 | struct resources *r = &this->resources; | |
1090 | uint32_t mask = 0; | |
1091 | uint32_t reg = 0; | |
1092 | ||
1093 | if (GPMI_IS_MX23(this)) { | |
1094 | mask = MX23_BM_GPMI_DEBUG_READY0 << chip; | |
1095 | reg = readl(r->gpmi_regs + HW_GPMI_DEBUG); | |
91f5498e | 1096 | } else if (GPMI_IS_MX28(this) || GPMI_IS_MX6(this)) { |
7caa4fd2 HS |
1097 | /* |
1098 | * In the imx6, all the ready/busy pins are bound | |
1099 | * together. So we only need to check chip 0. | |
1100 | */ | |
91f5498e | 1101 | if (GPMI_IS_MX6(this)) |
7caa4fd2 HS |
1102 | chip = 0; |
1103 | ||
9013bb40 | 1104 | /* MX28 shares the same R/B register as MX6Q. */ |
45dfc1a0 HS |
1105 | mask = MX28_BF_GPMI_STAT_READY_BUSY(1 << chip); |
1106 | reg = readl(r->gpmi_regs + HW_GPMI_STAT); | |
1107 | } else | |
f42cf8d6 | 1108 | dev_err(this->dev, "unknown arch.\n"); |
45dfc1a0 HS |
1109 | return reg & mask; |
1110 | } | |
1111 | ||
1112 | static inline void set_dma_type(struct gpmi_nand_data *this, | |
1113 | enum dma_ops_type type) | |
1114 | { | |
1115 | this->last_dma_type = this->dma_type; | |
1116 | this->dma_type = type; | |
1117 | } | |
1118 | ||
1119 | int gpmi_send_command(struct gpmi_nand_data *this) | |
1120 | { | |
1121 | struct dma_chan *channel = get_dma_chan(this); | |
1122 | struct dma_async_tx_descriptor *desc; | |
1123 | struct scatterlist *sgl; | |
1124 | int chip = this->current_chip; | |
1125 | u32 pio[3]; | |
1126 | ||
1127 | /* [1] send out the PIO words */ | |
1128 | pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(BV_GPMI_CTRL0_COMMAND_MODE__WRITE) | |
1129 | | BM_GPMI_CTRL0_WORD_LENGTH | |
1130 | | BF_GPMI_CTRL0_CS(chip, this) | |
1131 | | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) | |
1132 | | BF_GPMI_CTRL0_ADDRESS(BV_GPMI_CTRL0_ADDRESS__NAND_CLE) | |
1133 | | BM_GPMI_CTRL0_ADDRESS_INCREMENT | |
1134 | | BF_GPMI_CTRL0_XFER_COUNT(this->command_length); | |
1135 | pio[1] = pio[2] = 0; | |
16052827 | 1136 | desc = dmaengine_prep_slave_sg(channel, |
45dfc1a0 | 1137 | (struct scatterlist *)pio, |
0ef7e206 | 1138 | ARRAY_SIZE(pio), DMA_TRANS_NONE, 0); |
43a34b8b HS |
1139 | if (!desc) |
1140 | return -EINVAL; | |
45dfc1a0 HS |
1141 | |
1142 | /* [2] send out the COMMAND + ADDRESS string stored in @buffer */ | |
1143 | sgl = &this->cmd_sgl; | |
1144 | ||
1145 | sg_init_one(sgl, this->cmd_buffer, this->command_length); | |
1146 | dma_map_sg(this->dev, sgl, 1, DMA_TO_DEVICE); | |
623ff773 | 1147 | desc = dmaengine_prep_slave_sg(channel, |
921de864 HS |
1148 | sgl, 1, DMA_MEM_TO_DEV, |
1149 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | |
43a34b8b HS |
1150 | if (!desc) |
1151 | return -EINVAL; | |
45dfc1a0 HS |
1152 | |
1153 | /* [3] submit the DMA */ | |
1154 | set_dma_type(this, DMA_FOR_COMMAND); | |
1155 | return start_dma_without_bch_irq(this, desc); | |
1156 | } | |
1157 | ||
1158 | int gpmi_send_data(struct gpmi_nand_data *this) | |
1159 | { | |
1160 | struct dma_async_tx_descriptor *desc; | |
1161 | struct dma_chan *channel = get_dma_chan(this); | |
1162 | int chip = this->current_chip; | |
1163 | uint32_t command_mode; | |
1164 | uint32_t address; | |
1165 | u32 pio[2]; | |
1166 | ||
1167 | /* [1] PIO */ | |
1168 | command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WRITE; | |
1169 | address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; | |
1170 | ||
1171 | pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) | |
1172 | | BM_GPMI_CTRL0_WORD_LENGTH | |
1173 | | BF_GPMI_CTRL0_CS(chip, this) | |
1174 | | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) | |
1175 | | BF_GPMI_CTRL0_ADDRESS(address) | |
1176 | | BF_GPMI_CTRL0_XFER_COUNT(this->upper_len); | |
1177 | pio[1] = 0; | |
16052827 | 1178 | desc = dmaengine_prep_slave_sg(channel, (struct scatterlist *)pio, |
0ef7e206 | 1179 | ARRAY_SIZE(pio), DMA_TRANS_NONE, 0); |
43a34b8b HS |
1180 | if (!desc) |
1181 | return -EINVAL; | |
45dfc1a0 HS |
1182 | |
1183 | /* [2] send DMA request */ | |
1184 | prepare_data_dma(this, DMA_TO_DEVICE); | |
16052827 | 1185 | desc = dmaengine_prep_slave_sg(channel, &this->data_sgl, |
921de864 HS |
1186 | 1, DMA_MEM_TO_DEV, |
1187 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | |
43a34b8b HS |
1188 | if (!desc) |
1189 | return -EINVAL; | |
1190 | ||
45dfc1a0 HS |
1191 | /* [3] submit the DMA */ |
1192 | set_dma_type(this, DMA_FOR_WRITE_DATA); | |
1193 | return start_dma_without_bch_irq(this, desc); | |
1194 | } | |
1195 | ||
1196 | int gpmi_read_data(struct gpmi_nand_data *this) | |
1197 | { | |
1198 | struct dma_async_tx_descriptor *desc; | |
1199 | struct dma_chan *channel = get_dma_chan(this); | |
1200 | int chip = this->current_chip; | |
1201 | u32 pio[2]; | |
1202 | ||
1203 | /* [1] : send PIO */ | |
1204 | pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(BV_GPMI_CTRL0_COMMAND_MODE__READ) | |
1205 | | BM_GPMI_CTRL0_WORD_LENGTH | |
1206 | | BF_GPMI_CTRL0_CS(chip, this) | |
1207 | | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) | |
1208 | | BF_GPMI_CTRL0_ADDRESS(BV_GPMI_CTRL0_ADDRESS__NAND_DATA) | |
1209 | | BF_GPMI_CTRL0_XFER_COUNT(this->upper_len); | |
1210 | pio[1] = 0; | |
16052827 | 1211 | desc = dmaengine_prep_slave_sg(channel, |
45dfc1a0 | 1212 | (struct scatterlist *)pio, |
0ef7e206 | 1213 | ARRAY_SIZE(pio), DMA_TRANS_NONE, 0); |
43a34b8b HS |
1214 | if (!desc) |
1215 | return -EINVAL; | |
45dfc1a0 HS |
1216 | |
1217 | /* [2] : send DMA request */ | |
1218 | prepare_data_dma(this, DMA_FROM_DEVICE); | |
16052827 | 1219 | desc = dmaengine_prep_slave_sg(channel, &this->data_sgl, |
921de864 HS |
1220 | 1, DMA_DEV_TO_MEM, |
1221 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | |
43a34b8b HS |
1222 | if (!desc) |
1223 | return -EINVAL; | |
45dfc1a0 HS |
1224 | |
1225 | /* [3] : submit the DMA */ | |
1226 | set_dma_type(this, DMA_FOR_READ_DATA); | |
1227 | return start_dma_without_bch_irq(this, desc); | |
1228 | } | |
1229 | ||
1230 | int gpmi_send_page(struct gpmi_nand_data *this, | |
1231 | dma_addr_t payload, dma_addr_t auxiliary) | |
1232 | { | |
1233 | struct bch_geometry *geo = &this->bch_geometry; | |
1234 | uint32_t command_mode; | |
1235 | uint32_t address; | |
1236 | uint32_t ecc_command; | |
1237 | uint32_t buffer_mask; | |
1238 | struct dma_async_tx_descriptor *desc; | |
1239 | struct dma_chan *channel = get_dma_chan(this); | |
1240 | int chip = this->current_chip; | |
1241 | u32 pio[6]; | |
1242 | ||
1243 | /* A DMA descriptor that does an ECC page read. */ | |
1244 | command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WRITE; | |
1245 | address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; | |
1246 | ecc_command = BV_GPMI_ECCCTRL_ECC_CMD__BCH_ENCODE; | |
1247 | buffer_mask = BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_PAGE | | |
1248 | BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_AUXONLY; | |
1249 | ||
1250 | pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) | |
1251 | | BM_GPMI_CTRL0_WORD_LENGTH | |
1252 | | BF_GPMI_CTRL0_CS(chip, this) | |
1253 | | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) | |
1254 | | BF_GPMI_CTRL0_ADDRESS(address) | |
1255 | | BF_GPMI_CTRL0_XFER_COUNT(0); | |
1256 | pio[1] = 0; | |
1257 | pio[2] = BM_GPMI_ECCCTRL_ENABLE_ECC | |
1258 | | BF_GPMI_ECCCTRL_ECC_CMD(ecc_command) | |
1259 | | BF_GPMI_ECCCTRL_BUFFER_MASK(buffer_mask); | |
1260 | pio[3] = geo->page_size; | |
1261 | pio[4] = payload; | |
1262 | pio[5] = auxiliary; | |
1263 | ||
623ff773 | 1264 | desc = dmaengine_prep_slave_sg(channel, |
45dfc1a0 | 1265 | (struct scatterlist *)pio, |
921de864 HS |
1266 | ARRAY_SIZE(pio), DMA_TRANS_NONE, |
1267 | DMA_CTRL_ACK); | |
43a34b8b HS |
1268 | if (!desc) |
1269 | return -EINVAL; | |
1270 | ||
45dfc1a0 HS |
1271 | set_dma_type(this, DMA_FOR_WRITE_ECC_PAGE); |
1272 | return start_dma_with_bch_irq(this, desc); | |
1273 | } | |
1274 | ||
1275 | int gpmi_read_page(struct gpmi_nand_data *this, | |
1276 | dma_addr_t payload, dma_addr_t auxiliary) | |
1277 | { | |
1278 | struct bch_geometry *geo = &this->bch_geometry; | |
1279 | uint32_t command_mode; | |
1280 | uint32_t address; | |
1281 | uint32_t ecc_command; | |
1282 | uint32_t buffer_mask; | |
1283 | struct dma_async_tx_descriptor *desc; | |
1284 | struct dma_chan *channel = get_dma_chan(this); | |
1285 | int chip = this->current_chip; | |
1286 | u32 pio[6]; | |
1287 | ||
1288 | /* [1] Wait for the chip to report ready. */ | |
1289 | command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WAIT_FOR_READY; | |
1290 | address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; | |
1291 | ||
1292 | pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) | |
1293 | | BM_GPMI_CTRL0_WORD_LENGTH | |
1294 | | BF_GPMI_CTRL0_CS(chip, this) | |
1295 | | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) | |
1296 | | BF_GPMI_CTRL0_ADDRESS(address) | |
1297 | | BF_GPMI_CTRL0_XFER_COUNT(0); | |
1298 | pio[1] = 0; | |
16052827 | 1299 | desc = dmaengine_prep_slave_sg(channel, |
0ef7e206 SG |
1300 | (struct scatterlist *)pio, 2, |
1301 | DMA_TRANS_NONE, 0); | |
43a34b8b HS |
1302 | if (!desc) |
1303 | return -EINVAL; | |
45dfc1a0 HS |
1304 | |
1305 | /* [2] Enable the BCH block and read. */ | |
1306 | command_mode = BV_GPMI_CTRL0_COMMAND_MODE__READ; | |
1307 | address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; | |
1308 | ecc_command = BV_GPMI_ECCCTRL_ECC_CMD__BCH_DECODE; | |
1309 | buffer_mask = BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_PAGE | |
1310 | | BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_AUXONLY; | |
1311 | ||
1312 | pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) | |
1313 | | BM_GPMI_CTRL0_WORD_LENGTH | |
1314 | | BF_GPMI_CTRL0_CS(chip, this) | |
1315 | | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) | |
1316 | | BF_GPMI_CTRL0_ADDRESS(address) | |
1317 | | BF_GPMI_CTRL0_XFER_COUNT(geo->page_size); | |
1318 | ||
1319 | pio[1] = 0; | |
1320 | pio[2] = BM_GPMI_ECCCTRL_ENABLE_ECC | |
1321 | | BF_GPMI_ECCCTRL_ECC_CMD(ecc_command) | |
1322 | | BF_GPMI_ECCCTRL_BUFFER_MASK(buffer_mask); | |
1323 | pio[3] = geo->page_size; | |
1324 | pio[4] = payload; | |
1325 | pio[5] = auxiliary; | |
16052827 | 1326 | desc = dmaengine_prep_slave_sg(channel, |
45dfc1a0 | 1327 | (struct scatterlist *)pio, |
921de864 HS |
1328 | ARRAY_SIZE(pio), DMA_TRANS_NONE, |
1329 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | |
43a34b8b HS |
1330 | if (!desc) |
1331 | return -EINVAL; | |
45dfc1a0 HS |
1332 | |
1333 | /* [3] Disable the BCH block */ | |
1334 | command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WAIT_FOR_READY; | |
1335 | address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; | |
1336 | ||
1337 | pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) | |
1338 | | BM_GPMI_CTRL0_WORD_LENGTH | |
1339 | | BF_GPMI_CTRL0_CS(chip, this) | |
1340 | | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) | |
1341 | | BF_GPMI_CTRL0_ADDRESS(address) | |
1342 | | BF_GPMI_CTRL0_XFER_COUNT(geo->page_size); | |
1343 | pio[1] = 0; | |
09ef90d9 | 1344 | pio[2] = 0; /* clear GPMI_HW_GPMI_ECCCTRL, disable the BCH. */ |
16052827 | 1345 | desc = dmaengine_prep_slave_sg(channel, |
09ef90d9 | 1346 | (struct scatterlist *)pio, 3, |
921de864 HS |
1347 | DMA_TRANS_NONE, |
1348 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | |
43a34b8b HS |
1349 | if (!desc) |
1350 | return -EINVAL; | |
45dfc1a0 HS |
1351 | |
1352 | /* [4] submit the DMA */ | |
1353 | set_dma_type(this, DMA_FOR_READ_ECC_PAGE); | |
1354 | return start_dma_with_bch_irq(this, desc); | |
1355 | } | |
66de54a7 BB |
1356 | |
1357 | /** | |
1358 | * gpmi_copy_bits - copy bits from one memory region to another | |
1359 | * @dst: destination buffer | |
1360 | * @dst_bit_off: bit offset we're starting to write at | |
1361 | * @src: source buffer | |
1362 | * @src_bit_off: bit offset we're starting to read from | |
1363 | * @nbits: number of bits to copy | |
1364 | * | |
1365 | * This functions copies bits from one memory region to another, and is used by | |
1366 | * the GPMI driver to copy ECC sections which are not guaranteed to be byte | |
1367 | * aligned. | |
1368 | * | |
1369 | * src and dst should not overlap. | |
1370 | * | |
1371 | */ | |
1372 | void gpmi_copy_bits(u8 *dst, size_t dst_bit_off, | |
1373 | const u8 *src, size_t src_bit_off, | |
1374 | size_t nbits) | |
1375 | { | |
1376 | size_t i; | |
1377 | size_t nbytes; | |
1378 | u32 src_buffer = 0; | |
1379 | size_t bits_in_src_buffer = 0; | |
1380 | ||
1381 | if (!nbits) | |
1382 | return; | |
1383 | ||
1384 | /* | |
1385 | * Move src and dst pointers to the closest byte pointer and store bit | |
1386 | * offsets within a byte. | |
1387 | */ | |
1388 | src += src_bit_off / 8; | |
1389 | src_bit_off %= 8; | |
1390 | ||
1391 | dst += dst_bit_off / 8; | |
1392 | dst_bit_off %= 8; | |
1393 | ||
1394 | /* | |
1395 | * Initialize the src_buffer value with bits available in the first | |
1396 | * byte of data so that we end up with a byte aligned src pointer. | |
1397 | */ | |
1398 | if (src_bit_off) { | |
1399 | src_buffer = src[0] >> src_bit_off; | |
1400 | if (nbits >= (8 - src_bit_off)) { | |
1401 | bits_in_src_buffer += 8 - src_bit_off; | |
1402 | } else { | |
1403 | src_buffer &= GENMASK(nbits - 1, 0); | |
1404 | bits_in_src_buffer += nbits; | |
1405 | } | |
1406 | nbits -= bits_in_src_buffer; | |
1407 | src++; | |
1408 | } | |
1409 | ||
1410 | /* Calculate the number of bytes that can be copied from src to dst. */ | |
1411 | nbytes = nbits / 8; | |
1412 | ||
1413 | /* Try to align dst to a byte boundary. */ | |
1414 | if (dst_bit_off) { | |
1415 | if (bits_in_src_buffer < (8 - dst_bit_off) && nbytes) { | |
1416 | src_buffer |= src[0] << bits_in_src_buffer; | |
1417 | bits_in_src_buffer += 8; | |
1418 | src++; | |
1419 | nbytes--; | |
1420 | } | |
1421 | ||
1422 | if (bits_in_src_buffer >= (8 - dst_bit_off)) { | |
1423 | dst[0] &= GENMASK(dst_bit_off - 1, 0); | |
1424 | dst[0] |= src_buffer << dst_bit_off; | |
1425 | src_buffer >>= (8 - dst_bit_off); | |
1426 | bits_in_src_buffer -= (8 - dst_bit_off); | |
1427 | dst_bit_off = 0; | |
1428 | dst++; | |
1429 | if (bits_in_src_buffer > 7) { | |
1430 | bits_in_src_buffer -= 8; | |
1431 | dst[0] = src_buffer; | |
1432 | dst++; | |
1433 | src_buffer >>= 8; | |
1434 | } | |
1435 | } | |
1436 | } | |
1437 | ||
1438 | if (!bits_in_src_buffer && !dst_bit_off) { | |
1439 | /* | |
1440 | * Both src and dst pointers are byte aligned, thus we can | |
1441 | * just use the optimized memcpy function. | |
1442 | */ | |
1443 | if (nbytes) | |
1444 | memcpy(dst, src, nbytes); | |
1445 | } else { | |
1446 | /* | |
1447 | * src buffer is not byte aligned, hence we have to copy each | |
1448 | * src byte to the src_buffer variable before extracting a byte | |
1449 | * to store in dst. | |
1450 | */ | |
1451 | for (i = 0; i < nbytes; i++) { | |
1452 | src_buffer |= src[i] << bits_in_src_buffer; | |
1453 | dst[i] = src_buffer; | |
1454 | src_buffer >>= 8; | |
1455 | } | |
1456 | } | |
1457 | /* Update dst and src pointers */ | |
1458 | dst += nbytes; | |
1459 | src += nbytes; | |
1460 | ||
1461 | /* | |
1462 | * nbits is the number of remaining bits. It should not exceed 8 as | |
1463 | * we've already copied as much bytes as possible. | |
1464 | */ | |
1465 | nbits %= 8; | |
1466 | ||
1467 | /* | |
1468 | * If there's no more bits to copy to the destination and src buffer | |
1469 | * was already byte aligned, then we're done. | |
1470 | */ | |
1471 | if (!nbits && !bits_in_src_buffer) | |
1472 | return; | |
1473 | ||
1474 | /* Copy the remaining bits to src_buffer */ | |
1475 | if (nbits) | |
1476 | src_buffer |= (*src & GENMASK(nbits - 1, 0)) << | |
1477 | bits_in_src_buffer; | |
1478 | bits_in_src_buffer += nbits; | |
1479 | ||
1480 | /* | |
1481 | * In case there were not enough bits to get a byte aligned dst buffer | |
1482 | * prepare the src_buffer variable to match the dst organization (shift | |
1483 | * src_buffer by dst_bit_off and retrieve the least significant bits | |
1484 | * from dst). | |
1485 | */ | |
1486 | if (dst_bit_off) | |
1487 | src_buffer = (src_buffer << dst_bit_off) | | |
1488 | (*dst & GENMASK(dst_bit_off - 1, 0)); | |
1489 | bits_in_src_buffer += dst_bit_off; | |
1490 | ||
1491 | /* | |
1492 | * Keep most significant bits from dst if we end up with an unaligned | |
1493 | * number of bits. | |
1494 | */ | |
1495 | nbytes = bits_in_src_buffer / 8; | |
1496 | if (bits_in_src_buffer % 8) { | |
1497 | src_buffer |= (dst[nbytes] & | |
1498 | GENMASK(7, bits_in_src_buffer % 8)) << | |
1499 | (nbytes * 8); | |
1500 | nbytes++; | |
1501 | } | |
1502 | ||
1503 | /* Copy the remaining bytes to dst */ | |
1504 | for (i = 0; i < nbytes; i++) { | |
1505 | dst[i] = src_buffer; | |
1506 | src_buffer >>= 8; | |
1507 | } | |
1508 | } |