summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWang YanQing <udknight@gmail.com>2015-10-29 12:36:33 -0400
committerBrian Norris <computersforpeace@gmail.com>2015-11-17 20:05:14 -0500
commitfc5adbebac6ffab461492f7a415648b29b1a3b31 (patch)
treec71b2c7b93d4bea72af2d136dd4a903414b100ec
parent1b15b1f5a01019524815a9ce5c575f3b2068e7f8 (diff)
Documentation: mtd: improve nand_ecc.txt for readability and correctness
This patch corrects some representation errors, adds a little clarification in some places, and fixes indentation problems in the pseudo code. It also deletes one extra white space in one place. Signed-off-by: Wang YanQing <udknight@gmail.com> [Brian: a few tweaks] Signed-off-by: Brian Norris <computersforpeace@gmail.com>
-rw-r--r--Documentation/mtd/nand_ecc.txt58
1 files changed, 29 insertions, 29 deletions
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
index e129b2479ea8..f8c3284bf6a7 100644
--- a/Documentation/mtd/nand_ecc.txt
+++ b/Documentation/mtd/nand_ecc.txt
@@ -107,7 +107,7 @@ for (i = 0; i < 256; i++)
107 if (i & 0x01) 107 if (i & 0x01)
108 rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1; 108 rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
109 else 109 else
110 rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1; 110 rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
111 if (i & 0x02) 111 if (i & 0x02)
112 rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3; 112 rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
113 else 113 else
@@ -127,7 +127,7 @@ for (i = 0; i < 256; i++)
127 if (i & 0x20) 127 if (i & 0x20)
128 rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11; 128 rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
129 else 129 else
130 rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10; 130 rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
131 if (i & 0x40) 131 if (i & 0x40)
132 rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13; 132 rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
133 else 133 else
@@ -158,7 +158,7 @@ the values in any order. So instead of calculating all the bits
158individually, let us try to rearrange things. 158individually, let us try to rearrange things.
159For the column parity this is easy. We can just xor the bytes and in the 159For the column parity this is easy. We can just xor the bytes and in the
160end filter out the relevant bits. This is pretty nice as it will bring 160end filter out the relevant bits. This is pretty nice as it will bring
161all cp calculation out of the if loop. 161all cp calculation out of the for loop.
162 162
163Similarly we can first xor the bytes for the various rows. 163Similarly we can first xor the bytes for the various rows.
164This leads to: 164This leads to:
@@ -271,11 +271,11 @@ to write our code in such a way that we process data in 32 bit chunks.
271Of course this means some modification as the row parity is byte by 271Of course this means some modification as the row parity is byte by
272byte. A quick analysis: 272byte. A quick analysis:
273for the column parity we use the par variable. When extending to 32 bits 273for the column parity we use the par variable. When extending to 32 bits
274we can in the end easily calculate p0 and p1 from it. 274we can in the end easily calculate rp0 and rp1 from it.
275(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0 275(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
276respectively) 276respectively, from MSB to LSB)
277also rp2 and rp3 can be easily retrieved from par as rp3 covers the 277also rp2 and rp3 can be easily retrieved from par as rp3 covers the
278first two bytes and rp2 the last two bytes. 278first two MSBs and rp2 covers the last two LSBs.
279 279
280Note that of course now the loop is executed only 64 times (256/4). 280Note that of course now the loop is executed only 64 times (256/4).
281And note that care must be taken wrt byte ordering. The way bytes are 281And note that care must be taken wrt byte ordering. The way bytes are
@@ -387,11 +387,11 @@ Analysis 2
387 387
388The code (of course) works, and hurray: we are a little bit faster than 388The code (of course) works, and hurray: we are a little bit faster than
389the linux driver code (about 15%). But wait, don't cheer too quickly. 389the linux driver code (about 15%). But wait, don't cheer too quickly.
390THere is more to be gained. 390There is more to be gained.
391If we look at e.g. rp14 and rp15 we see that we either xor our data with 391If we look at e.g. rp14 and rp15 we see that we either xor our data with
392rp14 or with rp15. However we also have par which goes over all data. 392rp14 or with rp15. However we also have par which goes over all data.
393This means there is no need to calculate rp14 as it can be calculated from 393This means there is no need to calculate rp14 as it can be calculated from
394rp15 through rp14 = par ^ rp15; 394rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15;
395(or if desired we can avoid calculating rp15 and calculate it from 395(or if desired we can avoid calculating rp15 and calculate it from
396rp14). That is why some places refer to inverse parity. 396rp14). That is why some places refer to inverse parity.
397Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13. 397Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
@@ -419,12 +419,12 @@ with
419 if (i & 0x20) rp15 ^= cur; 419 if (i & 0x20) rp15 ^= cur;
420 420
421 and outside the loop added: 421 and outside the loop added:
422 rp4 = par ^ rp5; 422 rp4 = par ^ rp5;
423 rp6 = par ^ rp7; 423 rp6 = par ^ rp7;
424 rp8 = par ^ rp9; 424 rp8 = par ^ rp9;
425 rp10 = par ^ rp11; 425 rp10 = par ^ rp11;
426 rp12 = par ^ rp13; 426 rp12 = par ^ rp13;
427 rp14 = par ^ rp15; 427 rp14 = par ^ rp15;
428 428
429And after that the code takes about 30% more time, although the number of 429And after that the code takes about 30% more time, although the number of
430statements is reduced. This is also reflected in the assembly code. 430statements is reduced. This is also reflected in the assembly code.
@@ -524,12 +524,12 @@ THe code within the for loop was changed to:
524 524
525 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; 525 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
526 cur = *bp++; tmppar ^= cur; rp6 ^= cur; 526 cur = *bp++; tmppar ^= cur; rp6 ^= cur;
527 cur = *bp++; tmppar ^= cur; rp4 ^= cur; 527 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
528 cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; 528 cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
529 529
530 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur; 530 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
531 cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur; 531 cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
532 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur; 532 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
533 cur = *bp++; tmppar ^= cur; rp8 ^= cur; 533 cur = *bp++; tmppar ^= cur; rp8 ^= cur;
534 534
535 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; 535 cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
@@ -537,7 +537,7 @@ THe code within the for loop was changed to:
537 cur = *bp++; tmppar ^= cur; rp4 ^= cur; 537 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
538 cur = *bp++; tmppar ^= cur; 538 cur = *bp++; tmppar ^= cur;
539 539
540 par ^= tmppar; 540 par ^= tmppar;
541 if ((i & 0x1) == 0) rp12 ^= tmppar; 541 if ((i & 0x1) == 0) rp12 ^= tmppar;
542 if ((i & 0x2) == 0) rp14 ^= tmppar; 542 if ((i & 0x2) == 0) rp14 ^= tmppar;
543 } 543 }
@@ -548,8 +548,8 @@ to rp12 and rp14.
548 548
549While making the changes I also found that I could exploit that tmppar 549While making the changes I also found that I could exploit that tmppar
550contains the running parity for this iteration. So instead of having: 550contains the running parity for this iteration. So instead of having:
551rp4 ^= cur; rp6 = cur; 551rp4 ^= cur; rp6 ^= cur;
552I removed the rp6 = cur; statement and did rp6 ^= tmppar; on next 552I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
553statement. A similar change was done for rp8 and rp10 553statement. A similar change was done for rp8 and rp10
554 554
555 555
@@ -593,22 +593,22 @@ The new code now looks like:
593 593
594 cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; 594 cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
595 cur = *bp++; tmppar ^= cur; rp6 ^= cur; 595 cur = *bp++; tmppar ^= cur; rp6 ^= cur;
596 cur = *bp++; tmppar ^= cur; rp4 ^= cur; 596 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
597 cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; 597 cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
598 598
599 notrp8 = tmppar; 599 notrp8 = tmppar;
600 cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; 600 cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
601 cur = *bp++; tmppar ^= cur; rp6 ^= cur; 601 cur = *bp++; tmppar ^= cur; rp6 ^= cur;
602 cur = *bp++; tmppar ^= cur; rp4 ^= cur; 602 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
603 cur = *bp++; tmppar ^= cur; 603 cur = *bp++; tmppar ^= cur;
604 rp8 = rp8 ^ tmppar ^ notrp8; 604 rp8 = rp8 ^ tmppar ^ notrp8;
605 605
606 cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; 606 cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
607 cur = *bp++; tmppar ^= cur; rp6 ^= cur; 607 cur = *bp++; tmppar ^= cur; rp6 ^= cur;
608 cur = *bp++; tmppar ^= cur; rp4 ^= cur; 608 cur = *bp++; tmppar ^= cur; rp4 ^= cur;
609 cur = *bp++; tmppar ^= cur; 609 cur = *bp++; tmppar ^= cur;
610 610
611 par ^= tmppar; 611 par ^= tmppar;
612 if ((i & 0x1) == 0) rp12 ^= tmppar; 612 if ((i & 0x1) == 0) rp12 ^= tmppar;
613 if ((i & 0x2) == 0) rp14 ^= tmppar; 613 if ((i & 0x2) == 0) rp14 ^= tmppar;
614 } 614 }
@@ -700,7 +700,7 @@ Conclusion
700The gain when calculating the ecc is tremendous. On my development hardware 700The gain when calculating the ecc is tremendous. On my development hardware
701a speedup of a factor of 18 for ecc calculation was achieved. On a test on an 701a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
702embedded system with a MIPS core a factor 7 was obtained. 702embedded system with a MIPS core a factor 7 was obtained.
703On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor 703On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
7045 (big endian mode, gcc 4.1.2, -O3) 7045 (big endian mode, gcc 4.1.2, -O3)
705For correction not much gain could be obtained (as bitflips are rare). Then 705For correction not much gain could be obtained (as bitflips are rare). Then
706again there are also far fewer cycles spent there. 706again there are also far fewer cycles spent there.