diff options
author | frans <fransmeulenbroeks@gmail.com> | 2008-08-15 17:14:31 -0400 |
---|---|---|
committer | David Woodhouse <David.Woodhouse@intel.com> | 2008-08-16 05:55:33 -0400 |
commit | e6cf5df1838c28bb060ac45b5585e48e71bbc740 (patch) | |
tree | b1333e4664fce7dd3c58dd879192e085cb1c2066 /drivers/mtd/nand/nand_ecc.c | |
parent | 782b7a367d81da005d93b28cb00f9ae086773c24 (diff) |
[MTD] [NAND] nand_ecc.c: rewrite for improved performance
This patch improves the performance of the ecc generation code by a
factor of 18 on an INTEL D920 CPU, a factor of 7 on MIPS and a factor of 5
on ARM (NSLU2)
Signed-off-by: Frans Meulenbroeks <fransmeulenbroeks@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Diffstat (limited to 'drivers/mtd/nand/nand_ecc.c')
-rw-r--r-- | drivers/mtd/nand/nand_ecc.c | 496 |
1 files changed, 372 insertions, 124 deletions
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c index 918a806a8471..7129da51bb33 100644 --- a/drivers/mtd/nand/nand_ecc.c +++ b/drivers/mtd/nand/nand_ecc.c | |||
@@ -1,13 +1,18 @@ | |||
1 | /* | 1 | /* |
2 | * This file contains an ECC algorithm from Toshiba that detects and | 2 | * This file contains an ECC algorithm that detects and corrects 1 bit |
3 | * corrects 1 bit errors in a 256 byte block of data. | 3 | * errors in a 256 byte block of data. |
4 | * | 4 | * |
5 | * drivers/mtd/nand/nand_ecc.c | 5 | * drivers/mtd/nand/nand_ecc.c |
6 | * | 6 | * |
7 | * Copyright (C) 2000-2004 Steven J. Hill (sjhill@realitydiluted.com) | 7 | * Copyright (C) 2008 Koninklijke Philips Electronics NV. |
8 | * Toshiba America Electronics Components, Inc. | 8 | * Author: Frans Meulenbroeks |
9 | * | 9 | * |
10 | * Copyright (C) 2006 Thomas Gleixner <tglx@linutronix.de> | 10 | * Completely replaces the previous ECC implementation which was written by: |
11 | * Steven J. Hill (sjhill@realitydiluted.com) | ||
12 | * Thomas Gleixner (tglx@linutronix.de) | ||
13 | * | ||
14 | * Information on how this algorithm works and how it was developed | ||
15 | * can be found in Documentation/nand/ecc.txt | ||
11 | * | 16 | * |
12 | * This file is free software; you can redistribute it and/or modify it | 17 | * This file is free software; you can redistribute it and/or modify it |
13 | * under the terms of the GNU General Public License as published by the | 18 | * under the terms of the GNU General Public License as published by the |
@@ -23,174 +28,417 @@ | |||
23 | * with this file; if not, write to the Free Software Foundation, Inc., | 28 | * with this file; if not, write to the Free Software Foundation, Inc., |
24 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. | 29 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. |
25 | * | 30 | * |
26 | * As a special exception, if other files instantiate templates or use | ||
27 | * macros or inline functions from these files, or you compile these | ||
28 | * files and link them with other works to produce a work based on these | ||
29 | * files, these files do not by themselves cause the resulting work to be | ||
30 | * covered by the GNU General Public License. However the source code for | ||
31 | * these files must still be made available in accordance with section (3) | ||
32 | * of the GNU General Public License. | ||
33 | * | ||
34 | * This exception does not invalidate any other reasons why a work based on | ||
35 | * this file might be covered by the GNU General Public License. | ||
36 | */ | 31 | */ |
37 | 32 | ||
33 | /* | ||
34 | * The STANDALONE macro is useful when running the code outside the kernel | ||
35 | * e.g. when running the code in a testbed or a benchmark program. | ||
36 | * When STANDALONE is used, the module related macros are commented out | ||
37 | * as well as the linux include files. | ||
38 | * Instead a private definition of mtd_info is given to satisfy the compiler | ||
39 | * (the code does not use mtd_info, so the code does not care) | ||
40 | */ | ||
41 | #ifndef STANDALONE | ||
38 | #include <linux/types.h> | 42 | #include <linux/types.h> |
39 | #include <linux/kernel.h> | 43 | #include <linux/kernel.h> |
40 | #include <linux/module.h> | 44 | #include <linux/module.h> |
41 | #include <linux/mtd/nand_ecc.h> | 45 | #include <linux/mtd/nand_ecc.h> |
46 | #else | ||
47 | typedef uint32_t unsigned long | ||
48 | struct mtd_info { | ||
49 | int dummy; | ||
50 | }; | ||
51 | #define EXPORT_SYMBOL(x) /* x */ | ||
52 | |||
53 | #define MODULE_LICENSE(x) /* x */ | ||
54 | #define MODULE_AUTHOR(x) /* x */ | ||
55 | #define MODULE_DESCRIPTION(x) /* x */ | ||
56 | #endif | ||
57 | |||
58 | /* | ||
59 | * invparity is a 256 byte table that contains the odd parity | ||
60 | * for each byte. So if the number of set bits in a byte is even, | ||
61 | * the array element is 1, and when the number of set bits is odd | ||
62 | * the array element is 0. | ||
63 | */ | ||
64 | static const char invparity[256] = { | ||
65 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
66 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
67 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
68 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
69 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
70 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
71 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
72 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
73 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
74 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
75 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
76 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
77 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
78 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
79 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
80 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1 | ||
81 | }; | ||
42 | 82 | ||
43 | /* | 83 | /* |
44 | * Pre-calculated 256-way 1 byte column parity | 84 | * bitsperbyte contains the number of set bits for each byte value |
85 | * this is only used for testing and repairing parity | ||
86 | * (a precalculated value slightly improves performance) | ||
45 | */ | 87 | */ |
46 | static const u_char nand_ecc_precalc_table[] = { | 88 | static const char bitsperbyte[256] = { |
47 | 0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00, | 89 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, |
48 | 0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65, | 90 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
49 | 0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66, | 91 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
50 | 0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03, | 92 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
51 | 0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69, | 93 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
52 | 0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c, | 94 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
53 | 0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f, | 95 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
54 | 0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a, | 96 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
55 | 0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a, | 97 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
56 | 0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f, | 98 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
57 | 0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c, | 99 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
58 | 0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69, | 100 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
59 | 0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03, | 101 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
60 | 0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66, | 102 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
61 | 0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65, | 103 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
62 | 0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00 | 104 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, |
105 | }; | ||
106 | |||
107 | /* | ||
108 | * addressbits is a lookup table to filter out the bits from the xor-ed | ||
109 | * ecc data that identify the faulty location. | ||
110 | * this is only used for repairing parity | ||
111 | * see the comments in nand_correct_data for more details | ||
112 | */ | ||
113 | static const char addressbits[256] = { | ||
114 | 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, | ||
115 | 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03, | ||
116 | 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, | ||
117 | 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03, | ||
118 | 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05, | ||
119 | 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07, | ||
120 | 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05, | ||
121 | 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07, | ||
122 | 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, | ||
123 | 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03, | ||
124 | 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, | ||
125 | 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03, | ||
126 | 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05, | ||
127 | 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07, | ||
128 | 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05, | ||
129 | 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07, | ||
130 | 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09, | ||
131 | 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, | ||
132 | 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09, | ||
133 | 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, | ||
134 | 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d, | ||
135 | 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f, | ||
136 | 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d, | ||
137 | 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f, | ||
138 | 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09, | ||
139 | 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, | ||
140 | 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09, | ||
141 | 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, | ||
142 | 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d, | ||
143 | 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f, | ||
144 | 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d, | ||
145 | 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f | ||
63 | }; | 146 | }; |
64 | 147 | ||
65 | /** | 148 | /** |
66 | * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block | 149 | * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block |
67 | * @mtd: MTD block structure | 150 | * @mtd: MTD block structure (unused) |
68 | * @dat: raw data | 151 | * @dat: raw data |
69 | * @ecc_code: buffer for ECC | 152 | * @ecc_code: buffer for ECC |
70 | */ | 153 | */ |
71 | int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, | 154 | int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf, |
72 | u_char *ecc_code) | 155 | unsigned char *code) |
73 | { | 156 | { |
74 | uint8_t idx, reg1, reg2, reg3, tmp1, tmp2; | ||
75 | int i; | 157 | int i; |
158 | const uint32_t *bp = (uint32_t *)buf; | ||
159 | uint32_t cur; /* current value in buffer */ | ||
160 | /* rp0..rp15 are the various accumulated parities (per byte) */ | ||
161 | uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7; | ||
162 | uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15; | ||
163 | uint32_t par; /* the cumulative parity for all data */ | ||
164 | uint32_t tmppar; /* the cumulative parity for this iteration; | ||
165 | for rp12 and rp14 at the end of the loop */ | ||
166 | |||
167 | par = 0; | ||
168 | rp4 = 0; | ||
169 | rp6 = 0; | ||
170 | rp8 = 0; | ||
171 | rp10 = 0; | ||
172 | rp12 = 0; | ||
173 | rp14 = 0; | ||
174 | |||
175 | /* | ||
176 | * The loop is unrolled a number of times; | ||
177 | * This avoids if statements to decide on which rp value to update | ||
178 | * Also we process the data by longwords. | ||
179 | * Note: passing unaligned data might give a performance penalty. | ||
180 | * It is assumed that the buffers are aligned. | ||
181 | * tmppar is the cumulative sum of this iteration. | ||
182 | * needed for calculating rp12, rp14 and par | ||
183 | * also used as a performance improvement for rp6, rp8 and rp10 | ||
184 | */ | ||
185 | for (i = 0; i < 4; i++) { | ||
186 | cur = *bp++; | ||
187 | tmppar = cur; | ||
188 | rp4 ^= cur; | ||
189 | cur = *bp++; | ||
190 | tmppar ^= cur; | ||
191 | rp6 ^= tmppar; | ||
192 | cur = *bp++; | ||
193 | tmppar ^= cur; | ||
194 | rp4 ^= cur; | ||
195 | cur = *bp++; | ||
196 | tmppar ^= cur; | ||
197 | rp8 ^= tmppar; | ||
76 | 198 | ||
77 | /* Initialize variables */ | 199 | cur = *bp++; |
78 | reg1 = reg2 = reg3 = 0; | 200 | tmppar ^= cur; |
201 | rp4 ^= cur; | ||
202 | rp6 ^= cur; | ||
203 | cur = *bp++; | ||
204 | tmppar ^= cur; | ||
205 | rp6 ^= cur; | ||
206 | cur = *bp++; | ||
207 | tmppar ^= cur; | ||
208 | rp4 ^= cur; | ||
209 | cur = *bp++; | ||
210 | tmppar ^= cur; | ||
211 | rp10 ^= tmppar; | ||
79 | 212 | ||
80 | /* Build up column parity */ | 213 | cur = *bp++; |
81 | for(i = 0; i < 256; i++) { | 214 | tmppar ^= cur; |
82 | /* Get CP0 - CP5 from table */ | 215 | rp4 ^= cur; |
83 | idx = nand_ecc_precalc_table[*dat++]; | 216 | rp6 ^= cur; |
84 | reg1 ^= (idx & 0x3f); | 217 | rp8 ^= cur; |
218 | cur = *bp++; | ||
219 | tmppar ^= cur; | ||
220 | rp6 ^= cur; | ||
221 | rp8 ^= cur; | ||
222 | cur = *bp++; | ||
223 | tmppar ^= cur; | ||
224 | rp4 ^= cur; | ||
225 | rp8 ^= cur; | ||
226 | cur = *bp++; | ||
227 | tmppar ^= cur; | ||
228 | rp8 ^= cur; | ||
85 | 229 | ||
86 | /* All bit XOR = 1 ? */ | 230 | cur = *bp++; |
87 | if (idx & 0x40) { | 231 | tmppar ^= cur; |
88 | reg3 ^= (uint8_t) i; | 232 | rp4 ^= cur; |
89 | reg2 ^= ~((uint8_t) i); | 233 | rp6 ^= cur; |
90 | } | 234 | cur = *bp++; |
235 | tmppar ^= cur; | ||
236 | rp6 ^= cur; | ||
237 | cur = *bp++; | ||
238 | tmppar ^= cur; | ||
239 | rp4 ^= cur; | ||
240 | cur = *bp++; | ||
241 | tmppar ^= cur; | ||
242 | |||
243 | par ^= tmppar; | ||
244 | if ((i & 0x1) == 0) | ||
245 | rp12 ^= tmppar; | ||
246 | if ((i & 0x2) == 0) | ||
247 | rp14 ^= tmppar; | ||
91 | } | 248 | } |
92 | 249 | ||
93 | /* Create non-inverted ECC code from line parity */ | 250 | /* |
94 | tmp1 = (reg3 & 0x80) >> 0; /* B7 -> B7 */ | 251 | * handle the fact that we use longword operations |
95 | tmp1 |= (reg2 & 0x80) >> 1; /* B7 -> B6 */ | 252 | * we'll bring rp4..rp14 back to single byte entities by shifting and |
96 | tmp1 |= (reg3 & 0x40) >> 1; /* B6 -> B5 */ | 253 | * xoring first fold the upper and lower 16 bits, |
97 | tmp1 |= (reg2 & 0x40) >> 2; /* B6 -> B4 */ | 254 | * then the upper and lower 8 bits. |
98 | tmp1 |= (reg3 & 0x20) >> 2; /* B5 -> B3 */ | 255 | */ |
99 | tmp1 |= (reg2 & 0x20) >> 3; /* B5 -> B2 */ | 256 | rp4 ^= (rp4 >> 16); |
100 | tmp1 |= (reg3 & 0x10) >> 3; /* B4 -> B1 */ | 257 | rp4 ^= (rp4 >> 8); |
101 | tmp1 |= (reg2 & 0x10) >> 4; /* B4 -> B0 */ | 258 | rp4 &= 0xff; |
102 | 259 | rp6 ^= (rp6 >> 16); | |
103 | tmp2 = (reg3 & 0x08) << 4; /* B3 -> B7 */ | 260 | rp6 ^= (rp6 >> 8); |
104 | tmp2 |= (reg2 & 0x08) << 3; /* B3 -> B6 */ | 261 | rp6 &= 0xff; |
105 | tmp2 |= (reg3 & 0x04) << 3; /* B2 -> B5 */ | 262 | rp8 ^= (rp8 >> 16); |
106 | tmp2 |= (reg2 & 0x04) << 2; /* B2 -> B4 */ | 263 | rp8 ^= (rp8 >> 8); |
107 | tmp2 |= (reg3 & 0x02) << 2; /* B1 -> B3 */ | 264 | rp8 &= 0xff; |
108 | tmp2 |= (reg2 & 0x02) << 1; /* B1 -> B2 */ | 265 | rp10 ^= (rp10 >> 16); |
109 | tmp2 |= (reg3 & 0x01) << 1; /* B0 -> B1 */ | 266 | rp10 ^= (rp10 >> 8); |
110 | tmp2 |= (reg2 & 0x01) << 0; /* B7 -> B0 */ | 267 | rp10 &= 0xff; |
111 | 268 | rp12 ^= (rp12 >> 16); | |
112 | /* Calculate final ECC code */ | 269 | rp12 ^= (rp12 >> 8); |
270 | rp12 &= 0xff; | ||
271 | rp14 ^= (rp14 >> 16); | ||
272 | rp14 ^= (rp14 >> 8); | ||
273 | rp14 &= 0xff; | ||
274 | |||
275 | /* | ||
276 | * we also need to calculate the row parity for rp0..rp3 | ||
277 | * This is present in par, because par is now | ||
278 | * rp3 rp3 rp2 rp2 | ||
279 | * as well as | ||
280 | * rp1 rp0 rp1 rp0 | ||
281 | * First calculate rp2 and rp3 | ||
282 | * (and yes: rp2 = (par ^ rp3) & 0xff; but doing that did not | ||
283 | * give a performance improvement) | ||
284 | */ | ||
285 | rp3 = (par >> 16); | ||
286 | rp3 ^= (rp3 >> 8); | ||
287 | rp3 &= 0xff; | ||
288 | rp2 = par & 0xffff; | ||
289 | rp2 ^= (rp2 >> 8); | ||
290 | rp2 &= 0xff; | ||
291 | |||
292 | /* reduce par to 16 bits then calculate rp1 and rp0 */ | ||
293 | par ^= (par >> 16); | ||
294 | rp1 = (par >> 8) & 0xff; | ||
295 | rp0 = (par & 0xff); | ||
296 | |||
297 | /* finally reduce par to 8 bits */ | ||
298 | par ^= (par >> 8); | ||
299 | par &= 0xff; | ||
300 | |||
301 | /* | ||
302 | * and calculate rp5..rp15 | ||
303 | * note that par = rp4 ^ rp5 and due to the commutative property | ||
304 | * of the ^ operator we can say: | ||
305 | * rp5 = (par ^ rp4); | ||
306 | * The & 0xff seems superfluous, but benchmarking showed that | ||
307 | * leaving it out gives slightly worse results. No idea why, probably | ||
308 | * it has to do with the way the pipeline in pentium is organized. | ||
309 | */ | ||
310 | rp5 = (par ^ rp4) & 0xff; | ||
311 | rp7 = (par ^ rp6) & 0xff; | ||
312 | rp9 = (par ^ rp8) & 0xff; | ||
313 | rp11 = (par ^ rp10) & 0xff; | ||
314 | rp13 = (par ^ rp12) & 0xff; | ||
315 | rp15 = (par ^ rp14) & 0xff; | ||
316 | |||
317 | /* | ||
318 | * Finally calculate the ecc bits. | ||
319 | * Again here it might seem that there are performance optimisations | ||
320 | * possible, but benchmarks showed that on the system this is developed | ||
321 | * the code below is the fastest | ||
322 | */ | ||
113 | #ifdef CONFIG_MTD_NAND_ECC_SMC | 323 | #ifdef CONFIG_MTD_NAND_ECC_SMC |
114 | ecc_code[0] = ~tmp2; | 324 | code[0] = |
115 | ecc_code[1] = ~tmp1; | 325 | (invparity[rp7] << 7) | |
326 | (invparity[rp6] << 6) | | ||
327 | (invparity[rp5] << 5) | | ||
328 | (invparity[rp4] << 4) | | ||
329 | (invparity[rp3] << 3) | | ||
330 | (invparity[rp2] << 2) | | ||
331 | (invparity[rp1] << 1) | | ||
332 | (invparity[rp0]); | ||
333 | code[1] = | ||
334 | (invparity[rp15] << 7) | | ||
335 | (invparity[rp14] << 6) | | ||
336 | (invparity[rp13] << 5) | | ||
337 | (invparity[rp12] << 4) | | ||
338 | (invparity[rp11] << 3) | | ||
339 | (invparity[rp10] << 2) | | ||
340 | (invparity[rp9] << 1) | | ||
341 | (invparity[rp8]); | ||
116 | #else | 342 | #else |
117 | ecc_code[0] = ~tmp1; | 343 | code[1] = |
118 | ecc_code[1] = ~tmp2; | 344 | (invparity[rp7] << 7) | |
345 | (invparity[rp6] << 6) | | ||
346 | (invparity[rp5] << 5) | | ||
347 | (invparity[rp4] << 4) | | ||
348 | (invparity[rp3] << 3) | | ||
349 | (invparity[rp2] << 2) | | ||
350 | (invparity[rp1] << 1) | | ||
351 | (invparity[rp0]); | ||
352 | code[0] = | ||
353 | (invparity[rp15] << 7) | | ||
354 | (invparity[rp14] << 6) | | ||
355 | (invparity[rp13] << 5) | | ||
356 | (invparity[rp12] << 4) | | ||
357 | (invparity[rp11] << 3) | | ||
358 | (invparity[rp10] << 2) | | ||
359 | (invparity[rp9] << 1) | | ||
360 | (invparity[rp8]); | ||
119 | #endif | 361 | #endif |
120 | ecc_code[2] = ((~reg1) << 2) | 0x03; | 362 | code[2] = |
121 | 363 | (invparity[par & 0xf0] << 7) | | |
364 | (invparity[par & 0x0f] << 6) | | ||
365 | (invparity[par & 0xcc] << 5) | | ||
366 | (invparity[par & 0x33] << 4) | | ||
367 | (invparity[par & 0xaa] << 3) | | ||
368 | (invparity[par & 0x55] << 2) | | ||
369 | 3; | ||
122 | return 0; | 370 | return 0; |
123 | } | 371 | } |
124 | EXPORT_SYMBOL(nand_calculate_ecc); | 372 | EXPORT_SYMBOL(nand_calculate_ecc); |
125 | 373 | ||
126 | static inline int countbits(uint32_t byte) | ||
127 | { | ||
128 | int res = 0; | ||
129 | |||
130 | for (;byte; byte >>= 1) | ||
131 | res += byte & 0x01; | ||
132 | return res; | ||
133 | } | ||
134 | |||
135 | /** | 374 | /** |
136 | * nand_correct_data - [NAND Interface] Detect and correct bit error(s) | 375 | * nand_correct_data - [NAND Interface] Detect and correct bit error(s) |
137 | * @mtd: MTD block structure | 376 | * @mtd: MTD block structure (unused) |
138 | * @dat: raw data read from the chip | 377 | * @dat: raw data read from the chip |
139 | * @read_ecc: ECC from the chip | 378 | * @read_ecc: ECC from the chip |
140 | * @calc_ecc: the ECC calculated from raw data | 379 | * @calc_ecc: the ECC calculated from raw data |
141 | * | 380 | * |
142 | * Detect and correct a 1 bit error for 256 byte block | 381 | * Detect and correct a 1 bit error for 256 byte block |
143 | */ | 382 | */ |
144 | int nand_correct_data(struct mtd_info *mtd, u_char *dat, | 383 | int nand_correct_data(struct mtd_info *mtd, unsigned char *buf, |
145 | u_char *read_ecc, u_char *calc_ecc) | 384 | unsigned char *read_ecc, unsigned char *calc_ecc) |
146 | { | 385 | { |
147 | uint8_t s0, s1, s2; | 386 | int nr_bits; |
387 | unsigned char b0, b1, b2; | ||
388 | unsigned char byte_addr, bit_addr; | ||
148 | 389 | ||
390 | /* | ||
391 | * b0 to b2 indicate which bit is faulty (if any) | ||
392 | * we might need the xor result more than once, | ||
393 | * so keep them in a local var | ||
394 | */ | ||
149 | #ifdef CONFIG_MTD_NAND_ECC_SMC | 395 | #ifdef CONFIG_MTD_NAND_ECC_SMC |
150 | s0 = calc_ecc[0] ^ read_ecc[0]; | 396 | b0 = read_ecc[0] ^ calc_ecc[0]; |
151 | s1 = calc_ecc[1] ^ read_ecc[1]; | 397 | b1 = read_ecc[1] ^ calc_ecc[1]; |
152 | s2 = calc_ecc[2] ^ read_ecc[2]; | ||
153 | #else | 398 | #else |
154 | s1 = calc_ecc[0] ^ read_ecc[0]; | 399 | b0 = read_ecc[1] ^ calc_ecc[1]; |
155 | s0 = calc_ecc[1] ^ read_ecc[1]; | 400 | b1 = read_ecc[0] ^ calc_ecc[0]; |
156 | s2 = calc_ecc[2] ^ read_ecc[2]; | ||
157 | #endif | 401 | #endif |
158 | if ((s0 | s1 | s2) == 0) | 402 | b2 = read_ecc[2] ^ calc_ecc[2]; |
159 | return 0; | ||
160 | |||
161 | /* Check for a single bit error */ | ||
162 | if( ((s0 ^ (s0 >> 1)) & 0x55) == 0x55 && | ||
163 | ((s1 ^ (s1 >> 1)) & 0x55) == 0x55 && | ||
164 | ((s2 ^ (s2 >> 1)) & 0x54) == 0x54) { | ||
165 | |||
166 | uint32_t byteoffs, bitnum; | ||
167 | 403 | ||
168 | byteoffs = (s1 << 0) & 0x80; | 404 | /* check if there are any bitfaults */ |
169 | byteoffs |= (s1 << 1) & 0x40; | ||
170 | byteoffs |= (s1 << 2) & 0x20; | ||
171 | byteoffs |= (s1 << 3) & 0x10; | ||
172 | 405 | ||
173 | byteoffs |= (s0 >> 4) & 0x08; | 406 | /* count nr of bits; use table lookup, faster than calculating it */ |
174 | byteoffs |= (s0 >> 3) & 0x04; | 407 | nr_bits = bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]; |
175 | byteoffs |= (s0 >> 2) & 0x02; | ||
176 | byteoffs |= (s0 >> 1) & 0x01; | ||
177 | 408 | ||
178 | bitnum = (s2 >> 5) & 0x04; | 409 | /* repeated if statements are slightly more efficient than switch ... */ |
179 | bitnum |= (s2 >> 4) & 0x02; | 410 | /* ordered in order of likelihood */ |
180 | bitnum |= (s2 >> 3) & 0x01; | 411 | if (nr_bits == 0) |
181 | 412 | return (0); /* no error */ | |
182 | dat[byteoffs] ^= (1 << bitnum); | 413 | if (nr_bits == 11) { /* correctable error */ |
183 | 414 | /* | |
184 | return 1; | 415 | * rp15/13/11/9/7/5/3/1 indicate which byte is the faulty byte |
416 | * cp 5/3/1 indicate the faulty bit. | ||
417 | * A lookup table (called addressbits) is used to filter | ||
418 | * the bits from the byte they are in. | ||
419 | * A marginal optimisation is possible by having three | ||
420 | * different lookup tables. | ||
421 | * One as we have now (for b0), one for b2 | ||
422 | * (that would avoid the >> 1), and one for b1 (with all values | ||
423 | * << 4). However it was felt that introducing two more tables | ||
424 | * would hardly be justified by the gain. | ||
425 | * | ||
426 | * The b2 shift is there to get rid of the lowest two bits. | ||
427 | * We could also do addressbits[b2] >> 1 but for the | ||
428 | * performance it does not make any difference | ||
429 | */ | ||
430 | byte_addr = (addressbits[b1] << 4) + addressbits[b0]; | ||
431 | bit_addr = addressbits[b2 >> 2]; | ||
432 | /* flip the bit */ | ||
433 | buf[byte_addr] ^= (1 << bit_addr); | ||
434 | return (1); | ||
185 | } | 435 | } |
186 | 436 | if (nr_bits == 1) | |
187 | if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1) | 437 | return (1); /* error in ecc data; no action needed */ |
188 | return 1; | 438 | return -1; |
189 | |||
190 | return -EBADMSG; | ||
191 | } | 439 | } |
192 | EXPORT_SYMBOL(nand_correct_data); | 440 | EXPORT_SYMBOL(nand_correct_data); |
193 | 441 | ||
194 | MODULE_LICENSE("GPL"); | 442 | MODULE_LICENSE("GPL"); |
195 | MODULE_AUTHOR("Steven J. Hill <sjhill@realitydiluted.com>"); | 443 | MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>"); |
196 | MODULE_DESCRIPTION("Generic NAND ECC support"); | 444 | MODULE_DESCRIPTION("Generic NAND ECC support"); |