diff options
author | Denys Vlasenko <dvlasenk@redhat.com> | 2015-05-18 06:58:40 -0400 |
---|---|---|
committer | Brian Norris <computersforpeace@gmail.com> | 2015-05-27 15:42:16 -0400 |
commit | 4612c715a6ea6b3af2aee0163c0721375b2548d7 (patch) | |
tree | 6ec1f9324fa54c41a0a93c5e607e4a0139fd13f5 | |
parent | c3c263a8a38c277e8867bfc7731ec95846a23b11 (diff) |
mtd: cfi: deinline large functions
With this .config: http://busybox.net/~vda/kernel_config,
after uninlining, these functions have sizes and callsite counts
as follows:
cfi_udelay(): 74 bytes, 26 callsites
cfi_send_gen_cmd(): 153 bytes, 95 callsites
cfi_build_cmd(): 274 bytes, 123 callsites
cfi_build_cmd_addr(): 49 bytes, 15 callsites
cfi_merge_status(): 230 bytes, 3 callsites
Reduction in code size is about 50,000 bytes:
text data bss dec hex filename
85842882 22294584 20627456 128764922 7accbfa vmlinux.before
85789648 22294616 20627456 128711720 7abfc28 vmlinux
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Dan Carpenter <dan.carpenter@oracle.com>
CC: Jingoo Han <jg1.han@samsung.com>
CC: Brian Norris <computersforpeace@gmail.com>
CC: Aaron Sierra <asierra@xes-inc.com>
CC: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
CC: David Woodhouse <David.Woodhouse@intel.com>
CC: linux-mtd@lists.infradead.org
CC: linux-kernel@vger.kernel.org
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
-rw-r--r-- | drivers/mtd/chips/cfi_util.c | 188 | ||||
-rw-r--r-- | include/linux/mtd/cfi.h | 188 |
2 files changed, 196 insertions, 180 deletions
diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c index 09c79bd0b4f4..6f16552cd59f 100644 --- a/drivers/mtd/chips/cfi_util.c +++ b/drivers/mtd/chips/cfi_util.c | |||
@@ -23,6 +23,194 @@ | |||
23 | #include <linux/mtd/map.h> | 23 | #include <linux/mtd/map.h> |
24 | #include <linux/mtd/cfi.h> | 24 | #include <linux/mtd/cfi.h> |
25 | 25 | ||
26 | void cfi_udelay(int us) | ||
27 | { | ||
28 | if (us >= 1000) { | ||
29 | msleep((us+999)/1000); | ||
30 | } else { | ||
31 | udelay(us); | ||
32 | cond_resched(); | ||
33 | } | ||
34 | } | ||
35 | EXPORT_SYMBOL(cfi_udelay); | ||
36 | |||
37 | /* | ||
38 | * Returns the command address according to the given geometry. | ||
39 | */ | ||
40 | uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, | ||
41 | struct map_info *map, struct cfi_private *cfi) | ||
42 | { | ||
43 | unsigned bankwidth = map_bankwidth(map); | ||
44 | unsigned interleave = cfi_interleave(cfi); | ||
45 | unsigned type = cfi->device_type; | ||
46 | uint32_t addr; | ||
47 | |||
48 | addr = (cmd_ofs * type) * interleave; | ||
49 | |||
50 | /* Modify the unlock address if we are in compatibility mode. | ||
51 | * For 16bit devices on 8 bit busses | ||
52 | * and 32bit devices on 16 bit busses | ||
53 | * set the low bit of the alternating bit sequence of the address. | ||
54 | */ | ||
55 | if (((type * interleave) > bankwidth) && ((cmd_ofs & 0xff) == 0xaa)) | ||
56 | addr |= (type >> 1)*interleave; | ||
57 | |||
58 | return addr; | ||
59 | } | ||
60 | EXPORT_SYMBOL(cfi_build_cmd_addr); | ||
61 | |||
62 | /* | ||
63 | * Transforms the CFI command for the given geometry (bus width & interleave). | ||
64 | * It looks too long to be inline, but in the common case it should almost all | ||
65 | * get optimised away. | ||
66 | */ | ||
67 | map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi) | ||
68 | { | ||
69 | map_word val = { {0} }; | ||
70 | int wordwidth, words_per_bus, chip_mode, chips_per_word; | ||
71 | unsigned long onecmd; | ||
72 | int i; | ||
73 | |||
74 | /* We do it this way to give the compiler a fighting chance | ||
75 | of optimising away all the crap for 'bankwidth' larger than | ||
76 | an unsigned long, in the common case where that support is | ||
77 | disabled */ | ||
78 | if (map_bankwidth_is_large(map)) { | ||
79 | wordwidth = sizeof(unsigned long); | ||
80 | words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1 | ||
81 | } else { | ||
82 | wordwidth = map_bankwidth(map); | ||
83 | words_per_bus = 1; | ||
84 | } | ||
85 | |||
86 | chip_mode = map_bankwidth(map) / cfi_interleave(cfi); | ||
87 | chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map); | ||
88 | |||
89 | /* First, determine what the bit-pattern should be for a single | ||
90 | device, according to chip mode and endianness... */ | ||
91 | switch (chip_mode) { | ||
92 | default: BUG(); | ||
93 | case 1: | ||
94 | onecmd = cmd; | ||
95 | break; | ||
96 | case 2: | ||
97 | onecmd = cpu_to_cfi16(map, cmd); | ||
98 | break; | ||
99 | case 4: | ||
100 | onecmd = cpu_to_cfi32(map, cmd); | ||
101 | break; | ||
102 | } | ||
103 | |||
104 | /* Now replicate it across the size of an unsigned long, or | ||
105 | just to the bus width as appropriate */ | ||
106 | switch (chips_per_word) { | ||
107 | default: BUG(); | ||
108 | #if BITS_PER_LONG >= 64 | ||
109 | case 8: | ||
110 | onecmd |= (onecmd << (chip_mode * 32)); | ||
111 | #endif | ||
112 | case 4: | ||
113 | onecmd |= (onecmd << (chip_mode * 16)); | ||
114 | case 2: | ||
115 | onecmd |= (onecmd << (chip_mode * 8)); | ||
116 | case 1: | ||
117 | ; | ||
118 | } | ||
119 | |||
120 | /* And finally, for the multi-word case, replicate it | ||
121 | in all words in the structure */ | ||
122 | for (i=0; i < words_per_bus; i++) { | ||
123 | val.x[i] = onecmd; | ||
124 | } | ||
125 | |||
126 | return val; | ||
127 | } | ||
128 | EXPORT_SYMBOL(cfi_build_cmd); | ||
129 | |||
130 | unsigned long cfi_merge_status(map_word val, struct map_info *map, | ||
131 | struct cfi_private *cfi) | ||
132 | { | ||
133 | int wordwidth, words_per_bus, chip_mode, chips_per_word; | ||
134 | unsigned long onestat, res = 0; | ||
135 | int i; | ||
136 | |||
137 | /* We do it this way to give the compiler a fighting chance | ||
138 | of optimising away all the crap for 'bankwidth' larger than | ||
139 | an unsigned long, in the common case where that support is | ||
140 | disabled */ | ||
141 | if (map_bankwidth_is_large(map)) { | ||
142 | wordwidth = sizeof(unsigned long); | ||
143 | words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1 | ||
144 | } else { | ||
145 | wordwidth = map_bankwidth(map); | ||
146 | words_per_bus = 1; | ||
147 | } | ||
148 | |||
149 | chip_mode = map_bankwidth(map) / cfi_interleave(cfi); | ||
150 | chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map); | ||
151 | |||
152 | onestat = val.x[0]; | ||
153 | /* Or all status words together */ | ||
154 | for (i=1; i < words_per_bus; i++) { | ||
155 | onestat |= val.x[i]; | ||
156 | } | ||
157 | |||
158 | res = onestat; | ||
159 | switch(chips_per_word) { | ||
160 | default: BUG(); | ||
161 | #if BITS_PER_LONG >= 64 | ||
162 | case 8: | ||
163 | res |= (onestat >> (chip_mode * 32)); | ||
164 | #endif | ||
165 | case 4: | ||
166 | res |= (onestat >> (chip_mode * 16)); | ||
167 | case 2: | ||
168 | res |= (onestat >> (chip_mode * 8)); | ||
169 | case 1: | ||
170 | ; | ||
171 | } | ||
172 | |||
173 | /* Last, determine what the bit-pattern should be for a single | ||
174 | device, according to chip mode and endianness... */ | ||
175 | switch (chip_mode) { | ||
176 | case 1: | ||
177 | break; | ||
178 | case 2: | ||
179 | res = cfi16_to_cpu(map, res); | ||
180 | break; | ||
181 | case 4: | ||
182 | res = cfi32_to_cpu(map, res); | ||
183 | break; | ||
184 | default: BUG(); | ||
185 | } | ||
186 | return res; | ||
187 | } | ||
188 | EXPORT_SYMBOL(cfi_merge_status); | ||
189 | |||
190 | /* | ||
191 | * Sends a CFI command to a bank of flash for the given geometry. | ||
192 | * | ||
193 | * Returns the offset in flash where the command was written. | ||
194 | * If prev_val is non-null, it will be set to the value at the command address, | ||
195 | * before the command was written. | ||
196 | */ | ||
197 | uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base, | ||
198 | struct map_info *map, struct cfi_private *cfi, | ||
199 | int type, map_word *prev_val) | ||
200 | { | ||
201 | map_word val; | ||
202 | uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi); | ||
203 | val = cfi_build_cmd(cmd, map, cfi); | ||
204 | |||
205 | if (prev_val) | ||
206 | *prev_val = map_read(map, addr); | ||
207 | |||
208 | map_write(map, val, addr); | ||
209 | |||
210 | return addr - base; | ||
211 | } | ||
212 | EXPORT_SYMBOL(cfi_send_gen_cmd); | ||
213 | |||
26 | int __xipram cfi_qry_present(struct map_info *map, __u32 base, | 214 | int __xipram cfi_qry_present(struct map_info *map, __u32 base, |
27 | struct cfi_private *cfi) | 215 | struct cfi_private *cfi) |
28 | { | 216 | { |
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 299d7d31fe53..9b57a9b1b081 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h | |||
@@ -296,183 +296,19 @@ struct cfi_private { | |||
296 | struct flchip chips[0]; /* per-chip data structure for each chip */ | 296 | struct flchip chips[0]; /* per-chip data structure for each chip */ |
297 | }; | 297 | }; |
298 | 298 | ||
299 | /* | 299 | uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, |
300 | * Returns the command address according to the given geometry. | 300 | struct map_info *map, struct cfi_private *cfi); |
301 | */ | ||
302 | static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, | ||
303 | struct map_info *map, struct cfi_private *cfi) | ||
304 | { | ||
305 | unsigned bankwidth = map_bankwidth(map); | ||
306 | unsigned interleave = cfi_interleave(cfi); | ||
307 | unsigned type = cfi->device_type; | ||
308 | uint32_t addr; | ||
309 | |||
310 | addr = (cmd_ofs * type) * interleave; | ||
311 | |||
312 | /* Modify the unlock address if we are in compatibility mode. | ||
313 | * For 16bit devices on 8 bit busses | ||
314 | * and 32bit devices on 16 bit busses | ||
315 | * set the low bit of the alternating bit sequence of the address. | ||
316 | */ | ||
317 | if (((type * interleave) > bankwidth) && ((cmd_ofs & 0xff) == 0xaa)) | ||
318 | addr |= (type >> 1)*interleave; | ||
319 | |||
320 | return addr; | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * Transforms the CFI command for the given geometry (bus width & interleave). | ||
325 | * It looks too long to be inline, but in the common case it should almost all | ||
326 | * get optimised away. | ||
327 | */ | ||
328 | static inline map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi) | ||
329 | { | ||
330 | map_word val = { {0} }; | ||
331 | int wordwidth, words_per_bus, chip_mode, chips_per_word; | ||
332 | unsigned long onecmd; | ||
333 | int i; | ||
334 | |||
335 | /* We do it this way to give the compiler a fighting chance | ||
336 | of optimising away all the crap for 'bankwidth' larger than | ||
337 | an unsigned long, in the common case where that support is | ||
338 | disabled */ | ||
339 | if (map_bankwidth_is_large(map)) { | ||
340 | wordwidth = sizeof(unsigned long); | ||
341 | words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1 | ||
342 | } else { | ||
343 | wordwidth = map_bankwidth(map); | ||
344 | words_per_bus = 1; | ||
345 | } | ||
346 | |||
347 | chip_mode = map_bankwidth(map) / cfi_interleave(cfi); | ||
348 | chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map); | ||
349 | |||
350 | /* First, determine what the bit-pattern should be for a single | ||
351 | device, according to chip mode and endianness... */ | ||
352 | switch (chip_mode) { | ||
353 | default: BUG(); | ||
354 | case 1: | ||
355 | onecmd = cmd; | ||
356 | break; | ||
357 | case 2: | ||
358 | onecmd = cpu_to_cfi16(map, cmd); | ||
359 | break; | ||
360 | case 4: | ||
361 | onecmd = cpu_to_cfi32(map, cmd); | ||
362 | break; | ||
363 | } | ||
364 | |||
365 | /* Now replicate it across the size of an unsigned long, or | ||
366 | just to the bus width as appropriate */ | ||
367 | switch (chips_per_word) { | ||
368 | default: BUG(); | ||
369 | #if BITS_PER_LONG >= 64 | ||
370 | case 8: | ||
371 | onecmd |= (onecmd << (chip_mode * 32)); | ||
372 | #endif | ||
373 | case 4: | ||
374 | onecmd |= (onecmd << (chip_mode * 16)); | ||
375 | case 2: | ||
376 | onecmd |= (onecmd << (chip_mode * 8)); | ||
377 | case 1: | ||
378 | ; | ||
379 | } | ||
380 | 301 | ||
381 | /* And finally, for the multi-word case, replicate it | 302 | map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi); |
382 | in all words in the structure */ | ||
383 | for (i=0; i < words_per_bus; i++) { | ||
384 | val.x[i] = onecmd; | ||
385 | } | ||
386 | |||
387 | return val; | ||
388 | } | ||
389 | #define CMD(x) cfi_build_cmd((x), map, cfi) | 303 | #define CMD(x) cfi_build_cmd((x), map, cfi) |
390 | 304 | ||
391 | 305 | unsigned long cfi_merge_status(map_word val, struct map_info *map, | |
392 | static inline unsigned long cfi_merge_status(map_word val, struct map_info *map, | 306 | struct cfi_private *cfi); |
393 | struct cfi_private *cfi) | ||
394 | { | ||
395 | int wordwidth, words_per_bus, chip_mode, chips_per_word; | ||
396 | unsigned long onestat, res = 0; | ||
397 | int i; | ||
398 | |||
399 | /* We do it this way to give the compiler a fighting chance | ||
400 | of optimising away all the crap for 'bankwidth' larger than | ||
401 | an unsigned long, in the common case where that support is | ||
402 | disabled */ | ||
403 | if (map_bankwidth_is_large(map)) { | ||
404 | wordwidth = sizeof(unsigned long); | ||
405 | words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1 | ||
406 | } else { | ||
407 | wordwidth = map_bankwidth(map); | ||
408 | words_per_bus = 1; | ||
409 | } | ||
410 | |||
411 | chip_mode = map_bankwidth(map) / cfi_interleave(cfi); | ||
412 | chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map); | ||
413 | |||
414 | onestat = val.x[0]; | ||
415 | /* Or all status words together */ | ||
416 | for (i=1; i < words_per_bus; i++) { | ||
417 | onestat |= val.x[i]; | ||
418 | } | ||
419 | |||
420 | res = onestat; | ||
421 | switch(chips_per_word) { | ||
422 | default: BUG(); | ||
423 | #if BITS_PER_LONG >= 64 | ||
424 | case 8: | ||
425 | res |= (onestat >> (chip_mode * 32)); | ||
426 | #endif | ||
427 | case 4: | ||
428 | res |= (onestat >> (chip_mode * 16)); | ||
429 | case 2: | ||
430 | res |= (onestat >> (chip_mode * 8)); | ||
431 | case 1: | ||
432 | ; | ||
433 | } | ||
434 | |||
435 | /* Last, determine what the bit-pattern should be for a single | ||
436 | device, according to chip mode and endianness... */ | ||
437 | switch (chip_mode) { | ||
438 | case 1: | ||
439 | break; | ||
440 | case 2: | ||
441 | res = cfi16_to_cpu(map, res); | ||
442 | break; | ||
443 | case 4: | ||
444 | res = cfi32_to_cpu(map, res); | ||
445 | break; | ||
446 | default: BUG(); | ||
447 | } | ||
448 | return res; | ||
449 | } | ||
450 | |||
451 | #define MERGESTATUS(x) cfi_merge_status((x), map, cfi) | 307 | #define MERGESTATUS(x) cfi_merge_status((x), map, cfi) |
452 | 308 | ||
453 | 309 | uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base, | |
454 | /* | ||
455 | * Sends a CFI command to a bank of flash for the given geometry. | ||
456 | * | ||
457 | * Returns the offset in flash where the command was written. | ||
458 | * If prev_val is non-null, it will be set to the value at the command address, | ||
459 | * before the command was written. | ||
460 | */ | ||
461 | static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base, | ||
462 | struct map_info *map, struct cfi_private *cfi, | 310 | struct map_info *map, struct cfi_private *cfi, |
463 | int type, map_word *prev_val) | 311 | int type, map_word *prev_val); |
464 | { | ||
465 | map_word val; | ||
466 | uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi); | ||
467 | val = cfi_build_cmd(cmd, map, cfi); | ||
468 | |||
469 | if (prev_val) | ||
470 | *prev_val = map_read(map, addr); | ||
471 | |||
472 | map_write(map, val, addr); | ||
473 | |||
474 | return addr - base; | ||
475 | } | ||
476 | 312 | ||
477 | static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr) | 313 | static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr) |
478 | { | 314 | { |
@@ -506,15 +342,7 @@ static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr) | |||
506 | } | 342 | } |
507 | } | 343 | } |
508 | 344 | ||
509 | static inline void cfi_udelay(int us) | 345 | void cfi_udelay(int us); |
510 | { | ||
511 | if (us >= 1000) { | ||
512 | msleep((us+999)/1000); | ||
513 | } else { | ||
514 | udelay(us); | ||
515 | cond_resched(); | ||
516 | } | ||
517 | } | ||
518 | 346 | ||
519 | int __xipram cfi_qry_present(struct map_info *map, __u32 base, | 347 | int __xipram cfi_qry_present(struct map_info *map, __u32 base, |
520 | struct cfi_private *cfi); | 348 | struct cfi_private *cfi); |