author	Jarkko Lavinen <jarkko.lavinen@nokia.com>	2011-02-14 09:16:11 -0500
committer	David Woodhouse <David.Woodhouse@intel.com>	2011-03-11 09:22:46 -0500
commit	a321590246202f2598aca2f4ef63b992e8fc97ef (patch)
tree	cbb838704f7575bf5634c95fd1d91bef24e9d296 /drivers/mtd
parent	115ee88c15b55859a8b59c5dccb3882bbd47e542 (diff)
mtd: Add mtdswap block driver
Add a driver for allowing an mtd device to be used as a block device
for swapping. The block device is volatile, and the mapping of swapped
pages is not stored on flash.

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Diffstat (limited to 'drivers/mtd')
-rw-r--r--	drivers/mtd/Kconfig	|   18
-rw-r--r--	drivers/mtd/Makefile	|    1
-rw-r--r--	drivers/mtd/mtdswap.c	| 1593
3 files changed, 1612 insertions, 0 deletions
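
For orientation before the diff: mtdswap_add_mtd() below rounds the device down to whole erase blocks, subtracts bad blocks, reserves a percentage of spares for garbage collection, and exposes the rest as swap. A minimal userspace sketch of that arithmetic, using a hypothetical geometry (256 MiB device, 128 KiB erase blocks, 4 bad blocks) and the driver's 10% spare default; this mirrors the driver's calculation and is not a kernel API:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mtd_size = 256ULL << 20;	/* 256 MiB device (assumption) */
	uint32_t erasesize = 128U << 10;	/* 128 KiB erase blocks (assumption) */
	unsigned int bad_blocks = 4;		/* hypothetical bad-block count */
	unsigned int spare_percent = 10;	/* spare_eblocks module default */

	unsigned int eblocks = mtd_size / erasesize;
	unsigned int eavailable = eblocks - bad_blocks;
	unsigned int spare_cnt = (uint64_t)eavailable * spare_percent / 100;

	if (spare_cnt < 2)			/* MIN_SPARE_EBLOCKS */
		spare_cnt = 2;
	if (spare_cnt > eavailable - 1)
		spare_cnt = eavailable - 1;

	/* Remaining blocks become the visible swap device (header page ignored). */
	uint64_t swap_size = (uint64_t)(eavailable - spare_cnt) * erasesize;

	printf("%u eraseblocks, %u spare, %llu KB of swap\n",
	       eblocks, spare_cnt, (unsigned long long)(swap_size / 1024));
	return 0;
}

With these assumed numbers the sketch reports 2048 eraseblocks, 204 spares and 235520 KB of usable swap.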
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 23088b3372d0..5fe01d556657 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -325,6 +325,24 @@ config MTD_OOPS
 	  To use, add console=ttyMTDx to the kernel command line,
 	  where x is the MTD device number to use.
 
+config MTD_SWAP
+	tristate "Swap on MTD device support"
+	depends on MTD && SWAP
+	select MTD_BLKDEVS
+	help
+	  Provides a volatile block device driver on top of an MTD partition
+	  suitable for swapping. The mapping of written blocks is not saved.
+	  The driver provides wear leveling by storing the erase counter in
+	  the OOB area.
+
+config MTD_SWAP_STRICT
+	bool "Strict erase error handling"
+	depends on MTD_SWAP
+	help
+	  Enables strict handling of failed erasures, marking erase blocks
+	  bad right after the first failed operation. In non-strict mode the
+	  erase operation is retried.
+
 source "drivers/mtd/chips/Kconfig"
 
 source "drivers/mtd/maps/Kconfig"
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index eff3dd53cc05..d578095fb255 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_RFD_FTL) += rfd_ftl.o
 obj-$(CONFIG_SSFDC)		+= ssfdc.o
 obj-$(CONFIG_SM_FTL)		+= sm_ftl.o
 obj-$(CONFIG_MTD_OOPS)		+= mtdoops.o
+obj-$(CONFIG_MTD_SWAP)		+= mtdswap.o
 
 nftl-objs		:= nftlcore.o nftlmount.o
 inftl-objs		:= inftlcore.o inftlmount.o
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
new file mode 100644
index 000000000000..57e6cc30bc42
--- /dev/null
+++ b/drivers/mtd/mtdswap.c
@@ -0,0 +1,1593 @@
1/*
2 * Swap block device support for MTDs
3 * Turns an MTD device into a swap device with block wear leveling
4 *
5 * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
6 *
7 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
8 *
9 * Based on Richard Purdie's earlier implementation in 2007. Background
10 * support and lock-less operation written by Adrian Hunter.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * version 2 as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
24 * 02110-1301 USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/mtd/mtd.h>
30#include <linux/mtd/blktrans.h>
31#include <linux/rbtree.h>
32#include <linux/sched.h>
33#include <linux/slab.h>
34#include <linux/vmalloc.h>
35#include <linux/genhd.h>
36#include <linux/swap.h>
37#include <linux/debugfs.h>
38#include <linux/seq_file.h>
39#include <linux/device.h>
40#include <linux/math64.h>
41
42#define MTDSWAP_PREFIX "mtdswap"
43
44/*
45 * The number of free eraseblocks when GC should stop
46 */
47#define CLEAN_BLOCK_THRESHOLD 20
48
49/*
50 * Number of free eraseblocks below which GC can also collect low frag
51 * blocks.
52 */
53#define LOW_FRAG_GC_TRESHOLD 5
54
55/*
56 * Wear level cost amortization. We want to do wear leveling in the background
57 * without disturbing gc too much. This is done by defining a max GC frequency.
58 * Frequency value 6 means 1/6 of the GC passes will pick an erase block based
59 * on the biggest wear difference rather than the biggest dirtiness.
60 *
61 * The lower freq2 should be chosen so that it makes sure the maximum erase
62 * difference will decrease even if a malicious application is deliberately
63 * trying to make erase differences large.
64 */
65#define MAX_ERASE_DIFF 4000
66#define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF
67#define COLLECT_NONDIRTY_FREQ1 6
68#define COLLECT_NONDIRTY_FREQ2 4
69
70#define PAGE_UNDEF UINT_MAX
71#define BLOCK_UNDEF UINT_MAX
72#define BLOCK_ERROR (UINT_MAX - 1)
73#define BLOCK_MAX (UINT_MAX - 2)
74
75#define EBLOCK_BAD (1 << 0)
76#define EBLOCK_NOMAGIC (1 << 1)
77#define EBLOCK_BITFLIP (1 << 2)
78#define EBLOCK_FAILED (1 << 3)
79#define EBLOCK_READERR (1 << 4)
80#define EBLOCK_IDX_SHIFT 5
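/*
 * The low bits of swap_eb->flags hold the EBLOCK_* state bits; during
 * the initial scan, the tree index chosen for the block is stashed in
 * the bits above EBLOCK_IDX_SHIFT and read back once erase counts have
 * been reconciled (see mtdswap_scan_eblks()).
 */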
81
82struct swap_eb {
83 struct rb_node rb;
84 struct rb_root *root;
85
86 unsigned int flags;
87 unsigned int active_count;
88 unsigned int erase_count;
89	unsigned int pad;	/* speeds up pointer decrement */
90};
91
92#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
93 rb)->erase_count)
94#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
95 rb)->erase_count)
96
97struct mtdswap_tree {
98 struct rb_root root;
99 unsigned int count;
100};
101
102enum {
103 MTDSWAP_CLEAN,
104 MTDSWAP_USED,
105 MTDSWAP_LOWFRAG,
106 MTDSWAP_HIFRAG,
107 MTDSWAP_DIRTY,
108 MTDSWAP_BITFLIP,
109 MTDSWAP_FAILING,
110 MTDSWAP_TREE_CNT,
111};
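/*
 * One rb-tree per block state: CLEAN blocks are erased and carry a
 * clean marker, USED blocks have every page mapped, LOWFRAG/HIFRAG
 * blocks are partially mapped (more/less than half), DIRTY blocks have
 * no mapped pages left, and BITFLIP/FAILING hold blocks with
 * correctable or persistent I/O problems (see mtdswap_store_eb()).
 */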
112
113struct mtdswap_dev {
114 struct mtd_blktrans_dev *mbd_dev;
115 struct mtd_info *mtd;
116 struct device *dev;
117
118 unsigned int *page_data;
119 unsigned int *revmap;
120
121 unsigned int eblks;
122 unsigned int spare_eblks;
123 unsigned int pages_per_eblk;
124 unsigned int max_erase_count;
125 struct swap_eb *eb_data;
126
127 struct mtdswap_tree trees[MTDSWAP_TREE_CNT];
128
129 unsigned long long sect_read_count;
130 unsigned long long sect_write_count;
131 unsigned long long mtd_write_count;
132 unsigned long long mtd_read_count;
133 unsigned long long discard_count;
134 unsigned long long discard_page_count;
135
136 unsigned int curr_write_pos;
137 struct swap_eb *curr_write;
138
139 char *page_buf;
140 char *oob_buf;
141
142 struct dentry *debugfs_root;
143};
144
145struct mtdswap_oobdata {
146 __le16 magic;
147 __le32 count;
148} __attribute__((packed));
149
150#define MTDSWAP_MAGIC_CLEAN 0x2095
151#define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1)
152#define MTDSWAP_TYPE_CLEAN 0
153#define MTDSWAP_TYPE_DIRTY 1
154#define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata)
155
156#define MTDSWAP_ERASE_RETRIES 3 /* Before marking erase block bad */
157#define MTDSWAP_IO_RETRIES 3
158
159#ifdef CONFIG_MTD_SWAP_STRICT
160#define MTDSWAP_STRICT 1
161#else
162#define MTDSWAP_STRICT 0
163#endif
164
165enum {
166 MTDSWAP_SCANNED_CLEAN,
167 MTDSWAP_SCANNED_DIRTY,
168 MTDSWAP_SCANNED_BITFLIP,
169 MTDSWAP_SCANNED_BAD,
170};
171
172/*
173 * In the worst case mtdswap_writesect() has allocated the last clean
174 * page from the current block and is then pre-empted by the GC
175 * thread. The thread can consume a full erase block when moving a
176 * block.
177 */
178#define MIN_SPARE_EBLOCKS 2
179#define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1)
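/*
 * Worked example of the worst case above: mtdswap_writesect() takes
 * the last clean page, then background GC moves one full erase block;
 * two spare blocks can thus be consumed before a single erasure
 * completes, hence MIN_SPARE_EBLOCKS spares plus one data block.
 */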
180
181#define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root)
182#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
183#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
184#define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count)
185
186#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv)
187
188static char partitions[128] = "";
189module_param_string(partitions, partitions, sizeof(partitions), 0444);
190MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap "
191		"partitions, e.g. partitions=\"1,3,5\"");
192
193static unsigned int spare_eblocks = 10;
194module_param(spare_eblocks, uint, 0444);
195MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
196 "garbage collection (default 10%)");
197
198static bool header; /* false */
199module_param(header, bool, 0444);
200MODULE_PARM_DESC(header,
201 "Include builtin swap header (default 0, without header)");
202
203static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);
204
205static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
206{
207 return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
208}
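/*
 * For illustration: assuming a hypothetical 128 KiB erase size, the
 * sixth block (eb_data[5]) starts at byte offset 5 * 0x20000 = 0xa0000
 * on the MTD device.
 */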
209
210static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
211{
212 unsigned int oldidx;
213 struct mtdswap_tree *tp;
214
215 if (eb->root) {
216 tp = container_of(eb->root, struct mtdswap_tree, root);
217 oldidx = tp - &d->trees[0];
218
219 d->trees[oldidx].count--;
220 rb_erase(&eb->rb, eb->root);
221 }
222}
223
224static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
225{
226 struct rb_node **p, *parent = NULL;
227 struct swap_eb *cur;
228
229 p = &root->rb_node;
230 while (*p) {
231 parent = *p;
232 cur = rb_entry(parent, struct swap_eb, rb);
233 if (eb->erase_count > cur->erase_count)
234 p = &(*p)->rb_right;
235 else
236 p = &(*p)->rb_left;
237 }
238
239 rb_link_node(&eb->rb, parent, p);
240 rb_insert_color(&eb->rb, root);
241}
242
243static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
244{
245 struct rb_root *root;
246
247 if (eb->root == &d->trees[idx].root)
248 return;
249
250 mtdswap_eb_detach(d, eb);
251 root = &d->trees[idx].root;
252 __mtdswap_rb_add(root, eb);
253 eb->root = root;
254 d->trees[idx].count++;
255}
256
257static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
258{
259 struct rb_node *p;
260 unsigned int i;
261
262 p = rb_first(root);
263 i = 0;
264 while (i < idx && p) {
265 p = rb_next(p);
266 i++;
267 }
268
269 return p;
270}
271
272static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
273{
274 int ret;
275 loff_t offset;
276
277 d->spare_eblks--;
278 eb->flags |= EBLOCK_BAD;
279 mtdswap_eb_detach(d, eb);
280 eb->root = NULL;
281
282 /* badblocks not supported */
283 if (!d->mtd->block_markbad)
284 return 1;
285
286 offset = mtdswap_eb_offset(d, eb);
287 dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
288 ret = d->mtd->block_markbad(d->mtd, offset);
289
290 if (ret) {
291 dev_warn(d->dev, "Mark block bad failed for block at %08llx "
292 "error %d\n", offset, ret);
293 return ret;
294 }
295
296 return 1;
297
298}
299
300static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
301{
302 unsigned int marked = eb->flags & EBLOCK_FAILED;
303 struct swap_eb *curr_write = d->curr_write;
304
305 eb->flags |= EBLOCK_FAILED;
306 if (curr_write == eb) {
307 d->curr_write = NULL;
308
309 if (!marked && d->curr_write_pos != 0) {
310 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
311 return 0;
312 }
313 }
314
315 return mtdswap_handle_badblock(d, eb);
316}
317
318static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
319 struct mtd_oob_ops *ops)
320{
321 int ret = d->mtd->read_oob(d->mtd, from, ops);
322
323 if (ret == -EUCLEAN)
324 return ret;
325
326 if (ret) {
327 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
328 ret, from);
329 return ret;
330 }
331
332 if (ops->oobretlen < ops->ooblen) {
333		dev_warn(d->dev, "Read OOB returned short read (%zd bytes not "
334			"%zd) for block at %08llx\n",
335			ops->oobretlen, ops->ooblen, from);
336 return -EIO;
337 }
338
339 return 0;
340}
341
342static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
343{
344 struct mtdswap_oobdata *data, *data2;
345 int ret;
346 loff_t offset;
347 struct mtd_oob_ops ops;
348
349 offset = mtdswap_eb_offset(d, eb);
350
351 /* Check first if the block is bad. */
352 if (d->mtd->block_isbad && d->mtd->block_isbad(d->mtd, offset))
353 return MTDSWAP_SCANNED_BAD;
354
355 ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
356 ops.oobbuf = d->oob_buf;
357 ops.ooboffs = 0;
358 ops.datbuf = NULL;
359 ops.mode = MTD_OOB_AUTO;
360
361 ret = mtdswap_read_oob(d, offset, &ops);
362
363 if (ret && ret != -EUCLEAN)
364 return ret;
365
366 data = (struct mtdswap_oobdata *)d->oob_buf;
367 data2 = (struct mtdswap_oobdata *)
368 (d->oob_buf + d->mtd->ecclayout->oobavail);
369
370 if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
371 eb->erase_count = le32_to_cpu(data->count);
372 if (ret == -EUCLEAN)
373 ret = MTDSWAP_SCANNED_BITFLIP;
374 else {
375 if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
376 ret = MTDSWAP_SCANNED_DIRTY;
377 else
378 ret = MTDSWAP_SCANNED_CLEAN;
379 }
380 } else {
381 eb->flags |= EBLOCK_NOMAGIC;
382 ret = MTDSWAP_SCANNED_DIRTY;
383 }
384
385 return ret;
386}
387
388static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
389 u16 marker)
390{
391 struct mtdswap_oobdata n;
392 int ret;
393 loff_t offset;
394 struct mtd_oob_ops ops;
395
396 ops.ooboffs = 0;
397 ops.oobbuf = (uint8_t *)&n;
398 ops.mode = MTD_OOB_AUTO;
399 ops.datbuf = NULL;
400
401 if (marker == MTDSWAP_TYPE_CLEAN) {
402 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
403 n.count = cpu_to_le32(eb->erase_count);
404 ops.ooblen = MTDSWAP_OOBSIZE;
405 offset = mtdswap_eb_offset(d, eb);
406 } else {
407 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
408 ops.ooblen = sizeof(n.magic);
409 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
410 }
411
412	ret = d->mtd->write_oob(d->mtd, offset, &ops);
413
414 if (ret) {
415 dev_warn(d->dev, "Write OOB failed for block at %08llx "
416 "error %d\n", offset, ret);
417 if (ret == -EIO || ret == -EBADMSG)
418 mtdswap_handle_write_error(d, eb);
419 return ret;
420 }
421
422 if (ops.oobretlen != ops.ooblen) {
423		dev_warn(d->dev, "Short OOB write for block at %08llx: "
424			"%zd not %zd\n",
425			offset, ops.oobretlen, ops.ooblen);
426		return -EIO;
427 }
428
429 return 0;
430}
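/*
 * On-flash marker protocol: after a successful erase, the first page's
 * OOB receives MTDSWAP_MAGIC_CLEAN plus the erase count; when the block
 * is taken for writing, MTDSWAP_MAGIC_DIRTY goes into the second page's
 * OOB (offset + writesize above). A later scan can therefore tell
 * clean, in-use and interrupted blocks apart.
 */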
431
432/*
433 * Are there any erase blocks without MAGIC_CLEAN header, presumably
434 * because power was cut off after erase but before header write? We
435 * need to guesstimate the erase count.
436 */
437static void mtdswap_check_counts(struct mtdswap_dev *d)
438{
439 struct rb_root hist_root = RB_ROOT;
440 struct rb_node *medrb;
441 struct swap_eb *eb;
442 unsigned int i, cnt, median;
443
444 cnt = 0;
445 for (i = 0; i < d->eblks; i++) {
446 eb = d->eb_data + i;
447
448 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
449 continue;
450
451 __mtdswap_rb_add(&hist_root, eb);
452 cnt++;
453 }
454
455 if (cnt == 0)
456 return;
457
458 medrb = mtdswap_rb_index(&hist_root, cnt / 2);
459 median = rb_entry(medrb, struct swap_eb, rb)->erase_count;
460
461 d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);
462
463 for (i = 0; i < d->eblks; i++) {
464 eb = d->eb_data + i;
465
466 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
467 eb->erase_count = median;
468
469 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
470 continue;
471
472 rb_erase(&eb->rb, &hist_root);
473 }
474}
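/*
 * Example: with surviving erase counts {3, 5, 7} and one block whose
 * header was lost in a power cut, the median value 5 is assigned to
 * the headerless block instead of restarting it at zero, which would
 * skew wear leveling toward that block.
 */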
475
476static void mtdswap_scan_eblks(struct mtdswap_dev *d)
477{
478 int status;
479 unsigned int i, idx;
480 struct swap_eb *eb;
481
482 for (i = 0; i < d->eblks; i++) {
483 eb = d->eb_data + i;
484
485 status = mtdswap_read_markers(d, eb);
486 if (status < 0)
487 eb->flags |= EBLOCK_READERR;
488 else if (status == MTDSWAP_SCANNED_BAD) {
489 eb->flags |= EBLOCK_BAD;
490 continue;
491 }
492
493 switch (status) {
494 case MTDSWAP_SCANNED_CLEAN:
495 idx = MTDSWAP_CLEAN;
496 break;
497 case MTDSWAP_SCANNED_DIRTY:
498 case MTDSWAP_SCANNED_BITFLIP:
499 idx = MTDSWAP_DIRTY;
500 break;
501 default:
502 idx = MTDSWAP_FAILING;
503 }
504
505 eb->flags |= (idx << EBLOCK_IDX_SHIFT);
506 }
507
508 mtdswap_check_counts(d);
509
510 for (i = 0; i < d->eblks; i++) {
511 eb = d->eb_data + i;
512
513 if (eb->flags & EBLOCK_BAD)
514 continue;
515
516 idx = eb->flags >> EBLOCK_IDX_SHIFT;
517 mtdswap_rb_add(d, eb, idx);
518 }
519}
520
521/*
522 * Place eblk into a tree corresponding to the number of active
523 * blocks it contains.
524 */
525static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
526{
527 unsigned int weight = eb->active_count;
528 unsigned int maxweight = d->pages_per_eblk;
529
530 if (eb == d->curr_write)
531 return;
532
533 if (eb->flags & EBLOCK_BITFLIP)
534 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
535 else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
536 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
537	else if (weight == maxweight)
538 mtdswap_rb_add(d, eb, MTDSWAP_USED);
539 else if (weight == 0)
540 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
541 else if (weight > (maxweight/2))
542 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
543 else
544 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
545}
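/*
 * For illustration, assuming 256 KiB erase blocks and 4 KiB pages
 * (maxweight = 64): health buckets take precedence, then weight 64 ->
 * USED, 33..63 -> LOWFRAG, 1..32 -> HIFRAG, 0 -> DIRTY. Blocks holding
 * more live data are costlier to garbage collect, so they land in the
 * less attractive trees.
 */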
546
547
548static void mtdswap_erase_callback(struct erase_info *done)
549{
550 wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv;
551 wake_up(wait_q);
552}
553
554static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
555{
556 struct mtd_info *mtd = d->mtd;
557 struct erase_info erase;
558 wait_queue_head_t wq;
559 unsigned int retries = 0;
560 int ret;
561
562 eb->erase_count++;
563 if (eb->erase_count > d->max_erase_count)
564 d->max_erase_count = eb->erase_count;
565
566retry:
567 init_waitqueue_head(&wq);
568 memset(&erase, 0, sizeof(struct erase_info));
569
570 erase.mtd = mtd;
571 erase.callback = mtdswap_erase_callback;
572 erase.addr = mtdswap_eb_offset(d, eb);
573 erase.len = mtd->erasesize;
574 erase.priv = (u_long)&wq;
575
576 ret = mtd->erase(mtd, &erase);
577 if (ret) {
578 if (retries++ < MTDSWAP_ERASE_RETRIES && !MTDSWAP_STRICT) {
579 dev_warn(d->dev,
580 "erase of erase block %#llx on %s failed",
581 erase.addr, mtd->name);
582 yield();
583 goto retry;
584 }
585
586 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
587 erase.addr, mtd->name);
588
589 mtdswap_handle_badblock(d, eb);
590 return -EIO;
591 }
592
593 ret = wait_event_interruptible(wq, erase.state == MTD_ERASE_DONE ||
594 erase.state == MTD_ERASE_FAILED);
595 if (ret) {
596		dev_err(d->dev, "Interrupted erase block %#llx erasure on %s",
597 erase.addr, mtd->name);
598 return -EINTR;
599 }
600
601 if (erase.state == MTD_ERASE_FAILED) {
602 if (retries++ < MTDSWAP_ERASE_RETRIES) {
603 dev_warn(d->dev,
604 "erase of erase block %#llx on %s failed",
605 erase.addr, mtd->name);
606 yield();
607 goto retry;
608 }
609
610 mtdswap_handle_badblock(d, eb);
611 return -EIO;
612 }
613
614 return 0;
615}
616
617static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
618 unsigned int *block)
619{
620 int ret;
621 struct swap_eb *old_eb = d->curr_write;
622 struct rb_root *clean_root;
623 struct swap_eb *eb;
624
625 if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
626 do {
627 if (TREE_EMPTY(d, CLEAN))
628 return -ENOSPC;
629
630 clean_root = TREE_ROOT(d, CLEAN);
631 eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
632 rb_erase(&eb->rb, clean_root);
633 eb->root = NULL;
634 TREE_COUNT(d, CLEAN)--;
635
636 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
637 } while (ret == -EIO || ret == -EBADMSG);
638
639 if (ret)
640 return ret;
641
642 d->curr_write_pos = 0;
643 d->curr_write = eb;
644 if (old_eb)
645 mtdswap_store_eb(d, old_eb);
646 }
647
648 *block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
649 d->curr_write_pos;
650
651 d->curr_write->active_count++;
652 d->revmap[*block] = page;
653 d->curr_write_pos++;
654
655 return 0;
656}
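/*
 * Mapping invariant maintained here and in the callers: for a mapped
 * logical page,
 *
 *	d->page_data[page] == block  and  d->revmap[block] == page,
 *
 * with block = eb_index * d->pages_per_eblk + page_in_eblk as computed
 * above. Unmapped slots hold BLOCK_UNDEF and PAGE_UNDEF respectively.
 */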
657
658static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
659{
660 return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
661 d->pages_per_eblk - d->curr_write_pos;
662}
663
664static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
665{
666 return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
667}
668
669static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
670 unsigned int page, unsigned int *bp, int gc_context)
671{
672 struct mtd_info *mtd = d->mtd;
673 struct swap_eb *eb;
674 size_t retlen;
675 loff_t writepos;
676 int ret;
677
678retry:
679 if (!gc_context)
680 while (!mtdswap_enough_free_pages(d))
681 if (mtdswap_gc(d, 0) > 0)
682 return -ENOSPC;
683
684 ret = mtdswap_map_free_block(d, page, bp);
685 eb = d->eb_data + (*bp / d->pages_per_eblk);
686
687 if (ret == -EIO || ret == -EBADMSG) {
688 d->curr_write = NULL;
689 eb->active_count--;
690 d->revmap[*bp] = PAGE_UNDEF;
691 goto retry;
692 }
693
694 if (ret < 0)
695 return ret;
696
697 writepos = (loff_t)*bp << PAGE_SHIFT;
698 ret = mtd->write(mtd, writepos, PAGE_SIZE, &retlen, buf);
699 if (ret == -EIO || ret == -EBADMSG) {
700 d->curr_write_pos--;
701 eb->active_count--;
702 d->revmap[*bp] = PAGE_UNDEF;
703 mtdswap_handle_write_error(d, eb);
704 goto retry;
705 }
706
707 if (ret < 0) {
708		dev_err(d->dev, "Write to MTD device failed: %d (%zu written)",
709 ret, retlen);
710 goto err;
711 }
712
713 if (retlen != PAGE_SIZE) {
714		dev_err(d->dev, "Short write to MTD device: %zu written",
715 retlen);
716 ret = -EIO;
717 goto err;
718 }
719
720 return ret;
721
722err:
723 d->curr_write_pos--;
724 eb->active_count--;
725 d->revmap[*bp] = PAGE_UNDEF;
726
727 return ret;
728}
729
730static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
731 unsigned int *newblock)
732{
733 struct mtd_info *mtd = d->mtd;
734 struct swap_eb *eb, *oldeb;
735 int ret;
736 size_t retlen;
737 unsigned int page, retries;
738 loff_t readpos;
739
740 page = d->revmap[oldblock];
741 readpos = (loff_t) oldblock << PAGE_SHIFT;
742 retries = 0;
743
744retry:
745 ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
746
747 if (ret < 0 && ret != -EUCLEAN) {
748 oldeb = d->eb_data + oldblock / d->pages_per_eblk;
749 oldeb->flags |= EBLOCK_READERR;
750
751 dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
752 oldblock);
753 retries++;
754 if (retries < MTDSWAP_IO_RETRIES)
755 goto retry;
756
757 goto read_error;
758 }
759
760 if (retlen != PAGE_SIZE) {
761		dev_err(d->dev, "Short read: %zu (block %u)\n", retlen,
762 oldblock);
763 ret = -EIO;
764 goto read_error;
765 }
766
767 ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
768 if (ret < 0) {
769 d->page_data[page] = BLOCK_ERROR;
770 dev_err(d->dev, "Write error: %d\n", ret);
771 return ret;
772 }
773
774 eb = d->eb_data + *newblock / d->pages_per_eblk;
775 d->page_data[page] = *newblock;
776 d->revmap[oldblock] = PAGE_UNDEF;
777 eb = d->eb_data + oldblock / d->pages_per_eblk;
778 eb->active_count--;
779
780 return 0;
781
782read_error:
783 d->page_data[page] = BLOCK_ERROR;
784 d->revmap[oldblock] = PAGE_UNDEF;
785 return ret;
786}
787
788static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
789{
790 unsigned int i, block, eblk_base, newblock;
791 int ret, errcode;
792
793 errcode = 0;
794 eblk_base = (eb - d->eb_data) * d->pages_per_eblk;
795
796 for (i = 0; i < d->pages_per_eblk; i++) {
797 if (d->spare_eblks < MIN_SPARE_EBLOCKS)
798 return -ENOSPC;
799
800 block = eblk_base + i;
801 if (d->revmap[block] == PAGE_UNDEF)
802 continue;
803
804 ret = mtdswap_move_block(d, block, &newblock);
805 if (ret < 0 && !errcode)
806 errcode = ret;
807 }
808
809 return errcode;
810}
811
812static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
813{
814 int idx, stopat;
815
816 if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_TRESHOLD)
817 stopat = MTDSWAP_LOWFRAG;
818 else
819 stopat = MTDSWAP_HIFRAG;
820
821 for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
822 if (d->trees[idx].root.rb_node != NULL)
823 return idx;
824
825 return -1;
826}
827
828static int mtdswap_wlfreq(unsigned int maxdiff)
829{
830 unsigned int h, x, y, dist, base;
831
832 /*
833 * Calculate linear ramp down from f1 to f2 when maxdiff goes from
834 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar
835 * to a triangle with height f1 - f2 and width COLLECT_NONDIRTY_BASE.
836 */
837
838 dist = maxdiff - MAX_ERASE_DIFF;
839 if (dist > COLLECT_NONDIRTY_BASE)
840 dist = COLLECT_NONDIRTY_BASE;
841
842 /*
843 * Modelling the slope as a right-angled triangle with base
844 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is
845 * equal to the ratio h/base.
846 */
847 h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
848 base = COLLECT_NONDIRTY_BASE;
849
850 x = dist - base;
851 y = (x * h + base / 2) / base;
852
853 return COLLECT_NONDIRTY_FREQ2 + y;
854}
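/*
 * Per the comments above, the intended ramp is
 *
 *	freq(maxdiff) = f2 + (f1 - f2) * (base - dist) / base,
 *
 * so the divisor falls from COLLECT_NONDIRTY_FREQ1 (6) when maxdiff is
 * just over MAX_ERASE_DIFF down to COLLECT_NONDIRTY_FREQ2 (4) once
 * maxdiff reaches MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE; wear-based
 * picks get more frequent as the erase-count spread grows.
 */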
855
856static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
857{
858 static unsigned int pick_cnt;
859 unsigned int i, idx, wear, max;
860 struct rb_root *root;
861
862 max = 0;
863 for (i = 0; i <= MTDSWAP_DIRTY; i++) {
864 root = &d->trees[i].root;
865 if (root->rb_node == NULL)
866 continue;
867
868 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
869 if (wear > max) {
870 max = wear;
871 idx = i;
872 }
873 }
874
875 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
876 pick_cnt = 0;
877 return idx;
878 }
879
880 pick_cnt++;
881 return -1;
882}
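/*
 * pick_cnt amortizes wear leveling: a wear-based victim is returned at
 * most once every mtdswap_wlfreq(max) calls, roughly one GC pass in
 * six when the worst spread just exceeds MAX_ERASE_DIFF; the remaining
 * passes pick by dirtiness instead.
 */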
883
884static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
885 unsigned int background)
886{
887 int idx;
888
889 if (TREE_NONEMPTY(d, FAILING) &&
890 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
891 return MTDSWAP_FAILING;
892
893 idx = mtdswap_choose_wl_tree(d);
894 if (idx >= MTDSWAP_CLEAN)
895 return idx;
896
897 return __mtdswap_choose_gc_tree(d);
898}
899
900static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
901 unsigned int background)
902{
903 struct rb_root *rp = NULL;
904 struct swap_eb *eb = NULL;
905 int idx;
906
907 if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
908 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
909 return NULL;
910
911 idx = mtdswap_choose_gc_tree(d, background);
912 if (idx < 0)
913 return NULL;
914
915 rp = &d->trees[idx].root;
916 eb = rb_entry(rb_first(rp), struct swap_eb, rb);
917
918 rb_erase(&eb->rb, rp);
919 eb->root = NULL;
920 d->trees[idx].count--;
921 return eb;
922}
923
924static unsigned int mtdswap_test_patt(unsigned int i)
925{
926 return i % 2 ? 0x55555555 : 0xAAAAAAAA;
927}
928
929static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
930 struct swap_eb *eb)
931{
932 struct mtd_info *mtd = d->mtd;
933 unsigned int test, i, j, patt, mtd_pages;
934 loff_t base, pos;
935 unsigned int *p1 = (unsigned int *)d->page_buf;
936 unsigned char *p2 = (unsigned char *)d->oob_buf;
937 struct mtd_oob_ops ops;
938 int ret;
939
940 ops.mode = MTD_OOB_AUTO;
941 ops.len = mtd->writesize;
942 ops.ooblen = mtd->ecclayout->oobavail;
943 ops.ooboffs = 0;
944 ops.datbuf = d->page_buf;
945 ops.oobbuf = d->oob_buf;
946 base = mtdswap_eb_offset(d, eb);
947 mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;
948
949 for (test = 0; test < 2; test++) {
950 pos = base;
951 for (i = 0; i < mtd_pages; i++) {
952 patt = mtdswap_test_patt(test + i);
953 memset(d->page_buf, patt, mtd->writesize);
954 memset(d->oob_buf, patt, mtd->ecclayout->oobavail);
955 ret = mtd->write_oob(mtd, pos, &ops);
956 if (ret)
957 goto error;
958
959 pos += mtd->writesize;
960 }
961
962 pos = base;
963 for (i = 0; i < mtd_pages; i++) {
964 ret = mtd->read_oob(mtd, pos, &ops);
965 if (ret)
966 goto error;
967
968 patt = mtdswap_test_patt(test + i);
969 for (j = 0; j < mtd->writesize/sizeof(int); j++)
970 if (p1[j] != patt)
971 goto error;
972
973 for (j = 0; j < mtd->ecclayout->oobavail; j++)
974 if (p2[j] != (unsigned char)patt)
975 goto error;
976
977 pos += mtd->writesize;
978 }
979
980 ret = mtdswap_erase_block(d, eb);
981 if (ret)
982 goto error;
983 }
984
985 eb->flags &= ~EBLOCK_READERR;
986 return 1;
987
988error:
989 mtdswap_handle_badblock(d, eb);
990 return 0;
991}
992
993static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
994{
995 struct swap_eb *eb;
996 int ret;
997
998 if (d->spare_eblks < MIN_SPARE_EBLOCKS)
999 return 1;
1000
1001 eb = mtdswap_pick_gc_eblk(d, background);
1002 if (!eb)
1003 return 1;
1004
1005 ret = mtdswap_gc_eblock(d, eb);
1006 if (ret == -ENOSPC)
1007 return 1;
1008
1009 if (eb->flags & EBLOCK_FAILED) {
1010 mtdswap_handle_badblock(d, eb);
1011 return 0;
1012 }
1013
1014 eb->flags &= ~EBLOCK_BITFLIP;
1015 ret = mtdswap_erase_block(d, eb);
1016 if ((eb->flags & EBLOCK_READERR) &&
1017 (ret || !mtdswap_eblk_passes(d, eb)))
1018 return 0;
1019
1020 if (ret == 0)
1021 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);
1022
1023 if (ret == 0)
1024 mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
1025 else if (ret != -EIO && ret != -EBADMSG)
1026 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
1027
1028 return 0;
1029}
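/*
 * One GC pass in full: pick the most profitable victim tree, move any
 * still-mapped pages off the victim block, erase it, and rewrite the
 * CLEAN marker. Blocks that fail along the way are retired through
 * mtdswap_handle_badblock(), or parked in the DIRTY tree for another
 * attempt when the marker write fails non-fatally.
 */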
1030
1031static void mtdswap_background(struct mtd_blktrans_dev *dev)
1032{
1033 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1034 int ret;
1035
1036 while (1) {
1037 ret = mtdswap_gc(d, 1);
1038 if (ret || mtd_blktrans_cease_background(dev))
1039 return;
1040 }
1041}
1042
1043static void mtdswap_cleanup(struct mtdswap_dev *d)
1044{
1045 vfree(d->eb_data);
1046 vfree(d->revmap);
1047 vfree(d->page_data);
1048 kfree(d->oob_buf);
1049 kfree(d->page_buf);
1050}
1051
1052static int mtdswap_flush(struct mtd_blktrans_dev *dev)
1053{
1054 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1055
1056 if (d->mtd->sync)
1057 d->mtd->sync(d->mtd);
1058 return 0;
1059}
1060
1061static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
1062{
1063 loff_t offset;
1064 unsigned int badcnt;
1065
1066 badcnt = 0;
1067
1068 if (mtd->block_isbad)
1069 for (offset = 0; offset < size; offset += mtd->erasesize)
1070 if (mtd->block_isbad(mtd, offset))
1071 badcnt++;
1072
1073 return badcnt;
1074}
1075
1076static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
1077 unsigned long page, char *buf)
1078{
1079 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1080 unsigned int newblock, mapped;
1081 struct swap_eb *eb;
1082 int ret;
1083
1084 d->sect_write_count++;
1085
1086 if (d->spare_eblks < MIN_SPARE_EBLOCKS)
1087 return -ENOSPC;
1088
1089 if (header) {
1090 /* Ignore writes to the header page */
1091 if (unlikely(page == 0))
1092 return 0;
1093
1094 page--;
1095 }
1096
1097 mapped = d->page_data[page];
1098 if (mapped <= BLOCK_MAX) {
1099 eb = d->eb_data + (mapped / d->pages_per_eblk);
1100 eb->active_count--;
1101 mtdswap_store_eb(d, eb);
1102 d->page_data[page] = BLOCK_UNDEF;
1103 d->revmap[mapped] = PAGE_UNDEF;
1104 }
1105
1106 ret = mtdswap_write_block(d, buf, page, &newblock, 0);
1107 d->mtd_write_count++;
1108
1109 if (ret < 0)
1110 return ret;
1111
1112 eb = d->eb_data + (newblock / d->pages_per_eblk);
1113 d->page_data[page] = newblock;
1114
1115 return 0;
1116}
1117
1118/* Provide a dummy swap header for the kernel */
1119static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
1120{
1121 union swap_header *hd = (union swap_header *)(buf);
1122
1123 memset(buf, 0, PAGE_SIZE - 10);
1124
1125 hd->info.version = 1;
1126 hd->info.last_page = d->mbd_dev->size - 1;
1127 hd->info.nr_badpages = 0;
1128
1129 memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);
1130
1131 return 0;
1132}
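/*
 * With the "header" module parameter set, page 0 is never written to
 * flash; reads of it are answered with this synthesized version-1 swap
 * header (last_page, no bad pages, "SWAPSPACE2" signature at the end
 * of the page), so the device can be swapped on without an on-flash
 * header.
 */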
1133
1134static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
1135 unsigned long page, char *buf)
1136{
1137 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1138 struct mtd_info *mtd = d->mtd;
1139 unsigned int realblock, retries;
1140 loff_t readpos;
1141 struct swap_eb *eb;
1142 size_t retlen;
1143 int ret;
1144
1145 d->sect_read_count++;
1146
1147 if (header) {
1148 if (unlikely(page == 0))
1149 return mtdswap_auto_header(d, buf);
1150
1151 page--;
1152 }
1153
1154 realblock = d->page_data[page];
1155 if (realblock > BLOCK_MAX) {
1156 memset(buf, 0x0, PAGE_SIZE);
1157 if (realblock == BLOCK_UNDEF)
1158 return 0;
1159 else
1160 return -EIO;
1161 }
1162
1163 eb = d->eb_data + (realblock / d->pages_per_eblk);
1164 BUG_ON(d->revmap[realblock] == PAGE_UNDEF);
1165
1166 readpos = (loff_t)realblock << PAGE_SHIFT;
1167 retries = 0;
1168
1169retry:
1170 ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, buf);
1171
1172 d->mtd_read_count++;
1173 if (ret == -EUCLEAN) {
1174 eb->flags |= EBLOCK_BITFLIP;
1175 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
1176 ret = 0;
1177 }
1178
1179 if (ret < 0) {
1180 dev_err(d->dev, "Read error %d\n", ret);
1181 eb->flags |= EBLOCK_READERR;
1182 mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
1183 retries++;
1184 if (retries < MTDSWAP_IO_RETRIES)
1185 goto retry;
1186
1187 return ret;
1188 }
1189
1190 if (retlen != PAGE_SIZE) {
1191		dev_err(d->dev, "Short read %zu\n", retlen);
1192 return -EIO;
1193 }
1194
1195 return 0;
1196}
1197
1198static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
1199 unsigned nr_pages)
1200{
1201 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1202 unsigned long page;
1203 struct swap_eb *eb;
1204 unsigned int mapped;
1205
1206 d->discard_count++;
1207
1208 for (page = first; page < first + nr_pages; page++) {
1209 mapped = d->page_data[page];
1210 if (mapped <= BLOCK_MAX) {
1211 eb = d->eb_data + (mapped / d->pages_per_eblk);
1212 eb->active_count--;
1213 mtdswap_store_eb(d, eb);
1214 d->page_data[page] = BLOCK_UNDEF;
1215 d->revmap[mapped] = PAGE_UNDEF;
1216 d->discard_page_count++;
1217 } else if (mapped == BLOCK_ERROR) {
1218 d->page_data[page] = BLOCK_UNDEF;
1219 d->discard_page_count++;
1220 }
1221 }
1222
1223 return 0;
1224}
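/*
 * Discard only drops the logical->physical mapping; the freed flash
 * page is reclaimed by GC later, and a subsequent read of a discarded
 * page returns zeroes via the BLOCK_UNDEF path in mtdswap_readsect().
 */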
1225
1226static int mtdswap_show(struct seq_file *s, void *data)
1227{
1228 struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
1229 unsigned long sum;
1230 unsigned int count[MTDSWAP_TREE_CNT];
1231 unsigned int min[MTDSWAP_TREE_CNT];
1232 unsigned int max[MTDSWAP_TREE_CNT];
1233 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
1234 uint64_t use_size;
1235 char *name[] = {"clean", "used", "low", "high", "dirty", "bitflip",
1236 "failing"};
1237
1238 mutex_lock(&d->mbd_dev->lock);
1239
1240 for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1241 struct rb_root *root = &d->trees[i].root;
1242
1243 if (root->rb_node) {
1244 count[i] = d->trees[i].count;
1245 min[i] = rb_entry(rb_first(root), struct swap_eb,
1246 rb)->erase_count;
1247 max[i] = rb_entry(rb_last(root), struct swap_eb,
1248 rb)->erase_count;
1249 } else
1250 count[i] = 0;
1251 }
1252
1253 if (d->curr_write) {
1254 cw = 1;
1255 cwp = d->curr_write_pos;
1256 cwecount = d->curr_write->erase_count;
1257 }
1258
1259 sum = 0;
1260 for (i = 0; i < d->eblks; i++)
1261 sum += d->eb_data[i].erase_count;
1262
1263 use_size = (uint64_t)d->eblks * d->mtd->erasesize;
1264 bb_cnt = mtdswap_badblocks(d->mtd, use_size);
1265
1266 mapped = 0;
1267 pages = d->mbd_dev->size;
1268 for (i = 0; i < pages; i++)
1269 if (d->page_data[i] != BLOCK_UNDEF)
1270 mapped++;
1271
1272 mutex_unlock(&d->mbd_dev->lock);
1273
1274 for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
1275 if (!count[i])
1276 continue;
1277
1278 if (min[i] != max[i])
1279 seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
1280 "max %d times\n",
1281 name[i], count[i], min[i], max[i]);
1282 else
1283 seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
1284 "times\n", name[i], count[i], min[i]);
1285 }
1286
1287 if (bb_cnt)
1288 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);
1289
1290 if (cw)
1291 seq_printf(s, "current erase block: %u pages used, %u free, "
1292 "erased %u times\n",
1293 cwp, d->pages_per_eblk - cwp, cwecount);
1294
1295 seq_printf(s, "total erasures: %lu\n", sum);
1296
1297 seq_printf(s, "\n");
1298
1299 seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
1300 seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
1301 seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
1302 seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
1303 seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
1304 seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);
1305
1306 seq_printf(s, "\n");
1307	seq_printf(s, "total pages: %u\n", pages);
1308 seq_printf(s, "pages mapped: %u\n", mapped);
1309
1310 return 0;
1311}
1312
1313static int mtdswap_open(struct inode *inode, struct file *file)
1314{
1315 return single_open(file, mtdswap_show, inode->i_private);
1316}
1317
1318static const struct file_operations mtdswap_fops = {
1319 .open = mtdswap_open,
1320 .read = seq_read,
1321 .llseek = seq_lseek,
1322 .release = single_release,
1323};
1324
1325static int mtdswap_add_debugfs(struct mtdswap_dev *d)
1326{
1327 struct gendisk *gd = d->mbd_dev->disk;
1328 struct device *dev = disk_to_dev(gd);
1329
1330 struct dentry *root;
1331 struct dentry *dent;
1332
1333 root = debugfs_create_dir(gd->disk_name, NULL);
1334 if (IS_ERR(root))
1335 return 0;
1336
1337 if (!root) {
1338 dev_err(dev, "failed to initialize debugfs\n");
1339 return -1;
1340 }
1341
1342 d->debugfs_root = root;
1343
1344 dent = debugfs_create_file("stats", S_IRUSR, root, d,
1345 &mtdswap_fops);
1346 if (!dent) {
1347 dev_err(d->dev, "debugfs_create_file failed\n");
1348 debugfs_remove_recursive(root);
1349 d->debugfs_root = NULL;
1350 return -1;
1351 }
1352
1353 return 0;
1354}
1355
1356static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
1357 unsigned int spare_cnt)
1358{
1359 struct mtd_info *mtd = d->mbd_dev->mtd;
1360 unsigned int i, eblk_bytes, pages, blocks;
1361 int ret = -ENOMEM;
1362
1363 d->mtd = mtd;
1364 d->eblks = eblocks;
1365 d->spare_eblks = spare_cnt;
1366 d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;
1367
1368 pages = d->mbd_dev->size;
1369 blocks = eblocks * d->pages_per_eblk;
1370
1371 for (i = 0; i < MTDSWAP_TREE_CNT; i++)
1372 d->trees[i].root = RB_ROOT;
1373
1374 d->page_data = vmalloc(sizeof(int)*pages);
1375 if (!d->page_data)
1376 goto page_data_fail;
1377
1378 d->revmap = vmalloc(sizeof(int)*blocks);
1379 if (!d->revmap)
1380 goto revmap_fail;
1381
1382 eblk_bytes = sizeof(struct swap_eb)*d->eblks;
1383 d->eb_data = vmalloc(eblk_bytes);
1384 if (!d->eb_data)
1385 goto eb_data_fail;
1386
1387 memset(d->eb_data, 0, eblk_bytes);
1388 for (i = 0; i < pages; i++)
1389 d->page_data[i] = BLOCK_UNDEF;
1390
1391 for (i = 0; i < blocks; i++)
1392 d->revmap[i] = PAGE_UNDEF;
1393
1394 d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1395 if (!d->page_buf)
1396 goto page_buf_fail;
1397
1398 d->oob_buf = kmalloc(2 * mtd->ecclayout->oobavail, GFP_KERNEL);
1399 if (!d->oob_buf)
1400 goto oob_buf_fail;
1401
1402 mtdswap_scan_eblks(d);
1403
1404 return 0;
1405
1406oob_buf_fail:
1407 kfree(d->page_buf);
1408page_buf_fail:
1409 vfree(d->eb_data);
1410eb_data_fail:
1411 vfree(d->revmap);
1412revmap_fail:
1413 vfree(d->page_data);
1414page_data_fail:
1415 printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
1416 return ret;
1417}
1418
1419static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
1420{
1421 struct mtdswap_dev *d;
1422 struct mtd_blktrans_dev *mbd_dev;
1423 char *parts;
1424 char *this_opt;
1425 unsigned long part;
1426 unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
1427 uint64_t swap_size, use_size, size_limit;
1428 struct nand_ecclayout *oinfo;
1429 int ret;
1430
1431 parts = &partitions[0];
1432 if (!*parts)
1433 return;
1434
1435 while ((this_opt = strsep(&parts, ",")) != NULL) {
1436 if (strict_strtoul(this_opt, 0, &part) < 0)
1437 return;
1438
1439 if (mtd->index == part)
1440 break;
1441 }
1442
1443 if (mtd->index != part)
1444 return;
1445
1446 if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
1447 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
1448 "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
1449 return;
1450 }
1451
1452 if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
1453 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
1454 " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
1455 return;
1456 }
1457
1458 oinfo = mtd->ecclayout;
1459 if (!mtd->oobsize || !oinfo || oinfo->oobavail < MTDSWAP_OOBSIZE) {
1460 printk(KERN_ERR "%s: Not enough free bytes in OOB, "
1461 "%d available, %u needed.\n",
1462 MTDSWAP_PREFIX, oinfo->oobavail, MTDSWAP_OOBSIZE);
1463 return;
1464 }
1465
1466 if (spare_eblocks > 100)
1467 spare_eblocks = 100;
1468
1469 use_size = mtd->size;
1470 size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;
1471
1472 if (mtd->size > size_limit) {
1473 printk(KERN_WARNING "%s: Device too large. Limiting size to "
1474 "%llu bytes\n", MTDSWAP_PREFIX, size_limit);
1475 use_size = size_limit;
1476 }
1477
1478 eblocks = mtd_div_by_eb(use_size, mtd);
1479 use_size = eblocks * mtd->erasesize;
1480 bad_blocks = mtdswap_badblocks(mtd, use_size);
1481 eavailable = eblocks - bad_blocks;
1482
1483 if (eavailable < MIN_ERASE_BLOCKS) {
1484 printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
1485 "%d needed\n", MTDSWAP_PREFIX, eavailable,
1486 MIN_ERASE_BLOCKS);
1487 return;
1488 }
1489
1490 spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);
1491
1492 if (spare_cnt < MIN_SPARE_EBLOCKS)
1493 spare_cnt = MIN_SPARE_EBLOCKS;
1494
1495 if (spare_cnt > eavailable - 1)
1496 spare_cnt = eavailable - 1;
1497
1498 swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
1499 (header ? PAGE_SIZE : 0);
1500
1501 printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
1502 "%u spare, %u bad blocks\n",
1503 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);
1504
1505 d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
1506 if (!d)
1507 return;
1508
1509 mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
1510 if (!mbd_dev) {
1511 kfree(d);
1512 return;
1513 }
1514
1515 d->mbd_dev = mbd_dev;
1516 mbd_dev->priv = d;
1517
1518 mbd_dev->mtd = mtd;
1519 mbd_dev->devnum = mtd->index;
1520 mbd_dev->size = swap_size >> PAGE_SHIFT;
1521 mbd_dev->tr = tr;
1522
1523 if (!(mtd->flags & MTD_WRITEABLE))
1524 mbd_dev->readonly = 1;
1525
1526 if (mtdswap_init(d, eblocks, spare_cnt) < 0)
1527 goto init_failed;
1528
1529 if (add_mtd_blktrans_dev(mbd_dev) < 0)
1530 goto cleanup;
1531
1532 d->dev = disk_to_dev(mbd_dev->disk);
1533
1534 ret = mtdswap_add_debugfs(d);
1535 if (ret < 0)
1536 goto debugfs_failed;
1537
1538 return;
1539
1540debugfs_failed:
1541 del_mtd_blktrans_dev(mbd_dev);
1542
1543cleanup:
1544 mtdswap_cleanup(d);
1545
1546init_failed:
1547 kfree(mbd_dev);
1548 kfree(d);
1549}
1550
1551static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
1552{
1553 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
1554
1555 debugfs_remove_recursive(d->debugfs_root);
1556 del_mtd_blktrans_dev(dev);
1557 mtdswap_cleanup(d);
1558 kfree(d);
1559}
1560
1561static struct mtd_blktrans_ops mtdswap_ops = {
1562 .name = "mtdswap",
1563 .major = 0,
1564 .part_bits = 0,
1565 .blksize = PAGE_SIZE,
1566 .flush = mtdswap_flush,
1567 .readsect = mtdswap_readsect,
1568 .writesect = mtdswap_writesect,
1569 .discard = mtdswap_discard,
1570 .background = mtdswap_background,
1571 .add_mtd = mtdswap_add_mtd,
1572 .remove_dev = mtdswap_remove_dev,
1573 .owner = THIS_MODULE,
1574};
1575
1576static int __init mtdswap_modinit(void)
1577{
1578 return register_mtd_blktrans(&mtdswap_ops);
1579}
1580
1581static void __exit mtdswap_modexit(void)
1582{
1583 deregister_mtd_blktrans(&mtdswap_ops);
1584}
1585
1586module_init(mtdswap_modinit);
1587module_exit(mtdswap_modexit);
1588
1589
1590MODULE_LICENSE("GPL");
1591MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
1592MODULE_DESCRIPTION("Block device access to an MTD suitable for using as "
1593 "swap space");