diff options
| author | Matthew L. Creech <mlcreech@gmail.com> | 2011-03-04 17:55:02 -0500 |
|---|---|---|
| committer | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2011-03-11 03:52:07 -0500 |
| commit | d882962f6af2b484b62a7fb05ef959e1bf355fc4 (patch) | |
| tree | 040a303908493e5edca3fe5c7aeecab2912bd3b0 | |
| parent | 2765df7da540687c4d57ca840182122f074c5b9c (diff) | |
UBIFS: handle allocation failures in UBIFS write path
Running kernel 2.6.37, my PPC-based device occasionally gets an
order-2 allocation failure in UBIFS, which causes the root FS to
become unwritable:
kswapd0: page allocation failure. order:2, mode:0x4050
Call Trace:
[c787dc30] [c00085b8] show_stack+0x7c/0x194 (unreliable)
[c787dc70] [c0061aec] __alloc_pages_nodemask+0x4f0/0x57c
[c787dd00] [c0061b98] __get_free_pages+0x20/0x50
[c787dd10] [c00e4f88] ubifs_jnl_write_data+0x54/0x200
[c787dd50] [c00e82d4] do_writepage+0x94/0x198
[c787dd90] [c00675e4] shrink_page_list+0x40c/0x77c
[c787de40] [c0067de0] shrink_inactive_list+0x1e0/0x370
[c787de90] [c0068224] shrink_zone+0x2b4/0x2b8
[c787df00] [c0068854] kswapd+0x408/0x5d4
[c787dfb0] [c0037bcc] kthread+0x80/0x84
[c787dff0] [c000ef44] kernel_thread+0x4c/0x68
Similar problems were encountered last April by Tomasz Stanislawski:
http://patchwork.ozlabs.org/patch/50965/
This patch implements Artem's suggested fix: fall back to a
mutex-protected static buffer, allocated at mount time. I tested it
by forcing execution down the failure path, and didn't see any ill
effects.
Artem: massaged the patch a little and improved it so that we do not
allocate the write reserve buffer when we are in R/O mode.
Signed-off-by: Matthew L. Creech <mlcreech@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
| -rw-r--r-- | fs/ubifs/journal.c | 28 | ||||
| -rw-r--r-- | fs/ubifs/super.c | 18 | ||||
| -rw-r--r-- | fs/ubifs/ubifs.h | 14 |
3 files changed, 54 insertions, 6 deletions
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 914f1bd89e57..aed25e864227 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
| @@ -690,7 +690,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, | |||
| 690 | { | 690 | { |
| 691 | struct ubifs_data_node *data; | 691 | struct ubifs_data_node *data; |
| 692 | int err, lnum, offs, compr_type, out_len; | 692 | int err, lnum, offs, compr_type, out_len; |
| 693 | int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; | 693 | int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; |
| 694 | struct ubifs_inode *ui = ubifs_inode(inode); | 694 | struct ubifs_inode *ui = ubifs_inode(inode); |
| 695 | 695 | ||
| 696 | dbg_jnl("ino %lu, blk %u, len %d, key %s", | 696 | dbg_jnl("ino %lu, blk %u, len %d, key %s", |
| @@ -698,9 +698,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, | |||
| 698 | DBGKEY(key)); | 698 | DBGKEY(key)); |
| 699 | ubifs_assert(len <= UBIFS_BLOCK_SIZE); | 699 | ubifs_assert(len <= UBIFS_BLOCK_SIZE); |
| 700 | 700 | ||
| 701 | data = kmalloc(dlen, GFP_NOFS); | 701 | data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); |
| 702 | if (!data) | 702 | if (!data) { |
| 703 | return -ENOMEM; | 703 | /* |
| 704 | * Fall back to the write reserve buffer. Note, we might be | ||
| 705 | * currently on the memory reclaim path, when the kernel is | ||
| 706 | * trying to free some memory by writing out dirty pages. The | ||
| 707 | * write reserve buffer helps us to guarantee that we are | ||
| 708 | * always able to write the data. | ||
| 709 | */ | ||
| 710 | allocated = 0; | ||
| 711 | mutex_lock(&c->write_reserve_mutex); | ||
| 712 | data = c->write_reserve_buf; | ||
| 713 | } | ||
| 704 | 714 | ||
| 705 | data->ch.node_type = UBIFS_DATA_NODE; | 715 | data->ch.node_type = UBIFS_DATA_NODE; |
| 706 | key_write(c, key, &data->key); | 716 | key_write(c, key, &data->key); |
| @@ -736,7 +746,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, | |||
| 736 | goto out_ro; | 746 | goto out_ro; |
| 737 | 747 | ||
| 738 | finish_reservation(c); | 748 | finish_reservation(c); |
| 739 | kfree(data); | 749 | if (!allocated) |
| 750 | mutex_unlock(&c->write_reserve_mutex); | ||
| 751 | else | ||
| 752 | kfree(data); | ||
| 740 | return 0; | 753 | return 0; |
| 741 | 754 | ||
| 742 | out_release: | 755 | out_release: |
| @@ -745,7 +758,10 @@ out_ro: | |||
| 745 | ubifs_ro_mode(c, err); | 758 | ubifs_ro_mode(c, err); |
| 746 | finish_reservation(c); | 759 | finish_reservation(c); |
| 747 | out_free: | 760 | out_free: |
| 748 | kfree(data); | 761 | if (!allocated) |
| 762 | mutex_unlock(&c->write_reserve_mutex); | ||
| 763 | else | ||
| 764 | kfree(data); | ||
| 749 | return err; | 765 | return err; |
| 750 | } | 766 | } |
| 751 | 767 | ||
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index d4b4cb4596e2..e360c7a71f9e 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
| @@ -1213,6 +1213,13 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1213 | if (c->bulk_read == 1) | 1213 | if (c->bulk_read == 1) |
| 1214 | bu_init(c); | 1214 | bu_init(c); |
| 1215 | 1215 | ||
| 1216 | if (!c->ro_mount) { | ||
| 1217 | c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, | ||
| 1218 | GFP_KERNEL); | ||
| 1219 | if (!c->write_reserve_buf) | ||
| 1220 | goto out_free; | ||
| 1221 | } | ||
| 1222 | |||
| 1216 | c->mounting = 1; | 1223 | c->mounting = 1; |
| 1217 | 1224 | ||
| 1218 | err = ubifs_read_superblock(c); | 1225 | err = ubifs_read_superblock(c); |
| @@ -1482,6 +1489,7 @@ out_wbufs: | |||
| 1482 | out_cbuf: | 1489 | out_cbuf: |
| 1483 | kfree(c->cbuf); | 1490 | kfree(c->cbuf); |
| 1484 | out_free: | 1491 | out_free: |
| 1492 | kfree(c->write_reserve_buf); | ||
| 1485 | kfree(c->bu.buf); | 1493 | kfree(c->bu.buf); |
| 1486 | vfree(c->ileb_buf); | 1494 | vfree(c->ileb_buf); |
| 1487 | vfree(c->sbuf); | 1495 | vfree(c->sbuf); |
| @@ -1520,6 +1528,7 @@ static void ubifs_umount(struct ubifs_info *c) | |||
| 1520 | kfree(c->cbuf); | 1528 | kfree(c->cbuf); |
| 1521 | kfree(c->rcvrd_mst_node); | 1529 | kfree(c->rcvrd_mst_node); |
| 1522 | kfree(c->mst_node); | 1530 | kfree(c->mst_node); |
| 1531 | kfree(c->write_reserve_buf); | ||
| 1523 | kfree(c->bu.buf); | 1532 | kfree(c->bu.buf); |
| 1524 | vfree(c->ileb_buf); | 1533 | vfree(c->ileb_buf); |
| 1525 | vfree(c->sbuf); | 1534 | vfree(c->sbuf); |
| @@ -1605,6 +1614,10 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1605 | goto out; | 1614 | goto out; |
| 1606 | } | 1615 | } |
| 1607 | 1616 | ||
| 1617 | c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL); | ||
| 1618 | if (!c->write_reserve_buf) | ||
| 1619 | goto out; | ||
| 1620 | |||
| 1608 | err = ubifs_lpt_init(c, 0, 1); | 1621 | err = ubifs_lpt_init(c, 0, 1); |
| 1609 | if (err) | 1622 | if (err) |
| 1610 | goto out; | 1623 | goto out; |
| @@ -1669,6 +1682,8 @@ out: | |||
| 1669 | c->bgt = NULL; | 1682 | c->bgt = NULL; |
| 1670 | } | 1683 | } |
| 1671 | free_wbufs(c); | 1684 | free_wbufs(c); |
| 1685 | kfree(c->write_reserve_buf); | ||
| 1686 | c->write_reserve_buf = NULL; | ||
| 1672 | vfree(c->ileb_buf); | 1687 | vfree(c->ileb_buf); |
| 1673 | c->ileb_buf = NULL; | 1688 | c->ileb_buf = NULL; |
| 1674 | ubifs_lpt_free(c, 1); | 1689 | ubifs_lpt_free(c, 1); |
| @@ -1712,6 +1727,8 @@ static void ubifs_remount_ro(struct ubifs_info *c) | |||
| 1712 | free_wbufs(c); | 1727 | free_wbufs(c); |
| 1713 | vfree(c->orph_buf); | 1728 | vfree(c->orph_buf); |
| 1714 | c->orph_buf = NULL; | 1729 | c->orph_buf = NULL; |
| 1730 | kfree(c->write_reserve_buf); | ||
| 1731 | c->write_reserve_buf = NULL; | ||
| 1715 | vfree(c->ileb_buf); | 1732 | vfree(c->ileb_buf); |
| 1716 | c->ileb_buf = NULL; | 1733 | c->ileb_buf = NULL; |
| 1717 | ubifs_lpt_free(c, 1); | 1734 | ubifs_lpt_free(c, 1); |
| @@ -1942,6 +1959,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1942 | mutex_init(&c->mst_mutex); | 1959 | mutex_init(&c->mst_mutex); |
| 1943 | mutex_init(&c->umount_mutex); | 1960 | mutex_init(&c->umount_mutex); |
| 1944 | mutex_init(&c->bu_mutex); | 1961 | mutex_init(&c->bu_mutex); |
| 1962 | mutex_init(&c->write_reserve_mutex); | ||
| 1945 | init_waitqueue_head(&c->cmt_wq); | 1963 | init_waitqueue_head(&c->cmt_wq); |
| 1946 | c->buds = RB_ROOT; | 1964 | c->buds = RB_ROOT; |
| 1947 | c->old_idx = RB_ROOT; | 1965 | c->old_idx = RB_ROOT; |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 362495078489..8c40ad3c6721 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
| @@ -151,6 +151,12 @@ | |||
| 151 | */ | 151 | */ |
| 152 | #define WORST_COMPR_FACTOR 2 | 152 | #define WORST_COMPR_FACTOR 2 |
| 153 | 153 | ||
| 154 | /* | ||
| 155 | * How much memory is needed for a buffer where we compress a data node. | ||
| 156 | */ | ||
| 157 | #define COMPRESSED_DATA_NODE_BUF_SZ \ | ||
| 158 | (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) | ||
| 159 | |||
| 154 | /* Maximum expected tree height for use by bottom_up_buf */ | 160 | /* Maximum expected tree height for use by bottom_up_buf */ |
| 155 | #define BOTTOM_UP_HEIGHT 64 | 161 | #define BOTTOM_UP_HEIGHT 64 |
| 156 | 162 | ||
| @@ -1005,6 +1011,11 @@ struct ubifs_debug_info; | |||
| 1005 | * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu | 1011 | * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu |
| 1006 | * @bu: pre-allocated bulk-read information | 1012 | * @bu: pre-allocated bulk-read information |
| 1007 | * | 1013 | * |
| 1014 | * @write_reserve_mutex: protects @write_reserve_buf | ||
| 1015 | * @write_reserve_buf: on the write path we allocate memory, which might | ||
| 1016 | * sometimes be unavailable, in which case we use this | ||
| 1017 | * write reserve buffer | ||
| 1018 | * | ||
| 1008 | * @log_lebs: number of logical eraseblocks in the log | 1019 | * @log_lebs: number of logical eraseblocks in the log |
| 1009 | * @log_bytes: log size in bytes | 1020 | * @log_bytes: log size in bytes |
| 1010 | * @log_last: last LEB of the log | 1021 | * @log_last: last LEB of the log |
| @@ -1256,6 +1267,9 @@ struct ubifs_info { | |||
| 1256 | struct mutex bu_mutex; | 1267 | struct mutex bu_mutex; |
| 1257 | struct bu_info bu; | 1268 | struct bu_info bu; |
| 1258 | 1269 | ||
| 1270 | struct mutex write_reserve_mutex; | ||
| 1271 | void *write_reserve_buf; | ||
| 1272 | |||
| 1259 | int log_lebs; | 1273 | int log_lebs; |
| 1260 | long long log_bytes; | 1274 | long long log_bytes; |
| 1261 | int log_last; | 1275 | int log_last; |
