diff options
author | Badari Pulavarty <pbadari@us.ibm.com> | 2006-08-27 04:23:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-08-27 14:01:32 -0400 |
commit | ea817398e68dfa25612229fda7fc74580cf915fb (patch) | |
tree | f511458f277c30b74e26bed69fe9d36dd2fe857c | |
parent | 4c4d50f7b39cc58f1064b93a61ad617451ae41df (diff) |
[PATCH] Manage jbd allocations from its own slabs
JBD currently allocates commit and frozen buffers from slabs. With
CONFIG_SLAB_DEBUG, its possible for an allocation to cross the page
boundary causing IO problems.
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=200127
So, instead of allocating these from regular slabs - manage allocation from
its own slabs and disable slab debug for these slabs.
[akpm@osdl.org: cleanups]
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/jbd/commit.c | 6 | ||||
-rw-r--r-- | fs/jbd/journal.c | 92 | ||||
-rw-r--r-- | fs/jbd/transaction.c | 9 | ||||
-rw-r--r-- | include/linux/jbd.h | 3 |
4 files changed, 97 insertions, 13 deletions
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 0971814c38b8..42da60784311 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal) | |||
261 | struct buffer_head *bh = jh2bh(jh); | 261 | struct buffer_head *bh = jh2bh(jh); |
262 | 262 | ||
263 | jbd_lock_bh_state(bh); | 263 | jbd_lock_bh_state(bh); |
264 | kfree(jh->b_committed_data); | 264 | jbd_slab_free(jh->b_committed_data, bh->b_size); |
265 | jh->b_committed_data = NULL; | 265 | jh->b_committed_data = NULL; |
266 | jbd_unlock_bh_state(bh); | 266 | jbd_unlock_bh_state(bh); |
267 | } | 267 | } |
@@ -745,14 +745,14 @@ restart_loop: | |||
745 | * Otherwise, we can just throw away the frozen data now. | 745 | * Otherwise, we can just throw away the frozen data now. |
746 | */ | 746 | */ |
747 | if (jh->b_committed_data) { | 747 | if (jh->b_committed_data) { |
748 | kfree(jh->b_committed_data); | 748 | jbd_slab_free(jh->b_committed_data, bh->b_size); |
749 | jh->b_committed_data = NULL; | 749 | jh->b_committed_data = NULL; |
750 | if (jh->b_frozen_data) { | 750 | if (jh->b_frozen_data) { |
751 | jh->b_committed_data = jh->b_frozen_data; | 751 | jh->b_committed_data = jh->b_frozen_data; |
752 | jh->b_frozen_data = NULL; | 752 | jh->b_frozen_data = NULL; |
753 | } | 753 | } |
754 | } else if (jh->b_frozen_data) { | 754 | } else if (jh->b_frozen_data) { |
755 | kfree(jh->b_frozen_data); | 755 | jbd_slab_free(jh->b_frozen_data, bh->b_size); |
756 | jh->b_frozen_data = NULL; | 756 | jh->b_frozen_data = NULL; |
757 | } | 757 | } |
758 | 758 | ||
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 8c9b28dff119..f66724ce443a 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit); | |||
84 | 84 | ||
85 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | 85 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); |
86 | static void __journal_abort_soft (journal_t *journal, int errno); | 86 | static void __journal_abort_soft (journal_t *journal, int errno); |
87 | static int journal_create_jbd_slab(size_t slab_size); | ||
87 | 88 | ||
88 | /* | 89 | /* |
89 | * Helper function used to manage commit timeouts | 90 | * Helper function used to manage commit timeouts |
@@ -328,10 +329,10 @@ repeat: | |||
328 | char *tmp; | 329 | char *tmp; |
329 | 330 | ||
330 | jbd_unlock_bh_state(bh_in); | 331 | jbd_unlock_bh_state(bh_in); |
331 | tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); | 332 | tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); |
332 | jbd_lock_bh_state(bh_in); | 333 | jbd_lock_bh_state(bh_in); |
333 | if (jh_in->b_frozen_data) { | 334 | if (jh_in->b_frozen_data) { |
334 | kfree(tmp); | 335 | jbd_slab_free(tmp, bh_in->b_size); |
335 | goto repeat; | 336 | goto repeat; |
336 | } | 337 | } |
337 | 338 | ||
@@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal) | |||
1069 | int journal_load(journal_t *journal) | 1070 | int journal_load(journal_t *journal) |
1070 | { | 1071 | { |
1071 | int err; | 1072 | int err; |
1073 | journal_superblock_t *sb; | ||
1072 | 1074 | ||
1073 | err = load_superblock(journal); | 1075 | err = load_superblock(journal); |
1074 | if (err) | 1076 | if (err) |
1075 | return err; | 1077 | return err; |
1076 | 1078 | ||
1079 | sb = journal->j_superblock; | ||
1077 | /* If this is a V2 superblock, then we have to check the | 1080 | /* If this is a V2 superblock, then we have to check the |
1078 | * features flags on it. */ | 1081 | * features flags on it. */ |
1079 | 1082 | ||
1080 | if (journal->j_format_version >= 2) { | 1083 | if (journal->j_format_version >= 2) { |
1081 | journal_superblock_t *sb = journal->j_superblock; | ||
1082 | |||
1083 | if ((sb->s_feature_ro_compat & | 1084 | if ((sb->s_feature_ro_compat & |
1084 | ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || | 1085 | ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || |
1085 | (sb->s_feature_incompat & | 1086 | (sb->s_feature_incompat & |
@@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal) | |||
1090 | } | 1091 | } |
1091 | } | 1092 | } |
1092 | 1093 | ||
1094 | /* | ||
1095 | * Create a slab for this blocksize | ||
1096 | */ | ||
1097 | err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize)); | ||
1098 | if (err) | ||
1099 | return err; | ||
1100 | |||
1093 | /* Let the recovery code check whether it needs to recover any | 1101 | /* Let the recovery code check whether it needs to recover any |
1094 | * data from the journal. */ | 1102 | * data from the journal. */ |
1095 | if (journal_recover(journal)) | 1103 | if (journal_recover(journal)) |
@@ -1612,6 +1620,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) | |||
1612 | } | 1620 | } |
1613 | 1621 | ||
1614 | /* | 1622 | /* |
1623 | * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed | ||
1624 | * and allocate frozen and commit buffers from these slabs. | ||
1625 | * | ||
1626 | * Reason for doing this is to avoid, SLAB_DEBUG - since it could | ||
1627 | * cause bh to cross page boundary. | ||
1628 | */ | ||
1629 | |||
1630 | #define JBD_MAX_SLABS 5 | ||
1631 | #define JBD_SLAB_INDEX(size) (size >> 11) | ||
1632 | |||
1633 | static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; | ||
1634 | static const char *jbd_slab_names[JBD_MAX_SLABS] = { | ||
1635 | "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" | ||
1636 | }; | ||
1637 | |||
1638 | static void journal_destroy_jbd_slabs(void) | ||
1639 | { | ||
1640 | int i; | ||
1641 | |||
1642 | for (i = 0; i < JBD_MAX_SLABS; i++) { | ||
1643 | if (jbd_slab[i]) | ||
1644 | kmem_cache_destroy(jbd_slab[i]); | ||
1645 | jbd_slab[i] = NULL; | ||
1646 | } | ||
1647 | } | ||
1648 | |||
1649 | static int journal_create_jbd_slab(size_t slab_size) | ||
1650 | { | ||
1651 | int i = JBD_SLAB_INDEX(slab_size); | ||
1652 | |||
1653 | BUG_ON(i >= JBD_MAX_SLABS); | ||
1654 | |||
1655 | /* | ||
1656 | * Check if we already have a slab created for this size | ||
1657 | */ | ||
1658 | if (jbd_slab[i]) | ||
1659 | return 0; | ||
1660 | |||
1661 | /* | ||
1662 | * Create a slab and force alignment to be same as slabsize - | ||
1663 | * this will make sure that allocations won't cross the page | ||
1664 | * boundary. | ||
1665 | */ | ||
1666 | jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], | ||
1667 | slab_size, slab_size, 0, NULL, NULL); | ||
1668 | if (!jbd_slab[i]) { | ||
1669 | printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); | ||
1670 | return -ENOMEM; | ||
1671 | } | ||
1672 | return 0; | ||
1673 | } | ||
1674 | |||
1675 | void * jbd_slab_alloc(size_t size, gfp_t flags) | ||
1676 | { | ||
1677 | int idx; | ||
1678 | |||
1679 | idx = JBD_SLAB_INDEX(size); | ||
1680 | BUG_ON(jbd_slab[idx] == NULL); | ||
1681 | return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); | ||
1682 | } | ||
1683 | |||
1684 | void jbd_slab_free(void *ptr, size_t size) | ||
1685 | { | ||
1686 | int idx; | ||
1687 | |||
1688 | idx = JBD_SLAB_INDEX(size); | ||
1689 | BUG_ON(jbd_slab[idx] == NULL); | ||
1690 | kmem_cache_free(jbd_slab[idx], ptr); | ||
1691 | } | ||
1692 | |||
1693 | /* | ||
1615 | * Journal_head storage management | 1694 | * Journal_head storage management |
1616 | */ | 1695 | */ |
1617 | static kmem_cache_t *journal_head_cache; | 1696 | static kmem_cache_t *journal_head_cache; |
@@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
1799 | printk(KERN_WARNING "%s: freeing " | 1878 | printk(KERN_WARNING "%s: freeing " |
1800 | "b_frozen_data\n", | 1879 | "b_frozen_data\n", |
1801 | __FUNCTION__); | 1880 | __FUNCTION__); |
1802 | kfree(jh->b_frozen_data); | 1881 | jbd_slab_free(jh->b_frozen_data, bh->b_size); |
1803 | } | 1882 | } |
1804 | if (jh->b_committed_data) { | 1883 | if (jh->b_committed_data) { |
1805 | printk(KERN_WARNING "%s: freeing " | 1884 | printk(KERN_WARNING "%s: freeing " |
1806 | "b_committed_data\n", | 1885 | "b_committed_data\n", |
1807 | __FUNCTION__); | 1886 | __FUNCTION__); |
1808 | kfree(jh->b_committed_data); | 1887 | jbd_slab_free(jh->b_committed_data, bh->b_size); |
1809 | } | 1888 | } |
1810 | bh->b_private = NULL; | 1889 | bh->b_private = NULL; |
1811 | jh->b_bh = NULL; /* debug, really */ | 1890 | jh->b_bh = NULL; /* debug, really */ |
@@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void) | |||
1961 | journal_destroy_revoke_caches(); | 2040 | journal_destroy_revoke_caches(); |
1962 | journal_destroy_journal_head_cache(); | 2041 | journal_destroy_journal_head_cache(); |
1963 | journal_destroy_handle_cache(); | 2042 | journal_destroy_handle_cache(); |
2043 | journal_destroy_jbd_slabs(); | ||
1964 | } | 2044 | } |
1965 | 2045 | ||
1966 | static int __init journal_init(void) | 2046 | static int __init journal_init(void) |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 508b2ea91f43..de2e4cbbf79a 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -666,8 +666,9 @@ repeat: | |||
666 | if (!frozen_buffer) { | 666 | if (!frozen_buffer) { |
667 | JBUFFER_TRACE(jh, "allocate memory for buffer"); | 667 | JBUFFER_TRACE(jh, "allocate memory for buffer"); |
668 | jbd_unlock_bh_state(bh); | 668 | jbd_unlock_bh_state(bh); |
669 | frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, | 669 | frozen_buffer = |
670 | GFP_NOFS); | 670 | jbd_slab_alloc(jh2bh(jh)->b_size, |
671 | GFP_NOFS); | ||
671 | if (!frozen_buffer) { | 672 | if (!frozen_buffer) { |
672 | printk(KERN_EMERG | 673 | printk(KERN_EMERG |
673 | "%s: OOM for frozen_buffer\n", | 674 | "%s: OOM for frozen_buffer\n", |
@@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) | |||
879 | 880 | ||
880 | repeat: | 881 | repeat: |
881 | if (!jh->b_committed_data) { | 882 | if (!jh->b_committed_data) { |
882 | committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS); | 883 | committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); |
883 | if (!committed_data) { | 884 | if (!committed_data) { |
884 | printk(KERN_EMERG "%s: No memory for committed data\n", | 885 | printk(KERN_EMERG "%s: No memory for committed data\n", |
885 | __FUNCTION__); | 886 | __FUNCTION__); |
@@ -906,7 +907,7 @@ repeat: | |||
906 | out: | 907 | out: |
907 | journal_put_journal_head(jh); | 908 | journal_put_journal_head(jh); |
908 | if (unlikely(committed_data)) | 909 | if (unlikely(committed_data)) |
909 | kfree(committed_data); | 910 | jbd_slab_free(committed_data, bh->b_size); |
910 | return err; | 911 | return err; |
911 | } | 912 | } |
912 | 913 | ||
diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 20eb34403d0c..a04c154c5207 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h | |||
@@ -72,6 +72,9 @@ extern int journal_enable_debug; | |||
72 | #endif | 72 | #endif |
73 | 73 | ||
74 | extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry); | 74 | extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry); |
75 | extern void * jbd_slab_alloc(size_t size, gfp_t flags); | ||
76 | extern void jbd_slab_free(void *ptr, size_t size); | ||
77 | |||
75 | #define jbd_kmalloc(size, flags) \ | 78 | #define jbd_kmalloc(size, flags) \ |
76 | __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) | 79 | __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) |
77 | #define jbd_rep_kmalloc(size, flags) \ | 80 | #define jbd_rep_kmalloc(size, flags) \ |