diff options
author | Theodore Ts'o <tytso@mit.edu> | 2009-12-07 10:36:20 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2009-12-07 10:36:20 -0500 |
commit | d2eecb03936878ec574ade5532fa83df7d75dde7 (patch) | |
tree | f7ef8bd29096ba6c073308cd3b674857be1545d4 /fs | |
parent | f8ec9d6837241865cf99bed97bb99f4399fd5a03 (diff) |
ext4: Use slab allocator for sub-page sized allocations
Now that the SLUB seems to be fixed so that it respects the requested
alignment, use kmem_cache_alloc() to allocate memory if the block size of
the buffer heads to be allocated is less than the page size.
Previously, we were using a 16k page on a Power system for each buffer,
even when the file system was using 1k or 4k block size.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/jbd2/journal.c | 132 |
1 files changed, 132 insertions, 0 deletions
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ac0d027595d0..c03d4dce4d76 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -39,6 +39,8 @@ | |||
39 | #include <linux/seq_file.h> | 39 | #include <linux/seq_file.h> |
40 | #include <linux/math64.h> | 40 | #include <linux/math64.h> |
41 | #include <linux/hash.h> | 41 | #include <linux/hash.h> |
42 | #include <linux/log2.h> | ||
43 | #include <linux/vmalloc.h> | ||
42 | 44 | ||
43 | #define CREATE_TRACE_POINTS | 45 | #define CREATE_TRACE_POINTS |
44 | #include <trace/events/jbd2.h> | 46 | #include <trace/events/jbd2.h> |
@@ -93,6 +95,7 @@ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); | |||
93 | 95 | ||
94 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | 96 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); |
95 | static void __journal_abort_soft (journal_t *journal, int errno); | 97 | static void __journal_abort_soft (journal_t *journal, int errno); |
98 | static int jbd2_journal_create_slab(size_t slab_size); | ||
96 | 99 | ||
97 | /* | 100 | /* |
98 | * Helper function used to manage commit timeouts | 101 | * Helper function used to manage commit timeouts |
@@ -1248,6 +1251,13 @@ int jbd2_journal_load(journal_t *journal) | |||
1248 | } | 1251 | } |
1249 | } | 1252 | } |
1250 | 1253 | ||
1254 | /* | ||
1255 | * Create a slab for this blocksize | ||
1256 | */ | ||
1257 | err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize)); | ||
1258 | if (err) | ||
1259 | return err; | ||
1260 | |||
1251 | /* Let the recovery code check whether it needs to recover any | 1261 | /* Let the recovery code check whether it needs to recover any |
1252 | * data from the journal. */ | 1262 | * data from the journal. */ |
1253 | if (jbd2_journal_recover(journal)) | 1263 | if (jbd2_journal_recover(journal)) |
@@ -1807,6 +1817,127 @@ size_t journal_tag_bytes(journal_t *journal) | |||
1807 | } | 1817 | } |
1808 | 1818 | ||
1809 | /* | 1819 | /* |
1820 | * JBD memory management | ||
1821 | * | ||
1822 | * These functions are used to allocate block-sized chunks of memory | ||
1823 | * used for making copies of buffer_head data. Very often it will be | ||
1824 | * page-sized chunks of data, but sometimes it will be in | ||
1825 | * sub-page-size chunks. (For example, 16k pages on Power systems | ||
1826 | * with a 4k block file system.) For blocks smaller than a page, we | ||
1827 | * use a SLAB allocator. There are slab caches for each block size, | ||
1828 | * which are allocated at mount time, if necessary, and we only free | ||
1829 | * (all of) the slab caches when/if the jbd2 module is unloaded. For | ||
1830 | * this reason we don't need a mutex to protect access to | ||
1831 | * jbd2_slab[] when allocating or releasing memory; only in | ||
1832 | * jbd2_journal_create_slab(). | ||
1833 | */ | ||
/*
 * Number of distinct sub-page block sizes that can get their own slab
 * cache.  Slot i of the tables below corresponds to a block size of
 * (1k << i), i.e. 1k, 2k, ... 128k.
 */
#define JBD2_MAX_SLABS 8

/* One cache per block size; a NULL slot means "not created yet". */
static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];

/*
 * Serialises cache creation in jbd2_journal_create_slab(), where it is
 * taken with down() and released with up().
 */
static DECLARE_MUTEX(jbd2_slab_create_sem);

/*
 * Cache names, indexed the same way as jbd2_slab[].  Both the strings and
 * the table itself are read-only.
 */
static const char * const jbd2_slab_names[JBD2_MAX_SLABS] = {
	"jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
	"jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
};
1842 | |||
1843 | |||
1844 | static void jbd2_journal_destroy_slabs(void) | ||
1845 | { | ||
1846 | int i; | ||
1847 | |||
1848 | for (i = 0; i < JBD2_MAX_SLABS; i++) { | ||
1849 | if (jbd2_slab[i]) | ||
1850 | kmem_cache_destroy(jbd2_slab[i]); | ||
1851 | jbd2_slab[i] = NULL; | ||
1852 | } | ||
1853 | } | ||
1854 | |||
1855 | static int jbd2_journal_create_slab(size_t size) | ||
1856 | { | ||
1857 | int i = order_base_2(size) - 10; | ||
1858 | size_t slab_size; | ||
1859 | |||
1860 | if (size == PAGE_SIZE) | ||
1861 | return 0; | ||
1862 | |||
1863 | if (i >= JBD2_MAX_SLABS) | ||
1864 | return -EINVAL; | ||
1865 | |||
1866 | if (unlikely(i < 0)) | ||
1867 | i = 0; | ||
1868 | down(&jbd2_slab_create_sem); | ||
1869 | if (jbd2_slab[i]) { | ||
1870 | up(&jbd2_slab_create_sem); | ||
1871 | return 0; /* Already created */ | ||
1872 | } | ||
1873 | |||
1874 | slab_size = 1 << (i+10); | ||
1875 | jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, | ||
1876 | slab_size, 0, NULL); | ||
1877 | up(&jbd2_slab_create_sem); | ||
1878 | if (!jbd2_slab[i]) { | ||
1879 | printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); | ||
1880 | return -ENOMEM; | ||
1881 | } | ||
1882 | return 0; | ||
1883 | } | ||
1884 | |||
1885 | static struct kmem_cache *get_slab(size_t size) | ||
1886 | { | ||
1887 | int i = order_base_2(size) - 10; | ||
1888 | |||
1889 | BUG_ON(i >= JBD2_MAX_SLABS); | ||
1890 | if (unlikely(i < 0)) | ||
1891 | i = 0; | ||
1892 | BUG_ON(jbd2_slab[i] == 0); | ||
1893 | return jbd2_slab[i]; | ||
1894 | } | ||
1895 | |||
1896 | void *jbd2_alloc(size_t size, gfp_t flags) | ||
1897 | { | ||
1898 | void *ptr; | ||
1899 | |||
1900 | BUG_ON(size & (size-1)); /* Must be a power of 2 */ | ||
1901 | |||
1902 | flags |= __GFP_REPEAT; | ||
1903 | if (size == PAGE_SIZE) | ||
1904 | ptr = (void *)__get_free_pages(flags, 0); | ||
1905 | else if (size > PAGE_SIZE) { | ||
1906 | int order = get_order(size); | ||
1907 | |||
1908 | if (order < 3) | ||
1909 | ptr = (void *)__get_free_pages(flags, order); | ||
1910 | else | ||
1911 | ptr = vmalloc(size); | ||
1912 | } else | ||
1913 | ptr = kmem_cache_alloc(get_slab(size), flags); | ||
1914 | |||
1915 | /* Check alignment; SLUB has gotten this wrong in the past, | ||
1916 | * and this can lead to user data corruption! */ | ||
1917 | BUG_ON(((unsigned long) ptr) & (size-1)); | ||
1918 | |||
1919 | return ptr; | ||
1920 | } | ||
1921 | |||
1922 | void jbd2_free(void *ptr, size_t size) | ||
1923 | { | ||
1924 | if (size == PAGE_SIZE) { | ||
1925 | free_pages((unsigned long)ptr, 0); | ||
1926 | return; | ||
1927 | } | ||
1928 | if (size > PAGE_SIZE) { | ||
1929 | int order = get_order(size); | ||
1930 | |||
1931 | if (order < 3) | ||
1932 | free_pages((unsigned long)ptr, order); | ||
1933 | else | ||
1934 | vfree(ptr); | ||
1935 | return; | ||
1936 | } | ||
1937 | kmem_cache_free(get_slab(size), ptr); | ||
1938 | }; | ||
1939 | |||
1940 | /* | ||
1810 | * Journal_head storage management | 1941 | * Journal_head storage management |
1811 | */ | 1942 | */ |
1812 | static struct kmem_cache *jbd2_journal_head_cache; | 1943 | static struct kmem_cache *jbd2_journal_head_cache; |
@@ -2204,6 +2335,7 @@ static void jbd2_journal_destroy_caches(void) | |||
2204 | jbd2_journal_destroy_revoke_caches(); | 2335 | jbd2_journal_destroy_revoke_caches(); |
2205 | jbd2_journal_destroy_jbd2_journal_head_cache(); | 2336 | jbd2_journal_destroy_jbd2_journal_head_cache(); |
2206 | jbd2_journal_destroy_handle_cache(); | 2337 | jbd2_journal_destroy_handle_cache(); |
2338 | jbd2_journal_destroy_slabs(); | ||
2207 | } | 2339 | } |
2208 | 2340 | ||
2209 | static int __init journal_init(void) | 2341 | static int __init journal_init(void) |