diff options
Diffstat (limited to 'fs/jbd2')
| -rw-r--r-- | fs/jbd2/checkpoint.c | 1 | ||||
| -rw-r--r-- | fs/jbd2/commit.c | 13 | ||||
| -rw-r--r-- | fs/jbd2/journal.c | 132 | ||||
| -rw-r--r-- | fs/jbd2/transaction.c | 43 |
4 files changed, 170 insertions, 19 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 886849370950..30beb11ef928 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
| @@ -507,6 +507,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
| 507 | if (blocknr < journal->j_tail) | 507 | if (blocknr < journal->j_tail) |
| 508 | freed = freed + journal->j_last - journal->j_first; | 508 | freed = freed + journal->j_last - journal->j_first; |
| 509 | 509 | ||
| 510 | trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed); | ||
| 510 | jbd_debug(1, | 511 | jbd_debug(1, |
| 511 | "Cleaning journal tail from %d to %d (offset %lu), " | 512 | "Cleaning journal tail from %d to %d (offset %lu), " |
| 512 | "freeing %lu\n", | 513 | "freeing %lu\n", |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 1bc74b6f26d2..671da7fb7ffd 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -883,8 +883,7 @@ restart_loop: | |||
| 883 | spin_unlock(&journal->j_list_lock); | 883 | spin_unlock(&journal->j_list_lock); |
| 884 | bh = jh2bh(jh); | 884 | bh = jh2bh(jh); |
| 885 | jbd_lock_bh_state(bh); | 885 | jbd_lock_bh_state(bh); |
| 886 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || | 886 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); |
| 887 | jh->b_transaction == journal->j_running_transaction); | ||
| 888 | 887 | ||
| 889 | /* | 888 | /* |
| 890 | * If there is undo-protected committed data against | 889 | * If there is undo-protected committed data against |
| @@ -930,12 +929,12 @@ restart_loop: | |||
| 930 | /* A buffer which has been freed while still being | 929 | /* A buffer which has been freed while still being |
| 931 | * journaled by a previous transaction may end up still | 930 | * journaled by a previous transaction may end up still |
| 932 | * being dirty here, but we want to avoid writing back | 931 | * being dirty here, but we want to avoid writing back |
| 933 | * that buffer in the future now that the last use has | 932 | * that buffer in the future after the "add to orphan" |
| 934 | * been committed. That's not only a performance gain, | 933 | * operation has been committed. That's not only a performance |
| 935 | * it also stops aliasing problems if the buffer is left | 934 | * gain, it also stops aliasing problems if the buffer is |
| 936 | * behind for writeback and gets reallocated for another | 935 | * left behind for writeback and gets reallocated for another |
| 937 | * use in a different page. */ | 936 | * use in a different page. */ |
| 938 | if (buffer_freed(bh)) { | 937 | if (buffer_freed(bh) && !jh->b_next_transaction) { |
| 939 | clear_buffer_freed(bh); | 938 | clear_buffer_freed(bh); |
| 940 | clear_buffer_jbddirty(bh); | 939 | clear_buffer_jbddirty(bh); |
| 941 | } | 940 | } |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ac0d027595d0..c03d4dce4d76 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -39,6 +39,8 @@ | |||
| 39 | #include <linux/seq_file.h> | 39 | #include <linux/seq_file.h> |
| 40 | #include <linux/math64.h> | 40 | #include <linux/math64.h> |
| 41 | #include <linux/hash.h> | 41 | #include <linux/hash.h> |
| 42 | #include <linux/log2.h> | ||
| 43 | #include <linux/vmalloc.h> | ||
| 42 | 44 | ||
| 43 | #define CREATE_TRACE_POINTS | 45 | #define CREATE_TRACE_POINTS |
| 44 | #include <trace/events/jbd2.h> | 46 | #include <trace/events/jbd2.h> |
| @@ -93,6 +95,7 @@ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); | |||
| 93 | 95 | ||
| 94 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | 96 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); |
| 95 | static void __journal_abort_soft (journal_t *journal, int errno); | 97 | static void __journal_abort_soft (journal_t *journal, int errno); |
| 98 | static int jbd2_journal_create_slab(size_t slab_size); | ||
| 96 | 99 | ||
| 97 | /* | 100 | /* |
| 98 | * Helper function used to manage commit timeouts | 101 | * Helper function used to manage commit timeouts |
| @@ -1248,6 +1251,13 @@ int jbd2_journal_load(journal_t *journal) | |||
| 1248 | } | 1251 | } |
| 1249 | } | 1252 | } |
| 1250 | 1253 | ||
| 1254 | /* | ||
| 1255 | * Create a slab for this blocksize | ||
| 1256 | */ | ||
| 1257 | err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize)); | ||
| 1258 | if (err) | ||
| 1259 | return err; | ||
| 1260 | |||
| 1251 | /* Let the recovery code check whether it needs to recover any | 1261 | /* Let the recovery code check whether it needs to recover any |
| 1252 | * data from the journal. */ | 1262 | * data from the journal. */ |
| 1253 | if (jbd2_journal_recover(journal)) | 1263 | if (jbd2_journal_recover(journal)) |
| @@ -1807,6 +1817,127 @@ size_t journal_tag_bytes(journal_t *journal) | |||
| 1807 | } | 1817 | } |
| 1808 | 1818 | ||
| 1809 | /* | 1819 | /* |
| 1820 | * JBD memory management | ||
| 1821 | * | ||
| 1822 | * These functions are used to allocate block-sized chunks of memory | ||
| 1823 | * used for making copies of buffer_head data. Very often it will be | ||
| 1824 | * page-sized chunks of data, but sometimes it will be in | ||
| 1825 | * sub-page-size chunks. (For example, 16k pages on Power systems | ||
| 1826 | * with a 4k block file system.) For blocks smaller than a page, we | ||
| 1827 | * use a SLAB allocator. There are slab caches for each block size, | ||
| 1828 | * which are allocated at mount time, if necessary, and we only free | ||
| 1829 | * (all of) the slab caches when/if the jbd2 module is unloaded. For | ||
| 1830 | * this reason we don't need a mutex to protect access to | ||
| 1831 | * jbd2_slab[] when allocating or releasing memory; only in | ||
| 1832 | * jbd2_journal_create_slab(). | ||
| 1833 | */ | ||
/*
 * One slot per power-of-two block size: slot 0 serves 1k blocks and the
 * last slot (JBD2_MAX_SLABS - 1) serves 128k blocks.
 */
#define JBD2_MAX_SLABS 8
static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
/* Serialises cache creation in jbd2_journal_create_slab(). */
static DECLARE_MUTEX(jbd2_slab_create_sem);

/*
 * Cache names, indexed exactly like jbd2_slab[].  The table is only ever
 * read, so both the strings and the array of pointers are const.
 */
static const char * const jbd2_slab_names[JBD2_MAX_SLABS] = {
	"jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
	"jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
};
| 1842 | |||
| 1843 | |||
| 1844 | static void jbd2_journal_destroy_slabs(void) | ||
| 1845 | { | ||
| 1846 | int i; | ||
| 1847 | |||
| 1848 | for (i = 0; i < JBD2_MAX_SLABS; i++) { | ||
| 1849 | if (jbd2_slab[i]) | ||
| 1850 | kmem_cache_destroy(jbd2_slab[i]); | ||
| 1851 | jbd2_slab[i] = NULL; | ||
| 1852 | } | ||
| 1853 | } | ||
| 1854 | |||
| 1855 | static int jbd2_journal_create_slab(size_t size) | ||
| 1856 | { | ||
| 1857 | int i = order_base_2(size) - 10; | ||
| 1858 | size_t slab_size; | ||
| 1859 | |||
| 1860 | if (size == PAGE_SIZE) | ||
| 1861 | return 0; | ||
| 1862 | |||
| 1863 | if (i >= JBD2_MAX_SLABS) | ||
| 1864 | return -EINVAL; | ||
| 1865 | |||
| 1866 | if (unlikely(i < 0)) | ||
| 1867 | i = 0; | ||
| 1868 | down(&jbd2_slab_create_sem); | ||
| 1869 | if (jbd2_slab[i]) { | ||
| 1870 | up(&jbd2_slab_create_sem); | ||
| 1871 | return 0; /* Already created */ | ||
| 1872 | } | ||
| 1873 | |||
| 1874 | slab_size = 1 << (i+10); | ||
| 1875 | jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, | ||
| 1876 | slab_size, 0, NULL); | ||
| 1877 | up(&jbd2_slab_create_sem); | ||
| 1878 | if (!jbd2_slab[i]) { | ||
| 1879 | printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); | ||
| 1880 | return -ENOMEM; | ||
| 1881 | } | ||
| 1882 | return 0; | ||
| 1883 | } | ||
| 1884 | |||
| 1885 | static struct kmem_cache *get_slab(size_t size) | ||
| 1886 | { | ||
| 1887 | int i = order_base_2(size) - 10; | ||
| 1888 | |||
| 1889 | BUG_ON(i >= JBD2_MAX_SLABS); | ||
| 1890 | if (unlikely(i < 0)) | ||
| 1891 | i = 0; | ||
| 1892 | BUG_ON(jbd2_slab[i] == 0); | ||
| 1893 | return jbd2_slab[i]; | ||
| 1894 | } | ||
| 1895 | |||
| 1896 | void *jbd2_alloc(size_t size, gfp_t flags) | ||
| 1897 | { | ||
| 1898 | void *ptr; | ||
| 1899 | |||
| 1900 | BUG_ON(size & (size-1)); /* Must be a power of 2 */ | ||
| 1901 | |||
| 1902 | flags |= __GFP_REPEAT; | ||
| 1903 | if (size == PAGE_SIZE) | ||
| 1904 | ptr = (void *)__get_free_pages(flags, 0); | ||
| 1905 | else if (size > PAGE_SIZE) { | ||
| 1906 | int order = get_order(size); | ||
| 1907 | |||
| 1908 | if (order < 3) | ||
| 1909 | ptr = (void *)__get_free_pages(flags, order); | ||
| 1910 | else | ||
| 1911 | ptr = vmalloc(size); | ||
| 1912 | } else | ||
| 1913 | ptr = kmem_cache_alloc(get_slab(size), flags); | ||
| 1914 | |||
| 1915 | /* Check alignment; SLUB has gotten this wrong in the past, | ||
| 1916 | * and this can lead to user data corruption! */ | ||
| 1917 | BUG_ON(((unsigned long) ptr) & (size-1)); | ||
| 1918 | |||
| 1919 | return ptr; | ||
| 1920 | } | ||
| 1921 | |||
| 1922 | void jbd2_free(void *ptr, size_t size) | ||
| 1923 | { | ||
| 1924 | if (size == PAGE_SIZE) { | ||
| 1925 | free_pages((unsigned long)ptr, 0); | ||
| 1926 | return; | ||
| 1927 | } | ||
| 1928 | if (size > PAGE_SIZE) { | ||
| 1929 | int order = get_order(size); | ||
| 1930 | |||
| 1931 | if (order < 3) | ||
| 1932 | free_pages((unsigned long)ptr, order); | ||
| 1933 | else | ||
| 1934 | vfree(ptr); | ||
| 1935 | return; | ||
| 1936 | } | ||
| 1937 | kmem_cache_free(get_slab(size), ptr); | ||
| 1938 | }; | ||
| 1939 | |||
| 1940 | /* | ||
| 1810 | * Journal_head storage management | 1941 | * Journal_head storage management |
| 1811 | */ | 1942 | */ |
| 1812 | static struct kmem_cache *jbd2_journal_head_cache; | 1943 | static struct kmem_cache *jbd2_journal_head_cache; |
| @@ -2204,6 +2335,7 @@ static void jbd2_journal_destroy_caches(void) | |||
| 2204 | jbd2_journal_destroy_revoke_caches(); | 2335 | jbd2_journal_destroy_revoke_caches(); |
| 2205 | jbd2_journal_destroy_jbd2_journal_head_cache(); | 2336 | jbd2_journal_destroy_jbd2_journal_head_cache(); |
| 2206 | jbd2_journal_destroy_handle_cache(); | 2337 | jbd2_journal_destroy_handle_cache(); |
| 2338 | jbd2_journal_destroy_slabs(); | ||
| 2207 | } | 2339 | } |
| 2208 | 2340 | ||
| 2209 | static int __init journal_init(void) | 2341 | static int __init journal_init(void) |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index a0512700542f..bfc70f57900f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -1727,6 +1727,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1727 | if (!jh) | 1727 | if (!jh) |
| 1728 | goto zap_buffer_no_jh; | 1728 | goto zap_buffer_no_jh; |
| 1729 | 1729 | ||
| 1730 | /* | ||
| 1731 | * We cannot remove the buffer from checkpoint lists until the | ||
| 1732 | * transaction adding inode to orphan list (let's call it T) | ||
| 1733 | * is committed. Otherwise if the transaction changing the | ||
| 1734 | * buffer would be cleaned from the journal before T is | ||
| 1735 | * committed, a crash will cause that the correct contents of | ||
| 1736 | * the buffer will be lost. On the other hand we have to | ||
| 1737 | * clear the buffer dirty bit at latest at the moment when the | ||
| 1738 | * transaction marking the buffer as freed in the filesystem | ||
| 1739 | * structures is committed because from that moment on the | ||
| 1740 | * buffer can be reallocated and used by a different page. | ||
| 1741 | * Since the block hasn't been freed yet but the inode has | ||
| 1742 | * already been added to orphan list, it is safe for us to add | ||
| 1743 | * the buffer to BJ_Forget list of the newest transaction. | ||
| 1744 | */ | ||
| 1730 | transaction = jh->b_transaction; | 1745 | transaction = jh->b_transaction; |
| 1731 | if (transaction == NULL) { | 1746 | if (transaction == NULL) { |
| 1732 | /* First case: not on any transaction. If it | 1747 | /* First case: not on any transaction. If it |
| @@ -1783,16 +1798,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1783 | } else if (transaction == journal->j_committing_transaction) { | 1798 | } else if (transaction == journal->j_committing_transaction) { |
| 1784 | JBUFFER_TRACE(jh, "on committing transaction"); | 1799 | JBUFFER_TRACE(jh, "on committing transaction"); |
| 1785 | /* | 1800 | /* |
| 1786 | * If it is committing, we simply cannot touch it. We | 1801 | * The buffer is committing, we simply cannot touch |
| 1787 | * can remove it's next_transaction pointer from the | 1802 | * it. So we just set b_next_transaction to the |
| 1788 | * running transaction if that is set, but nothing | 1803 | * running transaction (if there is one) and mark |
| 1789 | * else. */ | 1804 | * buffer as freed so that commit code knows it should |
| 1805 | * clear dirty bits when it is done with the buffer. | ||
| 1806 | */ | ||
| 1790 | set_buffer_freed(bh); | 1807 | set_buffer_freed(bh); |
| 1791 | if (jh->b_next_transaction) { | 1808 | if (journal->j_running_transaction && buffer_jbddirty(bh)) |
| 1792 | J_ASSERT(jh->b_next_transaction == | 1809 | jh->b_next_transaction = journal->j_running_transaction; |
| 1793 | journal->j_running_transaction); | ||
| 1794 | jh->b_next_transaction = NULL; | ||
| 1795 | } | ||
| 1796 | jbd2_journal_put_journal_head(jh); | 1810 | jbd2_journal_put_journal_head(jh); |
| 1797 | spin_unlock(&journal->j_list_lock); | 1811 | spin_unlock(&journal->j_list_lock); |
| 1798 | jbd_unlock_bh_state(bh); | 1812 | jbd_unlock_bh_state(bh); |
| @@ -1969,7 +1983,7 @@ void jbd2_journal_file_buffer(struct journal_head *jh, | |||
| 1969 | */ | 1983 | */ |
| 1970 | void __jbd2_journal_refile_buffer(struct journal_head *jh) | 1984 | void __jbd2_journal_refile_buffer(struct journal_head *jh) |
| 1971 | { | 1985 | { |
| 1972 | int was_dirty; | 1986 | int was_dirty, jlist; |
| 1973 | struct buffer_head *bh = jh2bh(jh); | 1987 | struct buffer_head *bh = jh2bh(jh); |
| 1974 | 1988 | ||
| 1975 | J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); | 1989 | J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); |
| @@ -1991,8 +2005,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) | |||
| 1991 | __jbd2_journal_temp_unlink_buffer(jh); | 2005 | __jbd2_journal_temp_unlink_buffer(jh); |
| 1992 | jh->b_transaction = jh->b_next_transaction; | 2006 | jh->b_transaction = jh->b_next_transaction; |
| 1993 | jh->b_next_transaction = NULL; | 2007 | jh->b_next_transaction = NULL; |
| 1994 | __jbd2_journal_file_buffer(jh, jh->b_transaction, | 2008 | if (buffer_freed(bh)) |
| 1995 | jh->b_modified ? BJ_Metadata : BJ_Reserved); | 2009 | jlist = BJ_Forget; |
| 2010 | else if (jh->b_modified) | ||
| 2011 | jlist = BJ_Metadata; | ||
| 2012 | else | ||
| 2013 | jlist = BJ_Reserved; | ||
| 2014 | __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist); | ||
| 1996 | J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); | 2015 | J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); |
| 1997 | 2016 | ||
| 1998 | if (was_dirty) | 2017 | if (was_dirty) |
