diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-07 17:36:39 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-07 17:36:39 -0400 |
commit | 6432f2128414edbea5fd4f6c4fa4c28d0e1c6151 (patch) | |
tree | d3c63c5f2f043ce52d98d8dfd3c9c0a7bc76ed95 /fs/jbd2 | |
parent | 1b033447bf847ba49c3816c564c9191c97456b36 (diff) | |
parent | c278531d39f3158bfee93dc67da0b77e09776de2 (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"The big new feature added this time is supporting online resizing
using the meta_bg feature. This allows us to resize file systems
which are greater than 16TB. In addition, the speed of online
resizing has been improved in general.
We also fix a number of races, some of which could lead to deadlocks,
in ext4's Asynchronous I/O and online defrag support, thanks to good
work by Dmitry Monakhov.
There are also a large number of more minor bug fixes and cleanups
from a number of other ext4 contributors, quite a few of whom have
submitted fixes for the first time."
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (69 commits)
ext4: fix ext4_flush_completed_IO wait semantics
ext4: fix mtime update in nodelalloc mode
ext4: fix ext_remove_space for punch_hole case
ext4: punch_hole should wait for DIO writers
ext4: serialize truncate with overwrite DIO workers
ext4: endless truncate due to nonlocked dio readers
ext4: serialize unlocked dio reads with truncate
ext4: serialize dio nonlocked reads with defrag workers
ext4: completed_io locking cleanup
ext4: fix unwritten counter leakage
ext4: give i_aiodio_unwritten a more appropriate name
ext4: ext4_inode_info diet
ext4: convert to use leXX_add_cpu()
ext4: ext4_bread usage audit
fs: reserve fallocate flag codepoint
ext4: remove redundant offset check in mext_check_arguments()
ext4: don't clear orphan list on ro mount with errors
jbd2: fix assertion failure in commit code due to lacking transaction credits
ext4: release donor reference when EXT4_IOC_MOVE_EXT ioctl fails
ext4: enable FITRIM ioctl on bigalloc file system
...
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/commit.c | 40 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 5 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 7 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 65 |
4 files changed, 84 insertions, 33 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index af5280fb579b..3091d42992f0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -1014,17 +1014,35 @@ restart_loop: | |||
1014 | * there's no point in keeping a checkpoint record for | 1014 | * there's no point in keeping a checkpoint record for |
1015 | * it. */ | 1015 | * it. */ |
1016 | 1016 | ||
1017 | /* A buffer which has been freed while still being | 1017 | /* |
1018 | * journaled by a previous transaction may end up still | 1018 | * A buffer which has been freed while still being journaled by |
1019 | * being dirty here, but we want to avoid writing back | 1019 | * a previous transaction. |
1020 | * that buffer in the future after the "add to orphan" | 1020 | */ |
1021 | * operation been committed, That's not only a performance | 1021 | if (buffer_freed(bh)) { |
1022 | * gain, it also stops aliasing problems if the buffer is | 1022 | /* |
1023 | * left behind for writeback and gets reallocated for another | 1023 | * If the running transaction is the one containing |
1024 | * use in a different page. */ | 1024 | * "add to orphan" operation (b_next_transaction != |
1025 | if (buffer_freed(bh) && !jh->b_next_transaction) { | 1025 | * NULL), we have to wait for that transaction to |
1026 | clear_buffer_freed(bh); | 1026 | * commit before we can really get rid of the buffer. |
1027 | clear_buffer_jbddirty(bh); | 1027 | * So just clear b_modified to not confuse transaction |
1028 | * credit accounting and refile the buffer to | ||
1029 | * BJ_Forget of the running transaction. If the just | ||
1030 | * committed transaction contains "add to orphan" | ||
1031 | * operation, we can completely invalidate the buffer | ||
1032 | * now. We are rather thorough in that since the | ||
1033 | * buffer may be still accessible when blocksize < | ||
1034 | * pagesize and it is attached to the last partial | ||
1035 | * page. | ||
1036 | */ | ||
1037 | jh->b_modified = 0; | ||
1038 | if (!jh->b_next_transaction) { | ||
1039 | clear_buffer_freed(bh); | ||
1040 | clear_buffer_jbddirty(bh); | ||
1041 | clear_buffer_mapped(bh); | ||
1042 | clear_buffer_new(bh); | ||
1043 | clear_buffer_req(bh); | ||
1044 | bh->b_bdev = NULL; | ||
1045 | } | ||
1028 | } | 1046 | } |
1029 | 1047 | ||
1030 | if (buffer_jbddirty(bh)) { | 1048 | if (buffer_jbddirty(bh)) { |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e149b99a7ffb..484b8d1c6cb6 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1354,6 +1354,11 @@ static void jbd2_mark_journal_empty(journal_t *journal) | |||
1354 | 1354 | ||
1355 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | 1355 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); |
1356 | read_lock(&journal->j_state_lock); | 1356 | read_lock(&journal->j_state_lock); |
1357 | /* Is it already empty? */ | ||
1358 | if (sb->s_start == 0) { | ||
1359 | read_unlock(&journal->j_state_lock); | ||
1360 | return; | ||
1361 | } | ||
1357 | jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", | 1362 | jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", |
1358 | journal->j_tail_sequence); | 1363 | journal->j_tail_sequence); |
1359 | 1364 | ||
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 0131e4362534..626846bac32f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -289,8 +289,11 @@ int jbd2_journal_recover(journal_t *journal) | |||
289 | if (!err) | 289 | if (!err) |
290 | err = err2; | 290 | err = err2; |
291 | /* Make sure all replayed data is on permanent storage */ | 291 | /* Make sure all replayed data is on permanent storage */ |
292 | if (journal->j_flags & JBD2_BARRIER) | 292 | if (journal->j_flags & JBD2_BARRIER) { |
293 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | 293 | err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
294 | if (!err) | ||
295 | err = err2; | ||
296 | } | ||
294 | return err; | 297 | return err; |
295 | } | 298 | } |
296 | 299 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index fb1ab9533b67..a74ba4659549 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1841 | * We're outside-transaction here. Either or both of j_running_transaction | 1841 | * We're outside-transaction here. Either or both of j_running_transaction |
1842 | * and j_committing_transaction may be NULL. | 1842 | * and j_committing_transaction may be NULL. |
1843 | */ | 1843 | */ |
1844 | static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | 1844 | static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, |
1845 | int partial_page) | ||
1845 | { | 1846 | { |
1846 | transaction_t *transaction; | 1847 | transaction_t *transaction; |
1847 | struct journal_head *jh; | 1848 | struct journal_head *jh; |
1848 | int may_free = 1; | 1849 | int may_free = 1; |
1849 | int ret; | ||
1850 | 1850 | ||
1851 | BUFFER_TRACE(bh, "entry"); | 1851 | BUFFER_TRACE(bh, "entry"); |
1852 | 1852 | ||
1853 | retry: | ||
1853 | /* | 1854 | /* |
1854 | * It is safe to proceed here without the j_list_lock because the | 1855 | * It is safe to proceed here without the j_list_lock because the |
1855 | * buffers cannot be stolen by try_to_free_buffers as long as we are | 1856 | * buffers cannot be stolen by try_to_free_buffers as long as we are |
@@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1878 | * clear the buffer dirty bit at latest at the moment when the | 1879 | * clear the buffer dirty bit at latest at the moment when the |
1879 | * transaction marking the buffer as freed in the filesystem | 1880 | * transaction marking the buffer as freed in the filesystem |
1880 | * structures is committed because from that moment on the | 1881 | * structures is committed because from that moment on the |
1881 | * buffer can be reallocated and used by a different page. | 1882 | * block can be reallocated and used by a different page. |
1882 | * Since the block hasn't been freed yet but the inode has | 1883 | * Since the block hasn't been freed yet but the inode has |
1883 | * already been added to orphan list, it is safe for us to add | 1884 | * already been added to orphan list, it is safe for us to add |
1884 | * the buffer to BJ_Forget list of the newest transaction. | 1885 | * the buffer to BJ_Forget list of the newest transaction. |
1886 | * | ||
1887 | * Also we have to clear buffer_mapped flag of a truncated buffer | ||
1888 | * because the buffer_head may be attached to the page straddling | ||
1889 | * i_size (can happen only when blocksize < pagesize) and thus the | ||
1890 | * buffer_head can be reused when the file is extended again. So we end | ||
1891 | * up keeping around invalidated buffers attached to transactions' | ||
1892 | * BJ_Forget list just to stop checkpointing code from cleaning up | ||
1893 | * the transaction this buffer was modified in. | ||
1885 | */ | 1894 | */ |
1886 | transaction = jh->b_transaction; | 1895 | transaction = jh->b_transaction; |
1887 | if (transaction == NULL) { | 1896 | if (transaction == NULL) { |
@@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1908 | * committed, the buffer won't be needed any | 1917 | * committed, the buffer won't be needed any |
1909 | * longer. */ | 1918 | * longer. */ |
1910 | JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); | 1919 | JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); |
1911 | ret = __dispose_buffer(jh, | 1920 | may_free = __dispose_buffer(jh, |
1912 | journal->j_running_transaction); | 1921 | journal->j_running_transaction); |
1913 | jbd2_journal_put_journal_head(jh); | 1922 | goto zap_buffer; |
1914 | spin_unlock(&journal->j_list_lock); | ||
1915 | jbd_unlock_bh_state(bh); | ||
1916 | write_unlock(&journal->j_state_lock); | ||
1917 | return ret; | ||
1918 | } else { | 1923 | } else { |
1919 | /* There is no currently-running transaction. So the | 1924 | /* There is no currently-running transaction. So the |
1920 | * orphan record which we wrote for this file must have | 1925 | * orphan record which we wrote for this file must have |
@@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1922 | * the committing transaction, if it exists. */ | 1927 | * the committing transaction, if it exists. */ |
1923 | if (journal->j_committing_transaction) { | 1928 | if (journal->j_committing_transaction) { |
1924 | JBUFFER_TRACE(jh, "give to committing trans"); | 1929 | JBUFFER_TRACE(jh, "give to committing trans"); |
1925 | ret = __dispose_buffer(jh, | 1930 | may_free = __dispose_buffer(jh, |
1926 | journal->j_committing_transaction); | 1931 | journal->j_committing_transaction); |
1927 | jbd2_journal_put_journal_head(jh); | 1932 | goto zap_buffer; |
1928 | spin_unlock(&journal->j_list_lock); | ||
1929 | jbd_unlock_bh_state(bh); | ||
1930 | write_unlock(&journal->j_state_lock); | ||
1931 | return ret; | ||
1932 | } else { | 1933 | } else { |
1933 | /* The orphan record's transaction has | 1934 | /* The orphan record's transaction has |
1934 | * committed. We can cleanse this buffer */ | 1935 | * committed. We can cleanse this buffer */ |
@@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1940 | JBUFFER_TRACE(jh, "on committing transaction"); | 1941 | JBUFFER_TRACE(jh, "on committing transaction"); |
1941 | /* | 1942 | /* |
1942 | * The buffer is committing, we simply cannot touch | 1943 | * The buffer is committing, we simply cannot touch |
1943 | * it. So we just set j_next_transaction to the | 1944 | * it. If the page is straddling i_size we have to wait |
1944 | * running transaction (if there is one) and mark | 1945 | * for commit and try again. |
1945 | * buffer as freed so that commit code knows it should | 1946 | */ |
1946 | * clear dirty bits when it is done with the buffer. | 1947 | if (partial_page) { |
1948 | tid_t tid = journal->j_committing_transaction->t_tid; | ||
1949 | |||
1950 | jbd2_journal_put_journal_head(jh); | ||
1951 | spin_unlock(&journal->j_list_lock); | ||
1952 | jbd_unlock_bh_state(bh); | ||
1953 | write_unlock(&journal->j_state_lock); | ||
1954 | jbd2_log_wait_commit(journal, tid); | ||
1955 | goto retry; | ||
1956 | } | ||
1957 | /* | ||
1958 | * OK, buffer won't be reachable after truncate. We just set | ||
1959 | * j_next_transaction to the running transaction (if there is | ||
1960 | * one) and mark buffer as freed so that commit code knows it | ||
1961 | * should clear dirty bits when it is done with the buffer. | ||
1947 | */ | 1962 | */ |
1948 | set_buffer_freed(bh); | 1963 | set_buffer_freed(bh); |
1949 | if (journal->j_running_transaction && buffer_jbddirty(bh)) | 1964 | if (journal->j_running_transaction && buffer_jbddirty(bh)) |
@@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1966 | } | 1981 | } |
1967 | 1982 | ||
1968 | zap_buffer: | 1983 | zap_buffer: |
1984 | /* | ||
1985 | * This is tricky. Although the buffer is truncated, it may be reused | ||
1986 | * if blocksize < pagesize and it is attached to the page straddling | ||
1987 | * EOF. Since the buffer might have been added to BJ_Forget list of the | ||
1988 | * running transaction, journal_get_write_access() won't clear | ||
1989 | * b_modified and credit accounting gets confused. So clear b_modified | ||
1990 | * here. | ||
1991 | */ | ||
1992 | jh->b_modified = 0; | ||
1969 | jbd2_journal_put_journal_head(jh); | 1993 | jbd2_journal_put_journal_head(jh); |
1970 | zap_buffer_no_jh: | 1994 | zap_buffer_no_jh: |
1971 | spin_unlock(&journal->j_list_lock); | 1995 | spin_unlock(&journal->j_list_lock); |
@@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal, | |||
2017 | if (offset <= curr_off) { | 2041 | if (offset <= curr_off) { |
2018 | /* This block is wholly outside the truncation point */ | 2042 | /* This block is wholly outside the truncation point */ |
2019 | lock_buffer(bh); | 2043 | lock_buffer(bh); |
2020 | may_free &= journal_unmap_buffer(journal, bh); | 2044 | may_free &= journal_unmap_buffer(journal, bh, |
2045 | offset > 0); | ||
2021 | unlock_buffer(bh); | 2046 | unlock_buffer(bh); |
2022 | } | 2047 | } |
2023 | curr_off = next_off; | 2048 | curr_off = next_off; |