diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-01-02 12:57:34 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-01-02 12:57:34 -0500 |
commit | 5439ca6b8ff8cf8d758c19eb28b617a5912904ee (patch) | |
tree | 676f9e0b25074d2d8c5ab29df30c962b3cb7311e | |
parent | a7a88b23737095e6c18a20c5d4eef9e25ec5b829 (diff) | |
parent | 0e9a9a1ad619e7e987815d20262d36a2f95717ca (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 bug fixes from Ted Ts'o:
"Various bug fixes for ext4. Perhaps the most serious bug fixed is one
which could cause file system corruptions when performing file punch
operations."
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: avoid hang when mounting non-journal filesystems with orphan list
ext4: lock i_mutex when truncating orphan inodes
ext4: do not try to write superblock on ro remount w/o journal
ext4: include journal blocks in df overhead calcs
ext4: remove unaligned AIO warning printk
ext4: fix an incorrect comment about i_mutex
ext4: fix deadlock in journal_unmap_buffer()
ext4: split off ext4_journalled_invalidatepage()
jbd2: fix assertion failure in jbd2_journal_flush()
ext4: check dioread_nolock on remount
ext4: fix extent tree corruption caused by hole punch
-rw-r--r-- | fs/ext4/extents.c | 22 | ||||
-rw-r--r-- | fs/ext4/file.c | 8 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 99 | ||||
-rw-r--r-- | fs/ext4/namei.c | 3 | ||||
-rw-r--r-- | fs/ext4/super.c | 30 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 30 | ||||
-rw-r--r-- | include/linux/jbd2.h | 2 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 14 |
9 files changed, 152 insertions, 58 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 26af22832a84..5ae1674ec12f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -2226,13 +2226,14 @@ errout: | |||
2226 | * removes index from the index block. | 2226 | * removes index from the index block. |
2227 | */ | 2227 | */ |
2228 | static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | 2228 | static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, |
2229 | struct ext4_ext_path *path) | 2229 | struct ext4_ext_path *path, int depth) |
2230 | { | 2230 | { |
2231 | int err; | 2231 | int err; |
2232 | ext4_fsblk_t leaf; | 2232 | ext4_fsblk_t leaf; |
2233 | 2233 | ||
2234 | /* free index block */ | 2234 | /* free index block */ |
2235 | path--; | 2235 | depth--; |
2236 | path = path + depth; | ||
2236 | leaf = ext4_idx_pblock(path->p_idx); | 2237 | leaf = ext4_idx_pblock(path->p_idx); |
2237 | if (unlikely(path->p_hdr->eh_entries == 0)) { | 2238 | if (unlikely(path->p_hdr->eh_entries == 0)) { |
2238 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); | 2239 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); |
@@ -2257,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
2257 | 2258 | ||
2258 | ext4_free_blocks(handle, inode, NULL, leaf, 1, | 2259 | ext4_free_blocks(handle, inode, NULL, leaf, 1, |
2259 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 2260 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
2261 | |||
2262 | while (--depth >= 0) { | ||
2263 | if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) | ||
2264 | break; | ||
2265 | path--; | ||
2266 | err = ext4_ext_get_access(handle, inode, path); | ||
2267 | if (err) | ||
2268 | break; | ||
2269 | path->p_idx->ei_block = (path+1)->p_idx->ei_block; | ||
2270 | err = ext4_ext_dirty(handle, inode, path); | ||
2271 | if (err) | ||
2272 | break; | ||
2273 | } | ||
2260 | return err; | 2274 | return err; |
2261 | } | 2275 | } |
2262 | 2276 | ||
@@ -2599,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2599 | /* if this leaf is free, then we should | 2613 | /* if this leaf is free, then we should |
2600 | * remove it from index block above */ | 2614 | * remove it from index block above */ |
2601 | if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) | 2615 | if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) |
2602 | err = ext4_ext_rm_idx(handle, inode, path + depth); | 2616 | err = ext4_ext_rm_idx(handle, inode, path, depth); |
2603 | 2617 | ||
2604 | out: | 2618 | out: |
2605 | return err; | 2619 | return err; |
@@ -2802,7 +2816,7 @@ again: | |||
2802 | /* index is empty, remove it; | 2816 | /* index is empty, remove it; |
2803 | * handle must be already prepared by the | 2817 | * handle must be already prepared by the |
2804 | * truncatei_leaf() */ | 2818 | * truncatei_leaf() */ |
2805 | err = ext4_ext_rm_idx(handle, inode, path + i); | 2819 | err = ext4_ext_rm_idx(handle, inode, path, i); |
2806 | } | 2820 | } |
2807 | /* root level has p_bh == NULL, brelse() eats this */ | 2821 | /* root level has p_bh == NULL, brelse() eats this */ |
2808 | brelse(path[i].p_bh); | 2822 | brelse(path[i].p_bh); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d07c27ca594a..405565a62277 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -108,14 +108,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, | |||
108 | 108 | ||
109 | /* Unaligned direct AIO must be serialized; see comment above */ | 109 | /* Unaligned direct AIO must be serialized; see comment above */ |
110 | if (unaligned_aio) { | 110 | if (unaligned_aio) { |
111 | static unsigned long unaligned_warn_time; | ||
112 | |||
113 | /* Warn about this once per day */ | ||
114 | if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ)) | ||
115 | ext4_msg(inode->i_sb, KERN_WARNING, | ||
116 | "Unaligned AIO/DIO on inode %ld by %s; " | ||
117 | "performance will be poor.", | ||
118 | inode->i_ino, current->comm); | ||
119 | mutex_lock(ext4_aio_mutex(inode)); | 111 | mutex_lock(ext4_aio_mutex(inode)); |
120 | ext4_unwritten_wait(inode); | 112 | ext4_unwritten_wait(inode); |
121 | } | 113 | } |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index dfbc1fe96674..3278e64e57b6 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -109,8 +109,6 @@ static int __sync_inode(struct inode *inode, int datasync) | |||
109 | * | 109 | * |
110 | * What we do is just kick off a commit and wait on it. This will snapshot the | 110 | * What we do is just kick off a commit and wait on it. This will snapshot the |
111 | * inode to disk. | 111 | * inode to disk. |
112 | * | ||
113 | * i_mutex lock is held when entering and exiting this function | ||
114 | */ | 112 | */ |
115 | 113 | ||
116 | int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | 114 | int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cb1c1ab2720b..cbfe13bf5b2a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2880,8 +2880,6 @@ static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offs | |||
2880 | 2880 | ||
2881 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 2881 | static void ext4_invalidatepage(struct page *page, unsigned long offset) |
2882 | { | 2882 | { |
2883 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | ||
2884 | |||
2885 | trace_ext4_invalidatepage(page, offset); | 2883 | trace_ext4_invalidatepage(page, offset); |
2886 | 2884 | ||
2887 | /* | 2885 | /* |
@@ -2889,16 +2887,34 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) | |||
2889 | */ | 2887 | */ |
2890 | if (ext4_should_dioread_nolock(page->mapping->host)) | 2888 | if (ext4_should_dioread_nolock(page->mapping->host)) |
2891 | ext4_invalidatepage_free_endio(page, offset); | 2889 | ext4_invalidatepage_free_endio(page, offset); |
2890 | |||
2891 | /* No journalling happens on data buffers when this function is used */ | ||
2892 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); | ||
2893 | |||
2894 | block_invalidatepage(page, offset); | ||
2895 | } | ||
2896 | |||
2897 | static int __ext4_journalled_invalidatepage(struct page *page, | ||
2898 | unsigned long offset) | ||
2899 | { | ||
2900 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | ||
2901 | |||
2902 | trace_ext4_journalled_invalidatepage(page, offset); | ||
2903 | |||
2892 | /* | 2904 | /* |
2893 | * If it's a full truncate we just forget about the pending dirtying | 2905 | * If it's a full truncate we just forget about the pending dirtying |
2894 | */ | 2906 | */ |
2895 | if (offset == 0) | 2907 | if (offset == 0) |
2896 | ClearPageChecked(page); | 2908 | ClearPageChecked(page); |
2897 | 2909 | ||
2898 | if (journal) | 2910 | return jbd2_journal_invalidatepage(journal, page, offset); |
2899 | jbd2_journal_invalidatepage(journal, page, offset); | 2911 | } |
2900 | else | 2912 | |
2901 | block_invalidatepage(page, offset); | 2913 | /* Wrapper for aops... */ |
2914 | static void ext4_journalled_invalidatepage(struct page *page, | ||
2915 | unsigned long offset) | ||
2916 | { | ||
2917 | WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0); | ||
2902 | } | 2918 | } |
2903 | 2919 | ||
2904 | static int ext4_releasepage(struct page *page, gfp_t wait) | 2920 | static int ext4_releasepage(struct page *page, gfp_t wait) |
@@ -3264,7 +3280,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
3264 | .write_end = ext4_journalled_write_end, | 3280 | .write_end = ext4_journalled_write_end, |
3265 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3281 | .set_page_dirty = ext4_journalled_set_page_dirty, |
3266 | .bmap = ext4_bmap, | 3282 | .bmap = ext4_bmap, |
3267 | .invalidatepage = ext4_invalidatepage, | 3283 | .invalidatepage = ext4_journalled_invalidatepage, |
3268 | .releasepage = ext4_releasepage, | 3284 | .releasepage = ext4_releasepage, |
3269 | .direct_IO = ext4_direct_IO, | 3285 | .direct_IO = ext4_direct_IO, |
3270 | .is_partially_uptodate = block_is_partially_uptodate, | 3286 | .is_partially_uptodate = block_is_partially_uptodate, |
@@ -4305,6 +4321,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4305 | } | 4321 | } |
4306 | 4322 | ||
4307 | /* | 4323 | /* |
4324 | * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate | ||
4325 | * buffers that are attached to a page straddling i_size and are undergoing | ||
4326 | * commit. In that case we have to wait for commit to finish and try again. | ||
4327 | */ | ||
4328 | static void ext4_wait_for_tail_page_commit(struct inode *inode) | ||
4329 | { | ||
4330 | struct page *page; | ||
4331 | unsigned offset; | ||
4332 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | ||
4333 | tid_t commit_tid = 0; | ||
4334 | int ret; | ||
4335 | |||
4336 | offset = inode->i_size & (PAGE_CACHE_SIZE - 1); | ||
4337 | /* | ||
4338 | * All buffers in the last page remain valid? Then there's nothing to | ||
4339 | * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == | ||
4340 | * blocksize case | ||
4341 | */ | ||
4342 | if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) | ||
4343 | return; | ||
4344 | while (1) { | ||
4345 | page = find_lock_page(inode->i_mapping, | ||
4346 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
4347 | if (!page) | ||
4348 | return; | ||
4349 | ret = __ext4_journalled_invalidatepage(page, offset); | ||
4350 | unlock_page(page); | ||
4351 | page_cache_release(page); | ||
4352 | if (ret != -EBUSY) | ||
4353 | return; | ||
4354 | commit_tid = 0; | ||
4355 | read_lock(&journal->j_state_lock); | ||
4356 | if (journal->j_committing_transaction) | ||
4357 | commit_tid = journal->j_committing_transaction->t_tid; | ||
4358 | read_unlock(&journal->j_state_lock); | ||
4359 | if (commit_tid) | ||
4360 | jbd2_log_wait_commit(journal, commit_tid); | ||
4361 | } | ||
4362 | } | ||
4363 | |||
4364 | /* | ||
4308 | * ext4_setattr() | 4365 | * ext4_setattr() |
4309 | * | 4366 | * |
4310 | * Called from notify_change. | 4367 | * Called from notify_change. |
@@ -4417,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4417 | } | 4474 | } |
4418 | 4475 | ||
4419 | if (attr->ia_valid & ATTR_SIZE) { | 4476 | if (attr->ia_valid & ATTR_SIZE) { |
4420 | if (attr->ia_size != i_size_read(inode)) { | 4477 | if (attr->ia_size != inode->i_size) { |
4421 | truncate_setsize(inode, attr->ia_size); | 4478 | loff_t oldsize = inode->i_size; |
4422 | /* Inode size will be reduced, wait for dio in flight. | 4479 | |
4423 | * Temporarily disable dioread_nolock to prevent | 4480 | i_size_write(inode, attr->ia_size); |
4424 | * livelock. */ | 4481 | /* |
4482 | * Blocks are going to be removed from the inode. Wait | ||
4483 | * for dio in flight. Temporarily disable | ||
4484 | * dioread_nolock to prevent livelock. | ||
4485 | */ | ||
4425 | if (orphan) { | 4486 | if (orphan) { |
4426 | ext4_inode_block_unlocked_dio(inode); | 4487 | if (!ext4_should_journal_data(inode)) { |
4427 | inode_dio_wait(inode); | 4488 | ext4_inode_block_unlocked_dio(inode); |
4428 | ext4_inode_resume_unlocked_dio(inode); | 4489 | inode_dio_wait(inode); |
4490 | ext4_inode_resume_unlocked_dio(inode); | ||
4491 | } else | ||
4492 | ext4_wait_for_tail_page_commit(inode); | ||
4429 | } | 4493 | } |
4494 | /* | ||
4495 | * Truncate pagecache after we've waited for commit | ||
4496 | * in data=journal mode to make pages freeable. | ||
4497 | */ | ||
4498 | truncate_pagecache(inode, oldsize, inode->i_size); | ||
4430 | } | 4499 | } |
4431 | ext4_truncate(inode); | 4500 | ext4_truncate(inode); |
4432 | } | 4501 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cac448282331..8990165346ee 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -2648,7 +2648,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2648 | struct ext4_iloc iloc; | 2648 | struct ext4_iloc iloc; |
2649 | int err = 0; | 2649 | int err = 0; |
2650 | 2650 | ||
2651 | if (!EXT4_SB(inode->i_sb)->s_journal) | 2651 | if ((!EXT4_SB(inode->i_sb)->s_journal) && |
2652 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) | ||
2652 | return 0; | 2653 | return 0; |
2653 | 2654 | ||
2654 | mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); | 2655 | mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3cdb0a2fc648..3d4fb81bacd5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -1645,9 +1645,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1645 | unsigned int *journal_ioprio, | 1645 | unsigned int *journal_ioprio, |
1646 | int is_remount) | 1646 | int is_remount) |
1647 | { | 1647 | { |
1648 | #ifdef CONFIG_QUOTA | ||
1649 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1648 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1650 | #endif | ||
1651 | char *p; | 1649 | char *p; |
1652 | substring_t args[MAX_OPT_ARGS]; | 1650 | substring_t args[MAX_OPT_ARGS]; |
1653 | int token; | 1651 | int token; |
@@ -1696,6 +1694,16 @@ static int parse_options(char *options, struct super_block *sb, | |||
1696 | } | 1694 | } |
1697 | } | 1695 | } |
1698 | #endif | 1696 | #endif |
1697 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
1698 | int blocksize = | ||
1699 | BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); | ||
1700 | |||
1701 | if (blocksize < PAGE_CACHE_SIZE) { | ||
1702 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
1703 | "dioread_nolock if block size != PAGE_SIZE"); | ||
1704 | return 0; | ||
1705 | } | ||
1706 | } | ||
1699 | return 1; | 1707 | return 1; |
1700 | } | 1708 | } |
1701 | 1709 | ||
@@ -2212,7 +2220,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2212 | __func__, inode->i_ino, inode->i_size); | 2220 | __func__, inode->i_ino, inode->i_size); |
2213 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", | 2221 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
2214 | inode->i_ino, inode->i_size); | 2222 | inode->i_ino, inode->i_size); |
2223 | mutex_lock(&inode->i_mutex); | ||
2215 | ext4_truncate(inode); | 2224 | ext4_truncate(inode); |
2225 | mutex_unlock(&inode->i_mutex); | ||
2216 | nr_truncates++; | 2226 | nr_truncates++; |
2217 | } else { | 2227 | } else { |
2218 | ext4_msg(sb, KERN_DEBUG, | 2228 | ext4_msg(sb, KERN_DEBUG, |
@@ -3223,6 +3233,10 @@ int ext4_calculate_overhead(struct super_block *sb) | |||
3223 | memset(buf, 0, PAGE_SIZE); | 3233 | memset(buf, 0, PAGE_SIZE); |
3224 | cond_resched(); | 3234 | cond_resched(); |
3225 | } | 3235 | } |
3236 | /* Add the journal blocks as well */ | ||
3237 | if (sbi->s_journal) | ||
3238 | overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen); | ||
3239 | |||
3226 | sbi->s_overhead = overhead; | 3240 | sbi->s_overhead = overhead; |
3227 | smp_wmb(); | 3241 | smp_wmb(); |
3228 | free_page((unsigned long) buf); | 3242 | free_page((unsigned long) buf); |
@@ -3436,15 +3450,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3436 | clear_opt(sb, DELALLOC); | 3450 | clear_opt(sb, DELALLOC); |
3437 | } | 3451 | } |
3438 | 3452 | ||
3439 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | ||
3440 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
3441 | if (blocksize < PAGE_SIZE) { | ||
3442 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
3443 | "dioread_nolock if block size != PAGE_SIZE"); | ||
3444 | goto failed_mount; | ||
3445 | } | ||
3446 | } | ||
3447 | |||
3448 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 3453 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
3449 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); | 3454 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); |
3450 | 3455 | ||
@@ -3486,6 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3486 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) | 3491 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) |
3487 | goto failed_mount; | 3492 | goto failed_mount; |
3488 | 3493 | ||
3494 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | ||
3489 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 3495 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
3490 | blocksize > EXT4_MAX_BLOCK_SIZE) { | 3496 | blocksize > EXT4_MAX_BLOCK_SIZE) { |
3491 | ext4_msg(sb, KERN_ERR, | 3497 | ext4_msg(sb, KERN_ERR, |
@@ -4725,7 +4731,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4725 | } | 4731 | } |
4726 | 4732 | ||
4727 | ext4_setup_system_zone(sb); | 4733 | ext4_setup_system_zone(sb); |
4728 | if (sbi->s_journal == NULL) | 4734 | if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) |
4729 | ext4_commit_super(sb, 1); | 4735 | ext4_commit_super(sb, 1); |
4730 | 4736 | ||
4731 | #ifdef CONFIG_QUOTA | 4737 | #ifdef CONFIG_QUOTA |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 42f6615af0ac..df9f29760efa 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -209,7 +209,8 @@ repeat: | |||
209 | if (!new_transaction) | 209 | if (!new_transaction) |
210 | goto alloc_transaction; | 210 | goto alloc_transaction; |
211 | write_lock(&journal->j_state_lock); | 211 | write_lock(&journal->j_state_lock); |
212 | if (!journal->j_running_transaction) { | 212 | if (!journal->j_running_transaction && |
213 | !journal->j_barrier_count) { | ||
213 | jbd2_get_transaction(journal, new_transaction); | 214 | jbd2_get_transaction(journal, new_transaction); |
214 | new_transaction = NULL; | 215 | new_transaction = NULL; |
215 | } | 216 | } |
@@ -1839,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, | |||
1839 | 1840 | ||
1840 | BUFFER_TRACE(bh, "entry"); | 1841 | BUFFER_TRACE(bh, "entry"); |
1841 | 1842 | ||
1842 | retry: | ||
1843 | /* | 1843 | /* |
1844 | * It is safe to proceed here without the j_list_lock because the | 1844 | * It is safe to proceed here without the j_list_lock because the |
1845 | * buffers cannot be stolen by try_to_free_buffers as long as we are | 1845 | * buffers cannot be stolen by try_to_free_buffers as long as we are |
@@ -1934,14 +1934,11 @@ retry: | |||
1934 | * for commit and try again. | 1934 | * for commit and try again. |
1935 | */ | 1935 | */ |
1936 | if (partial_page) { | 1936 | if (partial_page) { |
1937 | tid_t tid = journal->j_committing_transaction->t_tid; | ||
1938 | |||
1939 | jbd2_journal_put_journal_head(jh); | 1937 | jbd2_journal_put_journal_head(jh); |
1940 | spin_unlock(&journal->j_list_lock); | 1938 | spin_unlock(&journal->j_list_lock); |
1941 | jbd_unlock_bh_state(bh); | 1939 | jbd_unlock_bh_state(bh); |
1942 | write_unlock(&journal->j_state_lock); | 1940 | write_unlock(&journal->j_state_lock); |
1943 | jbd2_log_wait_commit(journal, tid); | 1941 | return -EBUSY; |
1944 | goto retry; | ||
1945 | } | 1942 | } |
1946 | /* | 1943 | /* |
1947 | * OK, buffer won't be reachable after truncate. We just set | 1944 | * OK, buffer won't be reachable after truncate. We just set |
@@ -2002,21 +1999,23 @@ zap_buffer_unlocked: | |||
2002 | * @page: page to flush | 1999 | * @page: page to flush |
2003 | * @offset: length of page to invalidate. | 2000 | * @offset: length of page to invalidate. |
2004 | * | 2001 | * |
2005 | * Reap page buffers containing data after offset in page. | 2002 | * Reap page buffers containing data after offset in page. Can return -EBUSY |
2006 | * | 2003 | * if buffers are part of the committing transaction and the page is straddling |
2004 | * i_size. Caller then has to wait for current commit and try again. | ||
2007 | */ | 2005 | */ |
2008 | void jbd2_journal_invalidatepage(journal_t *journal, | 2006 | int jbd2_journal_invalidatepage(journal_t *journal, |
2009 | struct page *page, | 2007 | struct page *page, |
2010 | unsigned long offset) | 2008 | unsigned long offset) |
2011 | { | 2009 | { |
2012 | struct buffer_head *head, *bh, *next; | 2010 | struct buffer_head *head, *bh, *next; |
2013 | unsigned int curr_off = 0; | 2011 | unsigned int curr_off = 0; |
2014 | int may_free = 1; | 2012 | int may_free = 1; |
2013 | int ret = 0; | ||
2015 | 2014 | ||
2016 | if (!PageLocked(page)) | 2015 | if (!PageLocked(page)) |
2017 | BUG(); | 2016 | BUG(); |
2018 | if (!page_has_buffers(page)) | 2017 | if (!page_has_buffers(page)) |
2019 | return; | 2018 | return 0; |
2020 | 2019 | ||
2021 | /* We will potentially be playing with lists other than just the | 2020 | /* We will potentially be playing with lists other than just the |
2022 | * data lists (especially for journaled data mode), so be | 2021 | * data lists (especially for journaled data mode), so be |
@@ -2030,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal, | |||
2030 | if (offset <= curr_off) { | 2029 | if (offset <= curr_off) { |
2031 | /* This block is wholly outside the truncation point */ | 2030 | /* This block is wholly outside the truncation point */ |
2032 | lock_buffer(bh); | 2031 | lock_buffer(bh); |
2033 | may_free &= journal_unmap_buffer(journal, bh, | 2032 | ret = journal_unmap_buffer(journal, bh, offset > 0); |
2034 | offset > 0); | ||
2035 | unlock_buffer(bh); | 2033 | unlock_buffer(bh); |
2034 | if (ret < 0) | ||
2035 | return ret; | ||
2036 | may_free &= ret; | ||
2036 | } | 2037 | } |
2037 | curr_off = next_off; | 2038 | curr_off = next_off; |
2038 | bh = next; | 2039 | bh = next; |
@@ -2043,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal, | |||
2043 | if (may_free && try_to_free_buffers(page)) | 2044 | if (may_free && try_to_free_buffers(page)) |
2044 | J_ASSERT(!page_has_buffers(page)); | 2045 | J_ASSERT(!page_has_buffers(page)); |
2045 | } | 2046 | } |
2047 | return 0; | ||
2046 | } | 2048 | } |
2047 | 2049 | ||
2048 | /* | 2050 | /* |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1be23d9fdacb..e30b66346942 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *, | |||
1098 | extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); | 1098 | extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); |
1099 | extern int jbd2_journal_forget (handle_t *, struct buffer_head *); | 1099 | extern int jbd2_journal_forget (handle_t *, struct buffer_head *); |
1100 | extern void journal_sync_buffer (struct buffer_head *); | 1100 | extern void journal_sync_buffer (struct buffer_head *); |
1101 | extern void jbd2_journal_invalidatepage(journal_t *, | 1101 | extern int jbd2_journal_invalidatepage(journal_t *, |
1102 | struct page *, unsigned long); | 1102 | struct page *, unsigned long); |
1103 | extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); | 1103 | extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); |
1104 | extern int jbd2_journal_stop(handle_t *); | 1104 | extern int jbd2_journal_stop(handle_t *); |
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index f6372b011366..7e8c36bc7082 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
@@ -451,7 +451,7 @@ DEFINE_EVENT(ext4__page_op, ext4_releasepage, | |||
451 | TP_ARGS(page) | 451 | TP_ARGS(page) |
452 | ); | 452 | ); |
453 | 453 | ||
454 | TRACE_EVENT(ext4_invalidatepage, | 454 | DECLARE_EVENT_CLASS(ext4_invalidatepage_op, |
455 | TP_PROTO(struct page *page, unsigned long offset), | 455 | TP_PROTO(struct page *page, unsigned long offset), |
456 | 456 | ||
457 | TP_ARGS(page, offset), | 457 | TP_ARGS(page, offset), |
@@ -477,6 +477,18 @@ TRACE_EVENT(ext4_invalidatepage, | |||
477 | (unsigned long) __entry->index, __entry->offset) | 477 | (unsigned long) __entry->index, __entry->offset) |
478 | ); | 478 | ); |
479 | 479 | ||
480 | DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage, | ||
481 | TP_PROTO(struct page *page, unsigned long offset), | ||
482 | |||
483 | TP_ARGS(page, offset) | ||
484 | ); | ||
485 | |||
486 | DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage, | ||
487 | TP_PROTO(struct page *page, unsigned long offset), | ||
488 | |||
489 | TP_ARGS(page, offset) | ||
490 | ); | ||
491 | |||
480 | TRACE_EVENT(ext4_discard_blocks, | 492 | TRACE_EVENT(ext4_discard_blocks, |
481 | TP_PROTO(struct super_block *sb, unsigned long long blk, | 493 | TP_PROTO(struct super_block *sb, unsigned long long blk, |
482 | unsigned long long count), | 494 | unsigned long long count), |