From 78a4a50a86b0a54f7ecbc164267b6c762760254c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 29 Feb 2008 22:02:31 -0800 Subject: docbook: fix filesystems.tmpl source files Fix docbook problems in filesystems.tmpl. These cause the generated docbook to be incorrect. Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- fs/buffer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 3ebccf4aa7e3..897cd7477b34 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -627,8 +627,7 @@ repeat: } /** - * sync_mapping_buffers - write out and wait upon a mapping's "associated" - * buffers + * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers * @mapping: the mapping which wants those buffers written * * Starts I/O against the buffers at mapping->private_list, and waits upon -- cgit v1.2.2 From e3892296de632e3f9299d9fabe0c746740004891 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Mar 2008 14:28:33 -0800 Subject: vfs: fix NULL pointer dereference in fsync_buffers_list() Fix NULL pointer dereference in fsync_buffers_list() introduced by recent fix of races in private_list handling. Since bh->b_assoc_map has been cleared in __remove_assoc_queue() we should really use original value stored in the 'mapping' variable. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 897cd7477b34..ddfdd2c80bf9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -835,7 +835,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) smp_mb(); if (buffer_dirty(bh)) { list_add(&bh->b_assoc_buffers, - &bh->b_assoc_map->private_list); + &mapping->private_list); bh->b_assoc_map = mapping; } spin_unlock(lock); -- cgit v1.2.2 From a6b91919e0881a0d0a4ae5211d5c879a8c7ca92b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 19 Mar 2008 17:01:00 -0700 Subject: fs: fix kernel-doc notation warnings Fix kernel-doc notation warnings in fs/. Warning(mmotm-2008-0314-1449//fs/super.c:560): missing initial short description on line: * mark_files_ro Warning(mmotm-2008-0314-1449//fs/locks.c:1277): missing initial short description on line: * lease_get_mtime Warning(mmotm-2008-0314-1449//fs/locks.c:1277): missing initial short description on line: * lease_get_mtime Warning(mmotm-2008-0314-1449//fs/namei.c:1368): missing initial short description on line: * lookup_one_len: filesystem helper to lookup single pathname component Warning(mmotm-2008-0314-1449//fs/buffer.c:3221): missing initial short description on line: * bh_uptodate_or_lock: Test whether the buffer is uptodate Warning(mmotm-2008-0314-1449//fs/buffer.c:3240): missing initial short description on line: * bh_submit_read: Submit a locked buffer for reading Warning(mmotm-2008-0314-1449//fs/fs-writeback.c:30): missing initial short description on line: * writeback_acquire: attempt to get exclusive writeback access to a device Warning(mmotm-2008-0314-1449//fs/fs-writeback.c:47): missing initial short description on line: * writeback_in_progress: determine whether there is writeback in progress Warning(mmotm-2008-0314-1449//fs/fs-writeback.c:58): missing initial short description on line: * writeback_release: relinquish exclusive writeback access against a device. Warning(mmotm-2008-0314-1449//include/linux/jbd.h:351): contents before sections Warning(mmotm-2008-0314-1449//include/linux/jbd.h:561): contents before sections Warning(mmotm-2008-0314-1449//fs/jbd/transaction.c:1935): missing initial short description on line: * void journal_invalidatepage() Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index ddfdd2c80bf9..7ba58386beee 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3213,7 +3213,7 @@ static int buffer_cpu_notify(struct notifier_block *self, } /** - * bh_uptodate_or_lock: Test whether the buffer is uptodate + * bh_uptodate_or_lock - Test whether the buffer is uptodate * @bh: struct buffer_head * * Return true if the buffer is up-to-date and false, @@ -3232,7 +3232,7 @@ int bh_uptodate_or_lock(struct buffer_head *bh) EXPORT_SYMBOL(bh_uptodate_or_lock); /** - * bh_submit_read: Submit a locked buffer for reading + * bh_submit_read - Submit a locked buffer for reading * @bh: struct buffer_head * * Returns zero on success and -EIO on error. -- cgit v1.2.2 From 5b41e74ad1b0bf7bc51765ae74e5dc564afc3e48 Mon Sep 17 00:00:00 2001 From: Dmitri Monakhov Date: Fri, 28 Mar 2008 14:15:52 -0700 Subject: vfs: fix data leak in nobh_write_end() Current nobh_write_end() implementation ignore partial writes(copied < len) case if page was fully mapped and simply mark page as Uptodate, which is totally wrong because area [pos+copied, pos+len) wasn't updated explicitly in previous write_begin call. It simply contains garbage from pagecache and result in data leakage. #TEST_CASE_BEGIN: ~~~~~~~~~~~~~~~~ In fact issue triggered by classical testcase open("/mnt/test", O_RDWR|O_CREAT|O_TRUNC, 0666) = 3 ftruncate(3, 409600) = 0 writev(3, [{"a", 1}, {NULL, 4095}], 2) = 1 ##TESTCASE_SOURCE: ~~~~~~~~~~~~~~~~~ #include #include #include #include #include #include int main(int argc, char **argv) { int fd, ret; void* p; struct iovec iov[2]; fd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0666); ftruncate(fd, 409600); iov[0].iov_base="a"; iov[0].iov_len=1; iov[1].iov_base=NULL; iov[1].iov_len=4096; ret = writev(fd, iov, sizeof(iov)/sizeof(struct iovec)); printf("writev = %d, err = %d\n", ret, errno); return 0; } ##TESTCASE RESULT: ~~~~~~~~~~~~~~~~~~ [root@ts63 ~]# mount | grep mnt2 /dev/mapper/test on /mnt2 type ext2 (rw,nobh) [root@ts63 ~]# /tmp/writev /mnt2/test writev = 1, err = 0 [root@ts63 ~]# hexdump -C /mnt2/test 00000000 61 65 62 6f 6f 74 00 00 f0 b9 b4 59 3a 00 00 00 |aeboot.....Y:...| 00000010 20 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 | .......!.......| 00000020 df df df df df df df df df df df df df df df df |................| 00000030 3a 00 00 00 2a 00 00 00 21 00 00 00 00 00 00 00 |:...*...!.......| 00000040 60 c0 8c 00 00 00 00 00 40 4a 8d 00 00 00 00 00 |`.......@J......| 00000050 00 00 00 00 00 00 00 00 41 00 00 00 00 00 00 00 |........A.......| 00000060 74 69 6d 65 20 64 64 20 69 66 3d 2f 64 65 76 2f |time dd if=/dev/| 00000070 6c 6f 6f 70 30 20 20 6f 66 3d 2f 64 65 76 2f 6e |loop0 of=/dev/n| skip.. 00000f50 00 00 00 00 00 00 00 00 31 00 00 00 00 00 00 00 |........1.......| 00000f60 6d 6b 66 73 2e 65 78 74 33 20 2f 64 65 76 2f 76 |mkfs.ext3 /dev/v| 00000f70 7a 76 67 2f 74 65 73 74 20 2d 62 34 30 39 36 00 |zvg/test -b4096.| 00000f80 a0 fe 8c 00 00 00 00 00 21 00 00 00 00 00 00 00 |........!.......| 00000f90 23 31 32 30 35 39 35 30 34 30 34 00 3a 00 00 00 |#1205950404.:...| 00000fa0 20 00 8d 00 00 00 00 00 21 00 00 00 00 00 00 00 | .......!.......| 00000fb0 d0 cf 8c 00 00 00 00 00 10 d0 8c 00 00 00 00 00 |................| 00000fc0 00 00 00 00 00 00 00 00 41 00 00 00 00 00 00 00 |........A.......| 00000fd0 6d 6f 75 6e 74 20 2f 64 65 76 2f 76 7a 76 67 2f |mount /dev/vzvg/| 00000fe0 74 65 73 74 20 20 2f 76 7a 20 2d 6f 20 64 61 74 |test /vz -o dat| 00000ff0 61 3d 77 72 69 74 65 62 61 63 6b 00 00 00 00 00 |a=writeback.....| 00001000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| As you can see file's page contains garbage from pagecache instead of zeros. #TEST_CASE_END Attached patch: - Add sanity check BUG_ON in order to prevent incorrect usage by caller, This is function invariant because page can has buffers and in no zero *fadata pointer at the same time. - Always attach buffers to page is it is partial write case. - Always switch back to generic_write_end if page has buffers. This is reasonable because if page already has buffer then generic_write_begin was called previously. Signed-off-by: Dmitri Monakhov Reviewed-by: Nick Piggin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 7ba58386beee..98196327ddf0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2564,14 +2564,13 @@ int nobh_write_end(struct file *file, struct address_space *mapping, struct inode *inode = page->mapping->host; struct buffer_head *head = fsdata; struct buffer_head *bh; + BUG_ON(fsdata != NULL && page_has_buffers(page)); - if (!PageMappedToDisk(page)) { - if (unlikely(copied < len) && !page_has_buffers(page)) - attach_nobh_buffers(page, head); - if (page_has_buffers(page)) - return generic_write_end(file, mapping, pos, len, - copied, page, fsdata); - } + if (unlikely(copied < len) && !page_has_buffers(page)) + attach_nobh_buffers(page, head); + if (page_has_buffers(page)) + return generic_write_end(file, mapping, pos, len, + copied, page, fsdata); SetPageUptodate(page); set_page_dirty(page); -- cgit v1.2.2 From 1be62dc190ebaca331038962c873e7967de6cc4b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 4 Apr 2008 14:38:17 -0700 Subject: Be more careful about marking buffers dirty Mikulas Patocka noted that the optimization where we check if a buffer was already dirty (and we avoid re-dirtying it) was not really SMP-safe. Since the read of the old status was not synchronized with anything, an aggressive CPU re-ordering of memory accesses might have moved that read up to before the data was even written to the buffer, and another CPU that cleaned it again, causing the newly dirty state to never actually hit the disk. Admittedly this would probably never trigger in practice, but it's still wrong. Mikulas sent a patch that fixed the problem, but I dislike the subtlety of the whole optimization, so this is an alternate fix that is more explicit about the particular SMP ordering for the optimization, and separates out the speculative reads of the buffer state into its own conditional (and makes the memory barrier only happen if we are likely to actually hit the optimized case in the first place). I considered removing the optimization entirely, but Andrew argued for it's continued existence. I'm a push-over. Cc: Mikulas Patocka Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 98196327ddf0..39ff14403d13 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1181,7 +1181,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) void mark_buffer_dirty(struct buffer_head *bh) { WARN_ON_ONCE(!buffer_uptodate(bh)); - if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) + + /* + * Very *carefully* optimize the it-is-already-dirty case. + * + * Don't let the final "is it dirty" escape to before we + * perhaps modified the buffer. + */ + if (buffer_dirty(bh)) { + smp_mb(); + if (buffer_dirty(bh)) + return; + } + + if (!test_set_buffer_dirty(bh)) __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0); } -- cgit v1.2.2