ocfs2: serialize unaligned aio

Fix a corruption that can happen when we have (two or more) outstanding aio's to an overlapping unaligned region. Ext4 (e9e3bcecf44c04b9e6b505fd8e2eb9cea58fb94d) and xfs recently had to fix similar issues.

In our case what happens is that we can have an outstanding aio on a region and if a write comes in with some bytes overlapping the original aio we may decide to read that region into a page before continuing (typically because of buffered-io fallback). Since we have no ordering guarantees with the aio, we can read stale or bad data into the page and then write it back out.

If the i/o is page and block aligned, then we avoid this issue as there won't be any need to read data from disk.

I took the same approach as Eric in the ext4 patch and introduced some serialization of unaligned async direct i/o. I don't expect this to have an effect on the most common cases of AIO. Unaligned aio will be slower though, but that's far more acceptable than data corruption.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <jlbec@evilplan.org>
author: Mark Fasheh <mfasheh@suse.com> 2011-06-22 17:23:38 -0400
committer: Joel Becker <jlbec@evilplan.org> 2011-07-28 05:07:16 -0400
commit: a11f7e63c59810a81494d4c4b028af707d4c7ca4 (patch)
tree: 6d28cfc9519f96db5c20780bf765de9e0fc03bef /fs/ocfs2/super.c
parent: 730e663bd82c1a10a85ff00728d34152a5a67ec8 (diff)
1 files changed, 8 insertions, 2 deletions
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 029c4cd8a691..603f5fe9f816 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -54,6 +54,7 @@
 #include "ocfs1_fs_compat.h"
 #include "alloc.h"
+#include "aops.h"
 #include "blockcheck.h"
 #include "dlmglue.h"
 #include "export.h"
@@ -1616,12 +1617,17 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
        return 0;
 }
+wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];
 static int __init ocfs2_init(void)
 {
-        int status;
+        int status, i;
        ocfs2_print_version();
+        for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
+                init_waitqueue_head(&ocfs2__ioend_wq[i]);
        status = init_ocfs2_uptodate_cache();
        if (status < 0) {
                mlog_errno(status);
@@ -1760,7 +1766,7 @@ static void ocfs2_inode_init_once(void *data)
        ocfs2_extent_map_init(&oi->vfs_inode);
        INIT_LIST_HEAD(&oi->ip_io_markers);
        oi->ip_dir_start_lookup = 0;
+        atomic_set(&oi->ip_unaligned_aio, 0);
        init_rwsem(&oi->ip_alloc_sem);
        init_rwsem(&oi->ip_xattr_sem);
        mutex_init(&oi->ip_io_mutex);
author	Mark Fasheh <mfasheh@suse.com>	2011-06-22 17:23:38 -0400
committer	Joel Becker <jlbec@evilplan.org>	2011-07-28 05:07:16 -0400
commit	a11f7e63c59810a81494d4c4b028af707d4c7ca4 (patch)
tree	6d28cfc9519f96db5c20780bf765de9e0fc03bef /fs/ocfs2/super.c
parent	730e663bd82c1a10a85ff00728d34152a5a67ec8 (diff)

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 029c4cd8a691..603f5fe9f816 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -54,6 +54,7 @@
54	#include "ocfs1_fs_compat.h"	54	#include "ocfs1_fs_compat.h"
55		55
56	#include "alloc.h"	56	#include "alloc.h"
		57	#include "aops.h"
57	#include "blockcheck.h"	58	#include "blockcheck.h"
58	#include "dlmglue.h"	59	#include "dlmglue.h"
59	#include "export.h"	60	#include "export.h"
@@ -1616,12 +1617,17 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1616	return 0;	1617	return 0;
1617	}	1618	}
1618		1619
		1620	wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];
		1621
1619	static int __init ocfs2_init(void)	1622	static int __init ocfs2_init(void)
1620	{	1623	{
1621	int status;	1624	int status, i;
1622		1625
1623	ocfs2_print_version();	1626	ocfs2_print_version();
1624		1627
		1628	for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
		1629	init_waitqueue_head(&ocfs2__ioend_wq[i]);
		1630
1625	status = init_ocfs2_uptodate_cache();	1631	status = init_ocfs2_uptodate_cache();
1626	if (status < 0) {	1632	if (status < 0) {
1627	mlog_errno(status);	1633	mlog_errno(status);
@@ -1760,7 +1766,7 @@ static void ocfs2_inode_init_once(void *data)
1760	ocfs2_extent_map_init(&oi->vfs_inode);	1766	ocfs2_extent_map_init(&oi->vfs_inode);
1761	INIT_LIST_HEAD(&oi->ip_io_markers);	1767	INIT_LIST_HEAD(&oi->ip_io_markers);
1762	oi->ip_dir_start_lookup = 0;	1768	oi->ip_dir_start_lookup = 0;
1763		1769	atomic_set(&oi->ip_unaligned_aio, 0);
1764	init_rwsem(&oi->ip_alloc_sem);	1770	init_rwsem(&oi->ip_alloc_sem);
1765	init_rwsem(&oi->ip_xattr_sem);	1771	init_rwsem(&oi->ip_xattr_sem);
1766	mutex_init(&oi->ip_io_mutex);	1772	mutex_init(&oi->ip_io_mutex);