aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/super.c
diff options
context:
space:
mode:
author Mark Fasheh <mfasheh@suse.com> 2011-06-22 17:23:38 -0400
committer Joel Becker <jlbec@evilplan.org> 2011-07-28 05:07:16 -0400
commit a11f7e63c59810a81494d4c4b028af707d4c7ca4 (patch)
tree 6d28cfc9519f96db5c20780bf765de9e0fc03bef /fs/ocfs2/super.c
parent 730e663bd82c1a10a85ff00728d34152a5a67ec8 (diff)
ocfs2: serialize unaligned aio
Fix a corruption that can happen when we have two or more outstanding aios to an overlapping unaligned region. Ext4 (e9e3bcecf44c04b9e6b505fd8e2eb9cea58fb94d) and xfs recently had to fix similar issues. In our case what happens is that we can have an outstanding aio on a region, and if a write comes in with some bytes overlapping the original aio we may decide to read that region into a page before continuing (typically because of buffered-io fallback). Since we have no ordering guarantees with the aio, we can read stale or bad data into the page and then write it back out. If the i/o is page and block aligned, then we avoid this issue as there won't be any need to read data from disk. I took the same approach as Eric in the ext4 patch and introduced some serialization of unaligned async direct i/o. I don't expect this to have an effect on the most common cases of AIO. Unaligned aio will be slower, but that's far more acceptable than data corruption.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <jlbec@evilplan.org>
Diffstat (limited to 'fs/ocfs2/super.c')
-rw-r--r-- fs/ocfs2/super.c 10
1 files changed, 8 insertions, 2 deletions
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 029c4cd8a691..603f5fe9f816 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -54,6 +54,7 @@
54#include "ocfs1_fs_compat.h" 54#include "ocfs1_fs_compat.h"
55 55
56#include "alloc.h" 56#include "alloc.h"
57#include "aops.h"
57#include "blockcheck.h" 58#include "blockcheck.h"
58#include "dlmglue.h" 59#include "dlmglue.h"
59#include "export.h" 60#include "export.h"
@@ -1616,12 +1617,17 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1616 return 0; 1617 return 0;
1617} 1618}
1618 1619
1620wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];
1621
1619static int __init ocfs2_init(void) 1622static int __init ocfs2_init(void)
1620{ 1623{
1621 int status; 1624 int status, i;
1622 1625
1623 ocfs2_print_version(); 1626 ocfs2_print_version();
1624 1627
1628 for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
1629 init_waitqueue_head(&ocfs2__ioend_wq[i]);
1630
1625 status = init_ocfs2_uptodate_cache(); 1631 status = init_ocfs2_uptodate_cache();
1626 if (status < 0) { 1632 if (status < 0) {
1627 mlog_errno(status); 1633 mlog_errno(status);
@@ -1760,7 +1766,7 @@ static void ocfs2_inode_init_once(void *data)
1760 ocfs2_extent_map_init(&oi->vfs_inode); 1766 ocfs2_extent_map_init(&oi->vfs_inode);
1761 INIT_LIST_HEAD(&oi->ip_io_markers); 1767 INIT_LIST_HEAD(&oi->ip_io_markers);
1762 oi->ip_dir_start_lookup = 0; 1768 oi->ip_dir_start_lookup = 0;
1763 1769 atomic_set(&oi->ip_unaligned_aio, 0);
1764 init_rwsem(&oi->ip_alloc_sem); 1770 init_rwsem(&oi->ip_alloc_sem);
1765 init_rwsem(&oi->ip_xattr_sem); 1771 init_rwsem(&oi->ip_xattr_sem);
1766 mutex_init(&oi->ip_io_mutex); 1772 mutex_init(&oi->ip_io_mutex);