about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ocfs2
diff options
context:
space:
mode:
author: Tristan Ye <tristan.ye@oracle.com> 2010-10-11 04:46:39 -0400
committer: Joel Becker <joel.becker@oracle.com> 2010-10-11 17:14:55 -0400
commit: 7bdb0d18bfd381cc5491eb95973ec5604b356c7e (patch)
tree: 1abe07df935a336eeac5c7705dc9b59341b47b0a /fs/ocfs2
parent: 75d9bbc73804285020aa4d99bd2a9600edea8945 (diff)
ocfs2: Add a mount option "coherency=*" to handle cluster coherency for O_DIRECT writes.
Currently, the default behavior of O_DIRECT writes is to allow concurrent writes from multiple nodes to the same file, with no cluster coherency guaranteed (no EX lock is held). This can leave stale data in the cache for buffered reads on other nodes.

The new mount option introduces a choice between two different behaviors for O_DIRECT writes:

* coherency=full, the default value, disallows concurrent O_DIRECT writes by taking EX locks.

* coherency=buffered allows concurrent O_DIRECT writes among nodes without taking the EX lock, which gains higher performance at the risk of getting stale data on other nodes.

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--  fs/ocfs2/file.c   29
-rw-r--r--  fs/ocfs2/ocfs2.h   3
-rw-r--r--  fs/ocfs2/super.c  15
3 files changed, 45 insertions, 2 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 13af9937bdda..9e8cc4346b76 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2225,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2225 struct file *file = iocb->ki_filp; 2225 struct file *file = iocb->ki_filp;
2226 struct inode *inode = file->f_path.dentry->d_inode; 2226 struct inode *inode = file->f_path.dentry->d_inode;
2227 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2227 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2228 int full_coherency = !(osb->s_mount_opt &
2229 OCFS2_MOUNT_COHERENCY_BUFFERED);
2228 2230
2229 mlog_entry("(0x%p, %u, '%.*s')\n", file, 2231 mlog_entry("(0x%p, %u, '%.*s')\n", file,
2230 (unsigned int)nr_segs, 2232 (unsigned int)nr_segs,
@@ -2248,14 +2250,37 @@ relock:
2248 have_alloc_sem = 1; 2250 have_alloc_sem = 1;
2249 } 2251 }
2250 2252
2251 /* concurrent O_DIRECT writes are allowed */ 2253 /*
2252 rw_level = !direct_io; 2254 * Concurrent O_DIRECT writes are allowed with
2255 * mount_option "coherency=buffered".
2256 */
2257 rw_level = (!direct_io || full_coherency);
2258
2253 ret = ocfs2_rw_lock(inode, rw_level); 2259 ret = ocfs2_rw_lock(inode, rw_level);
2254 if (ret < 0) { 2260 if (ret < 0) {
2255 mlog_errno(ret); 2261 mlog_errno(ret);
2256 goto out_sems; 2262 goto out_sems;
2257 } 2263 }
2258 2264
2265 /*
2266 * O_DIRECT writes with "coherency=full" need to take EX cluster
2267 * inode_lock to guarantee coherency.
2268 */
2269 if (direct_io && full_coherency) {
2270 /*
2271 * We need to take and drop the inode lock to force
2272 * other nodes to drop their caches. Buffered I/O
2273 * already does this in write_begin().
2274 */
2275 ret = ocfs2_inode_lock(inode, NULL, 1);
2276 if (ret < 0) {
2277 mlog_errno(ret);
2278 goto out_sems;
2279 }
2280
2281 ocfs2_inode_unlock(inode, 1);
2282 }
2283
2259 can_do_direct = direct_io; 2284 can_do_direct = direct_io;
2260 ret = ocfs2_prepare_inode_for_write(file, ppos, 2285 ret = ocfs2_prepare_inode_for_write(file, ppos,
2261 iocb->ki_left, appending, 2286 iocb->ki_left, appending,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 687e291d73f2..3064feef1430 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -263,6 +263,9 @@ enum ocfs2_mount_options
263 control lists */ 263 control lists */
264 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ 264 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
265 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ 265 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
266
267 OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12 /* Allow concurrent O_DIRECT
268 writes */
266}; 269};
267 270
268#define OCFS2_OSB_SOFT_RO 0x0001 271#define OCFS2_OSB_SOFT_RO 0x0001
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index b578644b6637..9122d59f8127 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -177,6 +177,8 @@ enum {
177 Opt_noacl, 177 Opt_noacl,
178 Opt_usrquota, 178 Opt_usrquota,
179 Opt_grpquota, 179 Opt_grpquota,
180 Opt_coherency_buffered,
181 Opt_coherency_full,
180 Opt_resv_level, 182 Opt_resv_level,
181 Opt_dir_resv_level, 183 Opt_dir_resv_level,
182 Opt_err, 184 Opt_err,
@@ -205,6 +207,8 @@ static const match_table_t tokens = {
205 {Opt_noacl, "noacl"}, 207 {Opt_noacl, "noacl"},
206 {Opt_usrquota, "usrquota"}, 208 {Opt_usrquota, "usrquota"},
207 {Opt_grpquota, "grpquota"}, 209 {Opt_grpquota, "grpquota"},
210 {Opt_coherency_buffered, "coherency=buffered"},
211 {Opt_coherency_full, "coherency=full"},
208 {Opt_resv_level, "resv_level=%u"}, 212 {Opt_resv_level, "resv_level=%u"},
209 {Opt_dir_resv_level, "dir_resv_level=%u"}, 213 {Opt_dir_resv_level, "dir_resv_level=%u"},
210 {Opt_err, NULL} 214 {Opt_err, NULL}
@@ -1452,6 +1456,12 @@ static int ocfs2_parse_options(struct super_block *sb,
1452 case Opt_grpquota: 1456 case Opt_grpquota:
1453 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; 1457 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
1454 break; 1458 break;
1459 case Opt_coherency_buffered:
1460 mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED;
1461 break;
1462 case Opt_coherency_full:
1463 mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED;
1464 break;
1455 case Opt_acl: 1465 case Opt_acl:
1456 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; 1466 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1457 mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; 1467 mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
@@ -1550,6 +1560,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1550 if (opts & OCFS2_MOUNT_GRPQUOTA) 1560 if (opts & OCFS2_MOUNT_GRPQUOTA)
1551 seq_printf(s, ",grpquota"); 1561 seq_printf(s, ",grpquota");
1552 1562
1563 if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED)
1564 seq_printf(s, ",coherency=buffered");
1565 else
1566 seq_printf(s, ",coherency=full");
1567
1553 if (opts & OCFS2_MOUNT_NOUSERXATTR) 1568 if (opts & OCFS2_MOUNT_NOUSERXATTR)
1554 seq_printf(s, ",nouser_xattr"); 1569 seq_printf(s, ",nouser_xattr");
1555 else 1570 else