diff options
author | Tristan Ye <tristan.ye@oracle.com> | 2010-10-11 04:46:39 -0400 |
---|---|---|
committer | Joel Becker <joel.becker@oracle.com> | 2010-10-11 17:14:55 -0400 |
commit | 7bdb0d18bfd381cc5491eb95973ec5604b356c7e (patch) | |
tree | 1abe07df935a336eeac5c7705dc9b59341b47b0a /fs/ocfs2 | |
parent | 75d9bbc73804285020aa4d99bd2a9600edea8945 (diff) |
ocfs2: Add a mount option "coherency=*" to handle cluster coherency for O_DIRECT writes.
Currently, the default behavior of O_DIRECT writes is to allow
concurrent writes from multiple nodes to the same file, with no cluster
coherency guaranteed (no EX lock held). This can leave stale data in
the cache for buffered reads on other nodes.
The new mount option introduces a way to choose between two different
behaviors for O_DIRECT writes:
* coherency=full, as the default value, will disallow
concurrent O_DIRECT writes by taking
EX locks.
* coherency=buffered, allows concurrent O_DIRECT writes
without an EX lock among nodes, which
gains higher performance at the risk of
reading stale data on other nodes.
Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/file.c | 29 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 3 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 15 |
3 files changed, 45 insertions, 2 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 13af9937bdda..9e8cc4346b76 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2225,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
2225 | struct file *file = iocb->ki_filp; | 2225 | struct file *file = iocb->ki_filp; |
2226 | struct inode *inode = file->f_path.dentry->d_inode; | 2226 | struct inode *inode = file->f_path.dentry->d_inode; |
2227 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2227 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2228 | int full_coherency = !(osb->s_mount_opt & | ||
2229 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
2228 | 2230 | ||
2229 | mlog_entry("(0x%p, %u, '%.*s')\n", file, | 2231 | mlog_entry("(0x%p, %u, '%.*s')\n", file, |
2230 | (unsigned int)nr_segs, | 2232 | (unsigned int)nr_segs, |
@@ -2248,14 +2250,37 @@ relock: | |||
2248 | have_alloc_sem = 1; | 2250 | have_alloc_sem = 1; |
2249 | } | 2251 | } |
2250 | 2252 | ||
2251 | /* concurrent O_DIRECT writes are allowed */ | 2253 | /* |
2252 | rw_level = !direct_io; | 2254 | * Concurrent O_DIRECT writes are allowed with |
2255 | * mount_option "coherency=buffered". | ||
2256 | */ | ||
2257 | rw_level = (!direct_io || full_coherency); | ||
2258 | |||
2253 | ret = ocfs2_rw_lock(inode, rw_level); | 2259 | ret = ocfs2_rw_lock(inode, rw_level); |
2254 | if (ret < 0) { | 2260 | if (ret < 0) { |
2255 | mlog_errno(ret); | 2261 | mlog_errno(ret); |
2256 | goto out_sems; | 2262 | goto out_sems; |
2257 | } | 2263 | } |
2258 | 2264 | ||
2265 | /* | ||
2266 | * O_DIRECT writes with "coherency=full" need to take EX cluster | ||
2267 | * inode_lock to guarantee coherency. | ||
2268 | */ | ||
2269 | if (direct_io && full_coherency) { | ||
2270 | /* | ||
2271 | * We need to take and drop the inode lock to force | ||
2272 | * other nodes to drop their caches. Buffered I/O | ||
2273 | * already does this in write_begin(). | ||
2274 | */ | ||
2275 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
2276 | if (ret < 0) { | ||
2277 | mlog_errno(ret); | ||
2278 | goto out_sems; | ||
2279 | } | ||
2280 | |||
2281 | ocfs2_inode_unlock(inode, 1); | ||
2282 | } | ||
2283 | |||
2259 | can_do_direct = direct_io; | 2284 | can_do_direct = direct_io; |
2260 | ret = ocfs2_prepare_inode_for_write(file, ppos, | 2285 | ret = ocfs2_prepare_inode_for_write(file, ppos, |
2261 | iocb->ki_left, appending, | 2286 | iocb->ki_left, appending, |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 687e291d73f2..3064feef1430 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -263,6 +263,9 @@ enum ocfs2_mount_options | |||
263 | control lists */ | 263 | control lists */ |
264 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ | 264 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ |
265 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ | 265 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ |
266 | |||
267 | OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12 /* Allow concurrent O_DIRECT | ||
268 | writes */ | ||
266 | }; | 269 | }; |
267 | 270 | ||
268 | #define OCFS2_OSB_SOFT_RO 0x0001 | 271 | #define OCFS2_OSB_SOFT_RO 0x0001 |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index b578644b6637..9122d59f8127 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -177,6 +177,8 @@ enum { | |||
177 | Opt_noacl, | 177 | Opt_noacl, |
178 | Opt_usrquota, | 178 | Opt_usrquota, |
179 | Opt_grpquota, | 179 | Opt_grpquota, |
180 | Opt_coherency_buffered, | ||
181 | Opt_coherency_full, | ||
180 | Opt_resv_level, | 182 | Opt_resv_level, |
181 | Opt_dir_resv_level, | 183 | Opt_dir_resv_level, |
182 | Opt_err, | 184 | Opt_err, |
@@ -205,6 +207,8 @@ static const match_table_t tokens = { | |||
205 | {Opt_noacl, "noacl"}, | 207 | {Opt_noacl, "noacl"}, |
206 | {Opt_usrquota, "usrquota"}, | 208 | {Opt_usrquota, "usrquota"}, |
207 | {Opt_grpquota, "grpquota"}, | 209 | {Opt_grpquota, "grpquota"}, |
210 | {Opt_coherency_buffered, "coherency=buffered"}, | ||
211 | {Opt_coherency_full, "coherency=full"}, | ||
208 | {Opt_resv_level, "resv_level=%u"}, | 212 | {Opt_resv_level, "resv_level=%u"}, |
209 | {Opt_dir_resv_level, "dir_resv_level=%u"}, | 213 | {Opt_dir_resv_level, "dir_resv_level=%u"}, |
210 | {Opt_err, NULL} | 214 | {Opt_err, NULL} |
@@ -1452,6 +1456,12 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1452 | case Opt_grpquota: | 1456 | case Opt_grpquota: |
1453 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; | 1457 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; |
1454 | break; | 1458 | break; |
1459 | case Opt_coherency_buffered: | ||
1460 | mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
1461 | break; | ||
1462 | case Opt_coherency_full: | ||
1463 | mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
1464 | break; | ||
1455 | case Opt_acl: | 1465 | case Opt_acl: |
1456 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; | 1466 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; |
1457 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; | 1467 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; |
@@ -1550,6 +1560,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1550 | if (opts & OCFS2_MOUNT_GRPQUOTA) | 1560 | if (opts & OCFS2_MOUNT_GRPQUOTA) |
1551 | seq_printf(s, ",grpquota"); | 1561 | seq_printf(s, ",grpquota"); |
1552 | 1562 | ||
1563 | if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) | ||
1564 | seq_printf(s, ",coherency=buffered"); | ||
1565 | else | ||
1566 | seq_printf(s, ",coherency=full"); | ||
1567 | |||
1553 | if (opts & OCFS2_MOUNT_NOUSERXATTR) | 1568 | if (opts & OCFS2_MOUNT_NOUSERXATTR) |
1554 | seq_printf(s, ",nouser_xattr"); | 1569 | seq_printf(s, ",nouser_xattr"); |
1555 | else | 1570 | else |