summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jeff Layton <jlayton@redhat.com> 2017-04-04 08:39:46 -0400
committer Ilya Dryomov <idryomov@gmail.com> 2017-05-04 03:19:22 -0400
commit 26544c623e741ac6445f8b1ae369ee32ae1794ad (patch)
tree 72810f59f5735dad3b29c5a127ae2a823fc1ff1d
parent 6fc1fe5e4cfc8939ee59a570b087946042a30140 (diff)
ceph: when seeing write errors on an inode, switch to sync writes
Currently, we don't have a real feedback mechanism in place for when we start seeing buffered writeback errors. If writeback is failing, there is nothing that prevents an application from continuing to dirty pages that aren't being cleaned. In the event that we're seeing write errors of any sort occur on an inode, have the callback set a flag to force further writes to be synchronous. When the next write succeeds, clear the flag to allow buffered writeback to continue. Since this is just a hint to the write submission mechanism, we only take the i_ceph_lock when a lockless check shows that the flag needs to be changed. Signed-off-by: Jeff Layton <jlayton@redhat.com> Reviewed-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r-- fs/ceph/addr.c 6
-rw-r--r-- fs/ceph/file.c 31
-rw-r--r-- fs/ceph/super.h 26
3 files changed, 49 insertions, 14 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6cdf94459ac4..e253102b43cd 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -670,8 +670,12 @@ static void writepages_finish(struct ceph_osd_request *req)
670 bool remove_page; 670 bool remove_page;
671 671
672 dout("writepages_finish %p rc %d\n", inode, rc); 672 dout("writepages_finish %p rc %d\n", inode, rc);
673 if (rc < 0) 673 if (rc < 0) {
674 mapping_set_error(mapping, rc); 674 mapping_set_error(mapping, rc);
675 ceph_set_error_write(ci);
676 } else {
677 ceph_clear_error_write(ci);
678 }
675 679
676 /* 680 /*
677 * We lost the cache cap, need to truncate the page before 681 * We lost the cache cap, need to truncate the page before
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 134c978141d0..39866d6a34b6 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1089,19 +1089,22 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
1089 1089
1090out: 1090out:
1091 ceph_osdc_put_request(req); 1091 ceph_osdc_put_request(req);
1092 if (ret == 0) { 1092 if (ret != 0) {
1093 pos += len; 1093 ceph_set_error_write(ci);
1094 written += len;
1095
1096 if (pos > i_size_read(inode)) {
1097 check_caps = ceph_inode_set_size(inode, pos);
1098 if (check_caps)
1099 ceph_check_caps(ceph_inode(inode),
1100 CHECK_CAPS_AUTHONLY,
1101 NULL);
1102 }
1103 } else
1104 break; 1094 break;
1095 }
1096
1097 ceph_clear_error_write(ci);
1098 pos += len;
1099 written += len;
1100 if (pos > i_size_read(inode)) {
1101 check_caps = ceph_inode_set_size(inode, pos);
1102 if (check_caps)
1103 ceph_check_caps(ceph_inode(inode),
1104 CHECK_CAPS_AUTHONLY,
1105 NULL);
1106 }
1107
1105 } 1108 }
1106 1109
1107 if (ret != -EOLDSNAPC && written > 0) { 1110 if (ret != -EOLDSNAPC && written > 0) {
@@ -1307,6 +1310,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
1307 } 1310 }
1308 1311
1309retry_snap: 1312retry_snap:
1313 /* FIXME: not complete since it doesn't account for being at quota */
1310 if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) { 1314 if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
1311 err = -ENOSPC; 1315 err = -ENOSPC;
1312 goto out; 1316 goto out;
@@ -1328,7 +1332,8 @@ retry_snap:
1328 inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); 1332 inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
1329 1333
1330 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || 1334 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
1331 (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { 1335 (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) ||
1336 (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) {
1332 struct ceph_snap_context *snapc; 1337 struct ceph_snap_context *snapc;
1333 struct iov_iter data; 1338 struct iov_iter data;
1334 inode_unlock(inode); 1339 inode_unlock(inode);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c68e6a045fb9..7334ee86b9e8 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -474,6 +474,32 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
474#define CEPH_I_CAP_DROPPED (1 << 8) /* caps were forcibly dropped */ 474#define CEPH_I_CAP_DROPPED (1 << 8) /* caps were forcibly dropped */
475#define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */ 475#define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */
476#define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */ 476#define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */
477#define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */
478
479/*
480 * We set the ERROR_WRITE bit when we start seeing write errors on an inode
481 * and then clear it when they start succeeding. Note that we do a lockless
482 * check first, and only take the lock if it looks like it needs to be changed.
483 * The write submission code just takes this as a hint, so we're not too
484 * worried if a few slip through in either direction.
485 */
486static inline void ceph_set_error_write(struct ceph_inode_info *ci)
487{
488 if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) {
489 spin_lock(&ci->i_ceph_lock);
490 ci->i_ceph_flags |= CEPH_I_ERROR_WRITE;
491 spin_unlock(&ci->i_ceph_lock);
492 }
493}
494
495static inline void ceph_clear_error_write(struct ceph_inode_info *ci)
496{
497 if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) {
498 spin_lock(&ci->i_ceph_lock);
499 ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE;
500 spin_unlock(&ci->i_ceph_lock);
501 }
502}
477 503
478static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, 504static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
479 long long release_count, 505 long long release_count,