aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yan, Zheng <zyan@redhat.com> 2017-09-01 04:53:58 -0400
committer Ilya Dryomov <idryomov@gmail.com> 2017-09-06 13:56:58 -0400
commit 2a2d927e35dd8dc4faf8fbc211533cf5f8840f5b (patch)
tree 8acb82e8f3217c8fcf9877a606398fb65940f5b0
parent 590e9d9861f5f21fbbb0266e40e6a17bc5084dd0 (diff)
ceph: ignore wbc->range_{start,end} when write back snapshot data
writepages() needs to write dirty pages to the OSD in strict order of snapshot context: it must first write the dirty pages associated with the oldest snapshot context. In the write-range case, dirty pages in the specified range can be associated with a newer snapc. They are not writeable until all dirty pages associated with the oldest snapc have been written.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r-- fs/ceph/addr.c 80
1 files changed, 46 insertions, 34 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4a54f7369f51..201e529e8a6c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -469,6 +469,7 @@ struct ceph_writeback_ctl
469 u64 truncate_size; 469 u64 truncate_size;
470 u32 truncate_seq; 470 u32 truncate_seq;
471 bool size_stable; 471 bool size_stable;
472 bool head_snapc;
472}; 473};
473 474
474/* 475/*
@@ -504,6 +505,7 @@ get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl,
504 } 505 }
505 ctl->truncate_size = capsnap->truncate_size; 506 ctl->truncate_size = capsnap->truncate_size;
506 ctl->truncate_seq = capsnap->truncate_seq; 507 ctl->truncate_seq = capsnap->truncate_seq;
508 ctl->head_snapc = false;
507 } 509 }
508 510
509 if (snapc) 511 if (snapc)
@@ -524,6 +526,7 @@ get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl,
524 ctl->truncate_size = ci->i_truncate_size; 526 ctl->truncate_size = ci->i_truncate_size;
525 ctl->truncate_seq = ci->i_truncate_seq; 527 ctl->truncate_seq = ci->i_truncate_seq;
526 ctl->size_stable = false; 528 ctl->size_stable = false;
529 ctl->head_snapc = true;
527 } 530 }
528 } 531 }
529 spin_unlock(&ci->i_ceph_lock); 532 spin_unlock(&ci->i_ceph_lock);
@@ -781,7 +784,7 @@ static int ceph_writepages_start(struct address_space *mapping,
781 struct ceph_inode_info *ci = ceph_inode(inode); 784 struct ceph_inode_info *ci = ceph_inode(inode);
782 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 785 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
783 struct ceph_vino vino = ceph_vino(inode); 786 struct ceph_vino vino = ceph_vino(inode);
784 pgoff_t index, start_index, end; 787 pgoff_t index, start_index, end = -1;
785 struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; 788 struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
786 struct pagevec pvec; 789 struct pagevec pvec;
787 int rc = 0; 790 int rc = 0;
@@ -810,25 +813,10 @@ static int ceph_writepages_start(struct address_space *mapping,
810 pagevec_init(&pvec, 0); 813 pagevec_init(&pvec, 0);
811 814
812 start_index = wbc->range_cyclic ? mapping->writeback_index : 0; 815 start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
813 816 index = start_index;
814 /* where to start/end? */
815 if (wbc->range_cyclic) {
816 index = start_index;
817 end = -1;
818 should_loop = (index > 0);
819 dout(" cyclic, start at %lu\n", index);
820 } else {
821 index = wbc->range_start >> PAGE_SHIFT;
822 end = wbc->range_end >> PAGE_SHIFT;
823 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
824 range_whole = true;
825 should_loop = false;
826 dout(" not cyclic, %lu to %lu\n", index, end);
827 }
828 817
829retry: 818retry:
830 /* find oldest snap context with dirty data */ 819 /* find oldest snap context with dirty data */
831 ceph_put_snap_context(snapc);
832 snapc = get_oldest_context(inode, &ceph_wbc, NULL); 820 snapc = get_oldest_context(inode, &ceph_wbc, NULL);
833 if (!snapc) { 821 if (!snapc) {
834 /* hmm, why does writepages get called when there 822 /* hmm, why does writepages get called when there
@@ -839,13 +827,33 @@ retry:
839 dout(" oldest snapc is %p seq %lld (%d snaps)\n", 827 dout(" oldest snapc is %p seq %lld (%d snaps)\n",
840 snapc, snapc->seq, snapc->num_snaps); 828 snapc, snapc->seq, snapc->num_snaps);
841 829
842 if (last_snapc && snapc != last_snapc) { 830 should_loop = false;
843 /* if we switched to a newer snapc, restart our scan at the 831 if (ceph_wbc.head_snapc && snapc != last_snapc) {
844 * start of the original file range. */ 832 /* where to start/end? */
845 dout(" snapc differs from last pass, restarting at %lu\n", 833 if (wbc->range_cyclic) {
846 index); 834 index = start_index;
847 index = start; 835 end = -1;
836 if (index > 0)
837 should_loop = true;
838 dout(" cyclic, start at %lu\n", index);
839 } else {
840 index = wbc->range_start >> PAGE_SHIFT;
841 end = wbc->range_end >> PAGE_SHIFT;
842 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
843 range_whole = true;
844 dout(" not cyclic, %lu to %lu\n", index, end);
845 }
846 } else if (!ceph_wbc.head_snapc) {
847 /* Do not respect wbc->range_{start,end}. Dirty pages
848 * in that range can be associated with newer snapc.
849 * They are not writeable until we write all dirty pages
850 * associated with 'snapc' get written */
851 if (index > 0 || wbc->sync_mode != WB_SYNC_NONE)
852 should_loop = true;
853 dout(" non-head snapc, range whole\n");
848 } 854 }
855
856 ceph_put_snap_context(last_snapc);
849 last_snapc = snapc; 857 last_snapc = snapc;
850 858
851 stop = false; 859 stop = false;
@@ -891,7 +899,9 @@ get_more_pages:
891 dout("end of range %p\n", page); 899 dout("end of range %p\n", page);
892 /* can't be range_cyclic (1st pass) because 900 /* can't be range_cyclic (1st pass) because
893 * end == -1 in that case. */ 901 * end == -1 in that case. */
894 stop = done = true; 902 stop = true;
903 if (ceph_wbc.head_snapc)
904 done = true;
895 unlock_page(page); 905 unlock_page(page);
896 break; 906 break;
897 } 907 }
@@ -1136,24 +1146,26 @@ new_request:
1136 if (pages) 1146 if (pages)
1137 goto new_request; 1147 goto new_request;
1138 1148
1139 if (wbc->nr_to_write <= 0) 1149 /*
1140 stop = done = true; 1150 * We stop writing back only if we are not doing
1151 * integrity sync. In case of integrity sync we have to
1152 * keep going until we have written all the pages
1153 * we tagged for writeback prior to entering this loop.
1154 */
1155 if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
1156 done = stop = true;
1141 1157
1142release_pvec_pages: 1158release_pvec_pages:
1143 dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr, 1159 dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
1144 pvec.nr ? pvec.pages[0] : NULL); 1160 pvec.nr ? pvec.pages[0] : NULL);
1145 pagevec_release(&pvec); 1161 pagevec_release(&pvec);
1146
1147 if (locked_pages && !done)
1148 goto retry;
1149 } 1162 }
1150 1163
1151 if (should_loop && !done) { 1164 if (should_loop && !done) {
1152 /* more to do; loop back to beginning of file */ 1165 /* more to do; loop back to beginning of file */
1153 dout("writepages looping back to beginning of file\n"); 1166 dout("writepages looping back to beginning of file\n");
1154 should_loop = false; 1167 end = start_index - 1; /* OK even when start_index == 0 */
1155 end = start_index - 1; 1168 start_index = 0;
1156
1157 index = 0; 1169 index = 0;
1158 goto retry; 1170 goto retry;
1159 } 1171 }
@@ -1163,8 +1175,8 @@ release_pvec_pages:
1163 1175
1164out: 1176out:
1165 ceph_osdc_put_request(req); 1177 ceph_osdc_put_request(req);
1166 ceph_put_snap_context(snapc); 1178 ceph_put_snap_context(last_snapc);
1167 dout("writepages done, rc = %d\n", rc); 1179 dout("writepages dend - startone, rc = %d\n", rc);
1168 return rc; 1180 return rc;
1169} 1181}
1170 1182