aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Wilcox <willy@infradead.org>2018-03-28 16:03:45 -0400
committerMatthew Wilcox <willy@infradead.org>2018-10-21 10:46:43 -0400
commit9fc747f68d49f4b63029e3a1e87c49d23771a199 (patch)
tree9adbc9df531f7c72afe148788628b6c21e2b71cc
parent07f2d89cc270936ac314e7cc4ac57077d7f08aef (diff)
dax: Convert dax writeback to XArray
Use XArray iteration instead of a pagevec.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
-rw-r--r--  fs/dax.c | 130
1 file changed, 62 insertions(+), 68 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 83a510068b95..de3ba829a3f4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -997,11 +997,9 @@ unlock_pte:
997 i_mmap_unlock_read(mapping); 997 i_mmap_unlock_read(mapping);
998} 998}
999 999
1000static int dax_writeback_one(struct dax_device *dax_dev, 1000static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
1001 struct address_space *mapping, pgoff_t index, void *entry) 1001 struct address_space *mapping, void *entry)
1002{ 1002{
1003 struct radix_tree_root *pages = &mapping->i_pages;
1004 void *entry2, **slot;
1005 unsigned long pfn; 1003 unsigned long pfn;
1006 long ret = 0; 1004 long ret = 0;
1007 size_t size; 1005 size_t size;
@@ -1013,29 +1011,35 @@ static int dax_writeback_one(struct dax_device *dax_dev,
1013 if (WARN_ON(!xa_is_value(entry))) 1011 if (WARN_ON(!xa_is_value(entry)))
1014 return -EIO; 1012 return -EIO;
1015 1013
1016 xa_lock_irq(pages); 1014 if (unlikely(dax_is_locked(entry))) {
1017 entry2 = get_unlocked_mapping_entry(mapping, index, &slot); 1015 void *old_entry = entry;
1018 /* Entry got punched out / reallocated? */ 1016
1019 if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2))) 1017 entry = get_unlocked_entry(xas);
1020 goto put_unlocked; 1018
1021 /* 1019 /* Entry got punched out / reallocated? */
1022 * Entry got reallocated elsewhere? No need to writeback. We have to 1020 if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
1023 * compare pfns as we must not bail out due to difference in lockbit 1021 goto put_unlocked;
1024 * or entry type. 1022 /*
1025 */ 1023 * Entry got reallocated elsewhere? No need to writeback.
1026 if (dax_to_pfn(entry2) != dax_to_pfn(entry)) 1024 * We have to compare pfns as we must not bail out due to
1027 goto put_unlocked; 1025 * difference in lockbit or entry type.
1028 if (WARN_ON_ONCE(dax_is_empty_entry(entry) || 1026 */
1029 dax_is_zero_entry(entry))) { 1027 if (dax_to_pfn(old_entry) != dax_to_pfn(entry))
1030 ret = -EIO; 1028 goto put_unlocked;
1031 goto put_unlocked; 1029 if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
1030 dax_is_zero_entry(entry))) {
1031 ret = -EIO;
1032 goto put_unlocked;
1033 }
1034
1035 /* Another fsync thread may have already done this entry */
1036 if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE))
1037 goto put_unlocked;
1032 } 1038 }
1033 1039
1034 /* Another fsync thread may have already written back this entry */
1035 if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
1036 goto put_unlocked;
1037 /* Lock the entry to serialize with page faults */ 1040 /* Lock the entry to serialize with page faults */
1038 entry = lock_slot(mapping, slot); 1041 dax_lock_entry(xas, entry);
1042
1039 /* 1043 /*
1040 * We can clear the tag now but we have to be careful so that concurrent 1044 * We can clear the tag now but we have to be careful so that concurrent
1041 * dax_writeback_one() calls for the same index cannot finish before we 1045 * dax_writeback_one() calls for the same index cannot finish before we
@@ -1043,8 +1047,8 @@ static int dax_writeback_one(struct dax_device *dax_dev,
1043 * at the entry only under the i_pages lock and once they do that 1047 * at the entry only under the i_pages lock and once they do that
1044 * they will see the entry locked and wait for it to unlock. 1048 * they will see the entry locked and wait for it to unlock.
1045 */ 1049 */
1046 radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE); 1050 xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE);
1047 xa_unlock_irq(pages); 1051 xas_unlock_irq(xas);
1048 1052
1049 /* 1053 /*
1050 * Even if dax_writeback_mapping_range() was given a wbc->range_start 1054 * Even if dax_writeback_mapping_range() was given a wbc->range_start
@@ -1056,7 +1060,7 @@ static int dax_writeback_one(struct dax_device *dax_dev,
1056 pfn = dax_to_pfn(entry); 1060 pfn = dax_to_pfn(entry);
1057 size = PAGE_SIZE << dax_entry_order(entry); 1061 size = PAGE_SIZE << dax_entry_order(entry);
1058 1062
1059 dax_entry_mkclean(mapping, index, pfn); 1063 dax_entry_mkclean(mapping, xas->xa_index, pfn);
1060 dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size); 1064 dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
1061 /* 1065 /*
1062 * After we have flushed the cache, we can clear the dirty tag. There 1066 * After we have flushed the cache, we can clear the dirty tag. There
@@ -1064,16 +1068,18 @@ static int dax_writeback_one(struct dax_device *dax_dev,
1064 * the pfn mappings are writeprotected and fault waits for mapping 1068 * the pfn mappings are writeprotected and fault waits for mapping
1065 * entry lock. 1069 * entry lock.
1066 */ 1070 */
1067 xa_lock_irq(pages); 1071 xas_reset(xas);
1068 radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY); 1072 xas_lock_irq(xas);
1069 xa_unlock_irq(pages); 1073 xas_store(xas, entry);
1070 trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT); 1074 xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
1071 put_locked_mapping_entry(mapping, index); 1075 dax_wake_entry(xas, entry, false);
1076
1077 trace_dax_writeback_one(mapping->host, xas->xa_index,
1078 size >> PAGE_SHIFT);
1072 return ret; 1079 return ret;
1073 1080
1074 put_unlocked: 1081 put_unlocked:
1075 put_unlocked_mapping_entry(mapping, index, entry2); 1082 put_unlocked_entry(xas, entry);
1076 xa_unlock_irq(pages);
1077 return ret; 1083 return ret;
1078} 1084}
1079 1085
@@ -1085,13 +1091,13 @@ static int dax_writeback_one(struct dax_device *dax_dev,
1085int dax_writeback_mapping_range(struct address_space *mapping, 1091int dax_writeback_mapping_range(struct address_space *mapping,
1086 struct block_device *bdev, struct writeback_control *wbc) 1092 struct block_device *bdev, struct writeback_control *wbc)
1087{ 1093{
1094 XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
1088 struct inode *inode = mapping->host; 1095 struct inode *inode = mapping->host;
1089 pgoff_t start_index, end_index; 1096 pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
1090 pgoff_t indices[PAGEVEC_SIZE];
1091 struct dax_device *dax_dev; 1097 struct dax_device *dax_dev;
1092 struct pagevec pvec; 1098 void *entry;
1093 bool done = false; 1099 int ret = 0;
1094 int i, ret = 0; 1100 unsigned int scanned = 0;
1095 1101
1096 if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) 1102 if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
1097 return -EIO; 1103 return -EIO;
@@ -1103,41 +1109,29 @@ int dax_writeback_mapping_range(struct address_space *mapping,
1103 if (!dax_dev) 1109 if (!dax_dev)
1104 return -EIO; 1110 return -EIO;
1105 1111
1106 start_index = wbc->range_start >> PAGE_SHIFT; 1112 trace_dax_writeback_range(inode, xas.xa_index, end_index);
1107 end_index = wbc->range_end >> PAGE_SHIFT;
1108
1109 trace_dax_writeback_range(inode, start_index, end_index);
1110
1111 tag_pages_for_writeback(mapping, start_index, end_index);
1112 1113
1113 pagevec_init(&pvec); 1114 tag_pages_for_writeback(mapping, xas.xa_index, end_index);
1114 while (!done) {
1115 pvec.nr = find_get_entries_tag(mapping, start_index,
1116 PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
1117 pvec.pages, indices);
1118 1115
1119 if (pvec.nr == 0) 1116 xas_lock_irq(&xas);
1117 xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) {
1118 ret = dax_writeback_one(&xas, dax_dev, mapping, entry);
1119 if (ret < 0) {
1120 mapping_set_error(mapping, ret);
1120 break; 1121 break;
1121
1122 for (i = 0; i < pvec.nr; i++) {
1123 if (indices[i] > end_index) {
1124 done = true;
1125 break;
1126 }
1127
1128 ret = dax_writeback_one(dax_dev, mapping, indices[i],
1129 pvec.pages[i]);
1130 if (ret < 0) {
1131 mapping_set_error(mapping, ret);
1132 goto out;
1133 }
1134 } 1122 }
1135 start_index = indices[pvec.nr - 1] + 1; 1123 if (++scanned % XA_CHECK_SCHED)
1124 continue;
1125
1126 xas_pause(&xas);
1127 xas_unlock_irq(&xas);
1128 cond_resched();
1129 xas_lock_irq(&xas);
1136 } 1130 }
1137out: 1131 xas_unlock_irq(&xas);
1138 put_dax(dax_dev); 1132 put_dax(dax_dev);
1139 trace_dax_writeback_range_done(inode, start_index, end_index); 1133 trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
1140 return (ret < 0 ? ret : 0); 1134 return ret;
1141} 1135}
1142EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); 1136EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
1143 1137