diff options
author | Matthew Wilcox <willy@infradead.org> | 2018-03-28 16:03:45 -0400 |
---|---|---|
committer | Matthew Wilcox <willy@infradead.org> | 2018-10-21 10:46:43 -0400 |
commit | 9fc747f68d49f4b63029e3a1e87c49d23771a199 (patch) | |
tree | 9adbc9df531f7c72afe148788628b6c21e2b71cc | |
parent | 07f2d89cc270936ac314e7cc4ac57077d7f08aef (diff) |
dax: Convert dax writeback to XArray
Use XArray iteration instead of a pagevec.
Signed-off-by: Matthew Wilcox <willy@infradead.org>
-rw-r--r-- | fs/dax.c | 130 |
1 file changed, 62 insertions, 68 deletions
@@ -997,11 +997,9 @@ unlock_pte: | |||
997 | i_mmap_unlock_read(mapping); | 997 | i_mmap_unlock_read(mapping); |
998 | } | 998 | } |
999 | 999 | ||
1000 | static int dax_writeback_one(struct dax_device *dax_dev, | 1000 | static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, |
1001 | struct address_space *mapping, pgoff_t index, void *entry) | 1001 | struct address_space *mapping, void *entry) |
1002 | { | 1002 | { |
1003 | struct radix_tree_root *pages = &mapping->i_pages; | ||
1004 | void *entry2, **slot; | ||
1005 | unsigned long pfn; | 1003 | unsigned long pfn; |
1006 | long ret = 0; | 1004 | long ret = 0; |
1007 | size_t size; | 1005 | size_t size; |
@@ -1013,29 +1011,35 @@ static int dax_writeback_one(struct dax_device *dax_dev, | |||
1013 | if (WARN_ON(!xa_is_value(entry))) | 1011 | if (WARN_ON(!xa_is_value(entry))) |
1014 | return -EIO; | 1012 | return -EIO; |
1015 | 1013 | ||
1016 | xa_lock_irq(pages); | 1014 | if (unlikely(dax_is_locked(entry))) { |
1017 | entry2 = get_unlocked_mapping_entry(mapping, index, &slot); | 1015 | void *old_entry = entry; |
1018 | /* Entry got punched out / reallocated? */ | 1016 | |
1019 | if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2))) | 1017 | entry = get_unlocked_entry(xas); |
1020 | goto put_unlocked; | 1018 | |
1021 | /* | 1019 | /* Entry got punched out / reallocated? */ |
1022 | * Entry got reallocated elsewhere? No need to writeback. We have to | 1020 | if (!entry || WARN_ON_ONCE(!xa_is_value(entry))) |
1023 | * compare pfns as we must not bail out due to difference in lockbit | 1021 | goto put_unlocked; |
1024 | * or entry type. | 1022 | /* |
1025 | */ | 1023 | * Entry got reallocated elsewhere? No need to writeback. |
1026 | if (dax_to_pfn(entry2) != dax_to_pfn(entry)) | 1024 | * We have to compare pfns as we must not bail out due to |
1027 | goto put_unlocked; | 1025 | * difference in lockbit or entry type. |
1028 | if (WARN_ON_ONCE(dax_is_empty_entry(entry) || | 1026 | */ |
1029 | dax_is_zero_entry(entry))) { | 1027 | if (dax_to_pfn(old_entry) != dax_to_pfn(entry)) |
1030 | ret = -EIO; | 1028 | goto put_unlocked; |
1031 | goto put_unlocked; | 1029 | if (WARN_ON_ONCE(dax_is_empty_entry(entry) || |
1030 | dax_is_zero_entry(entry))) { | ||
1031 | ret = -EIO; | ||
1032 | goto put_unlocked; | ||
1033 | } | ||
1034 | |||
1035 | /* Another fsync thread may have already done this entry */ | ||
1036 | if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE)) | ||
1037 | goto put_unlocked; | ||
1032 | } | 1038 | } |
1033 | 1039 | ||
1034 | /* Another fsync thread may have already written back this entry */ | ||
1035 | if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)) | ||
1036 | goto put_unlocked; | ||
1037 | /* Lock the entry to serialize with page faults */ | 1040 | /* Lock the entry to serialize with page faults */ |
1038 | entry = lock_slot(mapping, slot); | 1041 | dax_lock_entry(xas, entry); |
1042 | |||
1039 | /* | 1043 | /* |
1040 | * We can clear the tag now but we have to be careful so that concurrent | 1044 | * We can clear the tag now but we have to be careful so that concurrent |
1041 | * dax_writeback_one() calls for the same index cannot finish before we | 1045 | * dax_writeback_one() calls for the same index cannot finish before we |
@@ -1043,8 +1047,8 @@ static int dax_writeback_one(struct dax_device *dax_dev, | |||
1043 | * at the entry only under the i_pages lock and once they do that | 1047 | * at the entry only under the i_pages lock and once they do that |
1044 | * they will see the entry locked and wait for it to unlock. | 1048 | * they will see the entry locked and wait for it to unlock. |
1045 | */ | 1049 | */ |
1046 | radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE); | 1050 | xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE); |
1047 | xa_unlock_irq(pages); | 1051 | xas_unlock_irq(xas); |
1048 | 1052 | ||
1049 | /* | 1053 | /* |
1050 | * Even if dax_writeback_mapping_range() was given a wbc->range_start | 1054 | * Even if dax_writeback_mapping_range() was given a wbc->range_start |
@@ -1056,7 +1060,7 @@ static int dax_writeback_one(struct dax_device *dax_dev, | |||
1056 | pfn = dax_to_pfn(entry); | 1060 | pfn = dax_to_pfn(entry); |
1057 | size = PAGE_SIZE << dax_entry_order(entry); | 1061 | size = PAGE_SIZE << dax_entry_order(entry); |
1058 | 1062 | ||
1059 | dax_entry_mkclean(mapping, index, pfn); | 1063 | dax_entry_mkclean(mapping, xas->xa_index, pfn); |
1060 | dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size); | 1064 | dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size); |
1061 | /* | 1065 | /* |
1062 | * After we have flushed the cache, we can clear the dirty tag. There | 1066 | * After we have flushed the cache, we can clear the dirty tag. There |
@@ -1064,16 +1068,18 @@ static int dax_writeback_one(struct dax_device *dax_dev, | |||
1064 | * the pfn mappings are writeprotected and fault waits for mapping | 1068 | * the pfn mappings are writeprotected and fault waits for mapping |
1065 | * entry lock. | 1069 | * entry lock. |
1066 | */ | 1070 | */ |
1067 | xa_lock_irq(pages); | 1071 | xas_reset(xas); |
1068 | radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY); | 1072 | xas_lock_irq(xas); |
1069 | xa_unlock_irq(pages); | 1073 | xas_store(xas, entry); |
1070 | trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT); | 1074 | xas_clear_mark(xas, PAGECACHE_TAG_DIRTY); |
1071 | put_locked_mapping_entry(mapping, index); | 1075 | dax_wake_entry(xas, entry, false); |
1076 | |||
1077 | trace_dax_writeback_one(mapping->host, xas->xa_index, | ||
1078 | size >> PAGE_SHIFT); | ||
1072 | return ret; | 1079 | return ret; |
1073 | 1080 | ||
1074 | put_unlocked: | 1081 | put_unlocked: |
1075 | put_unlocked_mapping_entry(mapping, index, entry2); | 1082 | put_unlocked_entry(xas, entry); |
1076 | xa_unlock_irq(pages); | ||
1077 | return ret; | 1083 | return ret; |
1078 | } | 1084 | } |
1079 | 1085 | ||
@@ -1085,13 +1091,13 @@ static int dax_writeback_one(struct dax_device *dax_dev, | |||
1085 | int dax_writeback_mapping_range(struct address_space *mapping, | 1091 | int dax_writeback_mapping_range(struct address_space *mapping, |
1086 | struct block_device *bdev, struct writeback_control *wbc) | 1092 | struct block_device *bdev, struct writeback_control *wbc) |
1087 | { | 1093 | { |
1094 | XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT); | ||
1088 | struct inode *inode = mapping->host; | 1095 | struct inode *inode = mapping->host; |
1089 | pgoff_t start_index, end_index; | 1096 | pgoff_t end_index = wbc->range_end >> PAGE_SHIFT; |
1090 | pgoff_t indices[PAGEVEC_SIZE]; | ||
1091 | struct dax_device *dax_dev; | 1097 | struct dax_device *dax_dev; |
1092 | struct pagevec pvec; | 1098 | void *entry; |
1093 | bool done = false; | 1099 | int ret = 0; |
1094 | int i, ret = 0; | 1100 | unsigned int scanned = 0; |
1095 | 1101 | ||
1096 | if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) | 1102 | if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) |
1097 | return -EIO; | 1103 | return -EIO; |
@@ -1103,41 +1109,29 @@ int dax_writeback_mapping_range(struct address_space *mapping, | |||
1103 | if (!dax_dev) | 1109 | if (!dax_dev) |
1104 | return -EIO; | 1110 | return -EIO; |
1105 | 1111 | ||
1106 | start_index = wbc->range_start >> PAGE_SHIFT; | 1112 | trace_dax_writeback_range(inode, xas.xa_index, end_index); |
1107 | end_index = wbc->range_end >> PAGE_SHIFT; | ||
1108 | |||
1109 | trace_dax_writeback_range(inode, start_index, end_index); | ||
1110 | |||
1111 | tag_pages_for_writeback(mapping, start_index, end_index); | ||
1112 | 1113 | ||
1113 | pagevec_init(&pvec); | 1114 | tag_pages_for_writeback(mapping, xas.xa_index, end_index); |
1114 | while (!done) { | ||
1115 | pvec.nr = find_get_entries_tag(mapping, start_index, | ||
1116 | PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE, | ||
1117 | pvec.pages, indices); | ||
1118 | 1115 | ||
1119 | if (pvec.nr == 0) | 1116 | xas_lock_irq(&xas); |
1117 | xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) { | ||
1118 | ret = dax_writeback_one(&xas, dax_dev, mapping, entry); | ||
1119 | if (ret < 0) { | ||
1120 | mapping_set_error(mapping, ret); | ||
1120 | break; | 1121 | break; |
1121 | |||
1122 | for (i = 0; i < pvec.nr; i++) { | ||
1123 | if (indices[i] > end_index) { | ||
1124 | done = true; | ||
1125 | break; | ||
1126 | } | ||
1127 | |||
1128 | ret = dax_writeback_one(dax_dev, mapping, indices[i], | ||
1129 | pvec.pages[i]); | ||
1130 | if (ret < 0) { | ||
1131 | mapping_set_error(mapping, ret); | ||
1132 | goto out; | ||
1133 | } | ||
1134 | } | 1122 | } |
1135 | start_index = indices[pvec.nr - 1] + 1; | 1123 | if (++scanned % XA_CHECK_SCHED) |
1124 | continue; | ||
1125 | |||
1126 | xas_pause(&xas); | ||
1127 | xas_unlock_irq(&xas); | ||
1128 | cond_resched(); | ||
1129 | xas_lock_irq(&xas); | ||
1136 | } | 1130 | } |
1137 | out: | 1131 | xas_unlock_irq(&xas); |
1138 | put_dax(dax_dev); | 1132 | put_dax(dax_dev); |
1139 | trace_dax_writeback_range_done(inode, start_index, end_index); | 1133 | trace_dax_writeback_range_done(inode, xas.xa_index, end_index); |
1140 | return (ret < 0 ? ret : 0); | 1134 | return ret; |
1141 | } | 1135 | } |
1142 | EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); | 1136 | EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); |
1143 | 1137 | ||