diff options
author | Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | 2016-07-26 18:26:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-26 19:19:19 -0400 |
commit | 83929372f629001568d43069a63376e13bfc497b (patch) | |
tree | 5eaa57bd71cce45101ad6e6d50b4532318c4ca74 | |
parent | c78c66d1ddfdbd2353f3fcfeba0268524537b096 (diff) |
filemap: prepare find and delete operations for huge pages
For now, we would have HPAGE_PMD_NR entries in radix tree for every huge
page. That's suboptimal and it will be changed to use Matthew's
multi-order entries later.
'add' operation is not changed, because we don't need it to implement
huge tmpfs: shmem uses its own implementation.
Link: http://lkml.kernel.org/r/1466021202-61880-25-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/filemap.c | 178 |
1 file changed, 122 insertions, 56 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 1efd2994dccf..21508ea25717 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -114,14 +114,14 @@ static void page_cache_tree_delete(struct address_space *mapping, | |||
114 | struct page *page, void *shadow) | 114 | struct page *page, void *shadow) |
115 | { | 115 | { |
116 | struct radix_tree_node *node; | 116 | struct radix_tree_node *node; |
117 | int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page); | ||
117 | 118 | ||
118 | VM_BUG_ON(!PageLocked(page)); | 119 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
119 | 120 | VM_BUG_ON_PAGE(PageTail(page), page); | |
120 | node = radix_tree_replace_clear_tags(&mapping->page_tree, page->index, | 121 | VM_BUG_ON_PAGE(nr != 1 && shadow, page); |
121 | shadow); | ||
122 | 122 | ||
123 | if (shadow) { | 123 | if (shadow) { |
124 | mapping->nrexceptional++; | 124 | mapping->nrexceptional += nr; |
125 | /* | 125 | /* |
126 | * Make sure the nrexceptional update is committed before | 126 | * Make sure the nrexceptional update is committed before |
127 | * the nrpages update so that final truncate racing | 127 | * the nrpages update so that final truncate racing |
@@ -130,31 +130,38 @@ static void page_cache_tree_delete(struct address_space *mapping, | |||
130 | */ | 130 | */ |
131 | smp_wmb(); | 131 | smp_wmb(); |
132 | } | 132 | } |
133 | mapping->nrpages--; | 133 | mapping->nrpages -= nr; |
134 | |||
135 | if (!node) | ||
136 | return; | ||
137 | 134 | ||
138 | workingset_node_pages_dec(node); | 135 | for (i = 0; i < nr; i++) { |
139 | if (shadow) | 136 | node = radix_tree_replace_clear_tags(&mapping->page_tree, |
140 | workingset_node_shadows_inc(node); | 137 | page->index + i, shadow); |
141 | else | 138 | if (!node) { |
142 | if (__radix_tree_delete_node(&mapping->page_tree, node)) | 139 | VM_BUG_ON_PAGE(nr != 1, page); |
143 | return; | 140 | return; |
141 | } | ||
144 | 142 | ||
145 | /* | 143 | workingset_node_pages_dec(node); |
146 | * Track node that only contains shadow entries. DAX mappings contain | 144 | if (shadow) |
147 | * no shadow entries and may contain other exceptional entries so skip | 145 | workingset_node_shadows_inc(node); |
148 | * those. | 146 | else |
149 | * | 147 | if (__radix_tree_delete_node(&mapping->page_tree, node)) |
150 | * Avoid acquiring the list_lru lock if already tracked. The | 148 | continue; |
151 | * list_empty() test is safe as node->private_list is | 149 | |
152 | * protected by mapping->tree_lock. | 150 | /* |
153 | */ | 151 | * Track node that only contains shadow entries. DAX mappings |
154 | if (!dax_mapping(mapping) && !workingset_node_pages(node) && | 152 | * contain no shadow entries and may contain other exceptional |
155 | list_empty(&node->private_list)) { | 153 | * entries so skip those. |
156 | node->private_data = mapping; | 154 | * |
157 | list_lru_add(&workingset_shadow_nodes, &node->private_list); | 155 | * Avoid acquiring the list_lru lock if already tracked. |
156 | * The list_empty() test is safe as node->private_list is | ||
157 | * protected by mapping->tree_lock. | ||
158 | */ | ||
159 | if (!dax_mapping(mapping) && !workingset_node_pages(node) && | ||
160 | list_empty(&node->private_list)) { | ||
161 | node->private_data = mapping; | ||
162 | list_lru_add(&workingset_shadow_nodes, | ||
163 | &node->private_list); | ||
164 | } | ||
158 | } | 165 | } |
159 | } | 166 | } |
160 | 167 | ||
@@ -166,6 +173,7 @@ static void page_cache_tree_delete(struct address_space *mapping, | |||
166 | void __delete_from_page_cache(struct page *page, void *shadow) | 173 | void __delete_from_page_cache(struct page *page, void *shadow) |
167 | { | 174 | { |
168 | struct address_space *mapping = page->mapping; | 175 | struct address_space *mapping = page->mapping; |
176 | int nr = hpage_nr_pages(page); | ||
169 | 177 | ||
170 | trace_mm_filemap_delete_from_page_cache(page); | 178 | trace_mm_filemap_delete_from_page_cache(page); |
171 | /* | 179 | /* |
@@ -178,6 +186,7 @@ void __delete_from_page_cache(struct page *page, void *shadow) | |||
178 | else | 186 | else |
179 | cleancache_invalidate_page(mapping, page); | 187 | cleancache_invalidate_page(mapping, page); |
180 | 188 | ||
189 | VM_BUG_ON_PAGE(PageTail(page), page); | ||
181 | VM_BUG_ON_PAGE(page_mapped(page), page); | 190 | VM_BUG_ON_PAGE(page_mapped(page), page); |
182 | if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) { | 191 | if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) { |
183 | int mapcount; | 192 | int mapcount; |
@@ -209,9 +218,9 @@ void __delete_from_page_cache(struct page *page, void *shadow) | |||
209 | 218 | ||
210 | /* hugetlb pages do not participate in page cache accounting. */ | 219 | /* hugetlb pages do not participate in page cache accounting. */ |
211 | if (!PageHuge(page)) | 220 | if (!PageHuge(page)) |
212 | __dec_zone_page_state(page, NR_FILE_PAGES); | 221 | __mod_zone_page_state(page_zone(page), NR_FILE_PAGES, -nr); |
213 | if (PageSwapBacked(page)) | 222 | if (PageSwapBacked(page)) |
214 | __dec_zone_page_state(page, NR_SHMEM); | 223 | __mod_zone_page_state(page_zone(page), NR_SHMEM, -nr); |
215 | 224 | ||
216 | /* | 225 | /* |
217 | * At this point page must be either written or cleaned by truncate. | 226 | * At this point page must be either written or cleaned by truncate. |
@@ -235,9 +244,8 @@ void __delete_from_page_cache(struct page *page, void *shadow) | |||
235 | */ | 244 | */ |
236 | void delete_from_page_cache(struct page *page) | 245 | void delete_from_page_cache(struct page *page) |
237 | { | 246 | { |
238 | struct address_space *mapping = page->mapping; | 247 | struct address_space *mapping = page_mapping(page); |
239 | unsigned long flags; | 248 | unsigned long flags; |
240 | |||
241 | void (*freepage)(struct page *); | 249 | void (*freepage)(struct page *); |
242 | 250 | ||
243 | BUG_ON(!PageLocked(page)); | 251 | BUG_ON(!PageLocked(page)); |
@@ -250,7 +258,13 @@ void delete_from_page_cache(struct page *page) | |||
250 | 258 | ||
251 | if (freepage) | 259 | if (freepage) |
252 | freepage(page); | 260 | freepage(page); |
253 | put_page(page); | 261 | |
262 | if (PageTransHuge(page) && !PageHuge(page)) { | ||
263 | page_ref_sub(page, HPAGE_PMD_NR); | ||
264 | VM_BUG_ON_PAGE(page_count(page) <= 0, page); | ||
265 | } else { | ||
266 | put_page(page); | ||
267 | } | ||
254 | } | 268 | } |
255 | EXPORT_SYMBOL(delete_from_page_cache); | 269 | EXPORT_SYMBOL(delete_from_page_cache); |
256 | 270 | ||
@@ -1053,7 +1067,7 @@ EXPORT_SYMBOL(page_cache_prev_hole); | |||
1053 | struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) | 1067 | struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) |
1054 | { | 1068 | { |
1055 | void **pagep; | 1069 | void **pagep; |
1056 | struct page *page; | 1070 | struct page *head, *page; |
1057 | 1071 | ||
1058 | rcu_read_lock(); | 1072 | rcu_read_lock(); |
1059 | repeat: | 1073 | repeat: |
@@ -1073,8 +1087,16 @@ repeat: | |||
1073 | */ | 1087 | */ |
1074 | goto out; | 1088 | goto out; |
1075 | } | 1089 | } |
1076 | if (!page_cache_get_speculative(page)) | 1090 | |
1091 | head = compound_head(page); | ||
1092 | if (!page_cache_get_speculative(head)) | ||
1093 | goto repeat; | ||
1094 | |||
1095 | /* The page was split under us? */ | ||
1096 | if (compound_head(page) != head) { | ||
1097 | put_page(head); | ||
1077 | goto repeat; | 1098 | goto repeat; |
1099 | } | ||
1078 | 1100 | ||
1079 | /* | 1101 | /* |
1080 | * Has the page moved? | 1102 | * Has the page moved? |
@@ -1082,7 +1104,7 @@ repeat: | |||
1082 | * include/linux/pagemap.h for details. | 1104 | * include/linux/pagemap.h for details. |
1083 | */ | 1105 | */ |
1084 | if (unlikely(page != *pagep)) { | 1106 | if (unlikely(page != *pagep)) { |
1085 | put_page(page); | 1107 | put_page(head); |
1086 | goto repeat; | 1108 | goto repeat; |
1087 | } | 1109 | } |
1088 | } | 1110 | } |
@@ -1118,12 +1140,12 @@ repeat: | |||
1118 | if (page && !radix_tree_exception(page)) { | 1140 | if (page && !radix_tree_exception(page)) { |
1119 | lock_page(page); | 1141 | lock_page(page); |
1120 | /* Has the page been truncated? */ | 1142 | /* Has the page been truncated? */ |
1121 | if (unlikely(page->mapping != mapping)) { | 1143 | if (unlikely(page_mapping(page) != mapping)) { |
1122 | unlock_page(page); | 1144 | unlock_page(page); |
1123 | put_page(page); | 1145 | put_page(page); |
1124 | goto repeat; | 1146 | goto repeat; |
1125 | } | 1147 | } |
1126 | VM_BUG_ON_PAGE(page->index != offset, page); | 1148 | VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); |
1127 | } | 1149 | } |
1128 | return page; | 1150 | return page; |
1129 | } | 1151 | } |
@@ -1255,7 +1277,7 @@ unsigned find_get_entries(struct address_space *mapping, | |||
1255 | 1277 | ||
1256 | rcu_read_lock(); | 1278 | rcu_read_lock(); |
1257 | radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { | 1279 | radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { |
1258 | struct page *page; | 1280 | struct page *head, *page; |
1259 | repeat: | 1281 | repeat: |
1260 | page = radix_tree_deref_slot(slot); | 1282 | page = radix_tree_deref_slot(slot); |
1261 | if (unlikely(!page)) | 1283 | if (unlikely(!page)) |
@@ -1272,12 +1294,20 @@ repeat: | |||
1272 | */ | 1294 | */ |
1273 | goto export; | 1295 | goto export; |
1274 | } | 1296 | } |
1275 | if (!page_cache_get_speculative(page)) | 1297 | |
1298 | head = compound_head(page); | ||
1299 | if (!page_cache_get_speculative(head)) | ||
1300 | goto repeat; | ||
1301 | |||
1302 | /* The page was split under us? */ | ||
1303 | if (compound_head(page) != head) { | ||
1304 | put_page(head); | ||
1276 | goto repeat; | 1305 | goto repeat; |
1306 | } | ||
1277 | 1307 | ||
1278 | /* Has the page moved? */ | 1308 | /* Has the page moved? */ |
1279 | if (unlikely(page != *slot)) { | 1309 | if (unlikely(page != *slot)) { |
1280 | put_page(page); | 1310 | put_page(head); |
1281 | goto repeat; | 1311 | goto repeat; |
1282 | } | 1312 | } |
1283 | export: | 1313 | export: |
@@ -1318,7 +1348,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, | |||
1318 | 1348 | ||
1319 | rcu_read_lock(); | 1349 | rcu_read_lock(); |
1320 | radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { | 1350 | radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { |
1321 | struct page *page; | 1351 | struct page *head, *page; |
1322 | repeat: | 1352 | repeat: |
1323 | page = radix_tree_deref_slot(slot); | 1353 | page = radix_tree_deref_slot(slot); |
1324 | if (unlikely(!page)) | 1354 | if (unlikely(!page)) |
@@ -1337,12 +1367,19 @@ repeat: | |||
1337 | continue; | 1367 | continue; |
1338 | } | 1368 | } |
1339 | 1369 | ||
1340 | if (!page_cache_get_speculative(page)) | 1370 | head = compound_head(page); |
1371 | if (!page_cache_get_speculative(head)) | ||
1372 | goto repeat; | ||
1373 | |||
1374 | /* The page was split under us? */ | ||
1375 | if (compound_head(page) != head) { | ||
1376 | put_page(head); | ||
1341 | goto repeat; | 1377 | goto repeat; |
1378 | } | ||
1342 | 1379 | ||
1343 | /* Has the page moved? */ | 1380 | /* Has the page moved? */ |
1344 | if (unlikely(page != *slot)) { | 1381 | if (unlikely(page != *slot)) { |
1345 | put_page(page); | 1382 | put_page(head); |
1346 | goto repeat; | 1383 | goto repeat; |
1347 | } | 1384 | } |
1348 | 1385 | ||
@@ -1379,7 +1416,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, | |||
1379 | 1416 | ||
1380 | rcu_read_lock(); | 1417 | rcu_read_lock(); |
1381 | radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) { | 1418 | radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) { |
1382 | struct page *page; | 1419 | struct page *head, *page; |
1383 | repeat: | 1420 | repeat: |
1384 | page = radix_tree_deref_slot(slot); | 1421 | page = radix_tree_deref_slot(slot); |
1385 | /* The hole, there no reason to continue */ | 1422 | /* The hole, there no reason to continue */ |
@@ -1399,12 +1436,19 @@ repeat: | |||
1399 | break; | 1436 | break; |
1400 | } | 1437 | } |
1401 | 1438 | ||
1402 | if (!page_cache_get_speculative(page)) | 1439 | head = compound_head(page); |
1440 | if (!page_cache_get_speculative(head)) | ||
1441 | goto repeat; | ||
1442 | |||
1443 | /* The page was split under us? */ | ||
1444 | if (compound_head(page) != head) { | ||
1445 | put_page(head); | ||
1403 | goto repeat; | 1446 | goto repeat; |
1447 | } | ||
1404 | 1448 | ||
1405 | /* Has the page moved? */ | 1449 | /* Has the page moved? */ |
1406 | if (unlikely(page != *slot)) { | 1450 | if (unlikely(page != *slot)) { |
1407 | put_page(page); | 1451 | put_page(head); |
1408 | goto repeat; | 1452 | goto repeat; |
1409 | } | 1453 | } |
1410 | 1454 | ||
@@ -1413,7 +1457,7 @@ repeat: | |||
1413 | * otherwise we can get both false positives and false | 1457 | * otherwise we can get both false positives and false |
1414 | * negatives, which is just confusing to the caller. | 1458 | * negatives, which is just confusing to the caller. |
1415 | */ | 1459 | */ |
1416 | if (page->mapping == NULL || page->index != iter.index) { | 1460 | if (page->mapping == NULL || page_to_pgoff(page) != iter.index) { |
1417 | put_page(page); | 1461 | put_page(page); |
1418 | break; | 1462 | break; |
1419 | } | 1463 | } |
@@ -1451,7 +1495,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | |||
1451 | rcu_read_lock(); | 1495 | rcu_read_lock(); |
1452 | radix_tree_for_each_tagged(slot, &mapping->page_tree, | 1496 | radix_tree_for_each_tagged(slot, &mapping->page_tree, |
1453 | &iter, *index, tag) { | 1497 | &iter, *index, tag) { |
1454 | struct page *page; | 1498 | struct page *head, *page; |
1455 | repeat: | 1499 | repeat: |
1456 | page = radix_tree_deref_slot(slot); | 1500 | page = radix_tree_deref_slot(slot); |
1457 | if (unlikely(!page)) | 1501 | if (unlikely(!page)) |
@@ -1476,12 +1520,19 @@ repeat: | |||
1476 | continue; | 1520 | continue; |
1477 | } | 1521 | } |
1478 | 1522 | ||
1479 | if (!page_cache_get_speculative(page)) | 1523 | head = compound_head(page); |
1524 | if (!page_cache_get_speculative(head)) | ||
1480 | goto repeat; | 1525 | goto repeat; |
1481 | 1526 | ||
1527 | /* The page was split under us? */ | ||
1528 | if (compound_head(page) != head) { | ||
1529 | put_page(head); | ||
1530 | goto repeat; | ||
1531 | } | ||
1532 | |||
1482 | /* Has the page moved? */ | 1533 | /* Has the page moved? */ |
1483 | if (unlikely(page != *slot)) { | 1534 | if (unlikely(page != *slot)) { |
1484 | put_page(page); | 1535 | put_page(head); |
1485 | goto repeat; | 1536 | goto repeat; |
1486 | } | 1537 | } |
1487 | 1538 | ||
@@ -1525,7 +1576,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, | |||
1525 | rcu_read_lock(); | 1576 | rcu_read_lock(); |
1526 | radix_tree_for_each_tagged(slot, &mapping->page_tree, | 1577 | radix_tree_for_each_tagged(slot, &mapping->page_tree, |
1527 | &iter, start, tag) { | 1578 | &iter, start, tag) { |
1528 | struct page *page; | 1579 | struct page *head, *page; |
1529 | repeat: | 1580 | repeat: |
1530 | page = radix_tree_deref_slot(slot); | 1581 | page = radix_tree_deref_slot(slot); |
1531 | if (unlikely(!page)) | 1582 | if (unlikely(!page)) |
@@ -1543,12 +1594,20 @@ repeat: | |||
1543 | */ | 1594 | */ |
1544 | goto export; | 1595 | goto export; |
1545 | } | 1596 | } |
1546 | if (!page_cache_get_speculative(page)) | 1597 | |
1598 | head = compound_head(page); | ||
1599 | if (!page_cache_get_speculative(head)) | ||
1547 | goto repeat; | 1600 | goto repeat; |
1548 | 1601 | ||
1602 | /* The page was split under us? */ | ||
1603 | if (compound_head(page) != head) { | ||
1604 | put_page(head); | ||
1605 | goto repeat; | ||
1606 | } | ||
1607 | |||
1549 | /* Has the page moved? */ | 1608 | /* Has the page moved? */ |
1550 | if (unlikely(page != *slot)) { | 1609 | if (unlikely(page != *slot)) { |
1551 | put_page(page); | 1610 | put_page(head); |
1552 | goto repeat; | 1611 | goto repeat; |
1553 | } | 1612 | } |
1554 | export: | 1613 | export: |
@@ -2137,7 +2196,7 @@ void filemap_map_pages(struct fault_env *fe, | |||
2137 | struct address_space *mapping = file->f_mapping; | 2196 | struct address_space *mapping = file->f_mapping; |
2138 | pgoff_t last_pgoff = start_pgoff; | 2197 | pgoff_t last_pgoff = start_pgoff; |
2139 | loff_t size; | 2198 | loff_t size; |
2140 | struct page *page; | 2199 | struct page *head, *page; |
2141 | 2200 | ||
2142 | rcu_read_lock(); | 2201 | rcu_read_lock(); |
2143 | radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, | 2202 | radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, |
@@ -2156,12 +2215,19 @@ repeat: | |||
2156 | goto next; | 2215 | goto next; |
2157 | } | 2216 | } |
2158 | 2217 | ||
2159 | if (!page_cache_get_speculative(page)) | 2218 | head = compound_head(page); |
2219 | if (!page_cache_get_speculative(head)) | ||
2160 | goto repeat; | 2220 | goto repeat; |
2161 | 2221 | ||
2222 | /* The page was split under us? */ | ||
2223 | if (compound_head(page) != head) { | ||
2224 | put_page(head); | ||
2225 | goto repeat; | ||
2226 | } | ||
2227 | |||
2162 | /* Has the page moved? */ | 2228 | /* Has the page moved? */ |
2163 | if (unlikely(page != *slot)) { | 2229 | if (unlikely(page != *slot)) { |
2164 | put_page(page); | 2230 | put_page(head); |
2165 | goto repeat; | 2231 | goto repeat; |
2166 | } | 2232 | } |
2167 | 2233 | ||