author    Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2016-07-26 18:26:04 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>	2016-07-26 19:19:19 -0400
commit    83929372f629001568d43069a63376e13bfc497b (patch)
tree      5eaa57bd71cce45101ad6e6d50b4532318c4ca74
parent    c78c66d1ddfdbd2353f3fcfeba0268524537b096 (diff)
filemap: prepare find and delete operations for huge pages
For now, we would have HPAGE_PMD_NR entries in radix tree for every huge
page. That's suboptimal and it will be changed to use Matthew's
multi-order entries later.

'add' operation is not changed, because we don't need it to implement
hugetmpfs: shmem uses its own implementation.

Link: http://lkml.kernel.org/r/1466021202-61880-25-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	mm/filemap.c	178
1 file changed, 122 insertions, 56 deletions
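Every lookup helper touched below repeats the same guard sequence: take the speculative reference on the compound head rather than on the (possibly tail) page found in the radix-tree slot, then re-check that the huge page was not split and that the slot still points at the same page. The following sketch pulls that sequence out of the loops purely for illustration; get_head_pinned() is a hypothetical name, not a function added by the patch, and the shadow/retry-entry handling is simplified to a bail-out.

/*
 * Illustrative sketch only, not part of the patch: the guard sequence each
 * lookup loop below repeats for every radix-tree slot.  "slot" is the
 * void ** slot being examined; the caller holds rcu_read_lock().
 */
static struct page *get_head_pinned(void **slot)
{
	struct page *head, *page;

repeat:
	page = radix_tree_deref_slot(slot);
	if (!page || radix_tree_exception(page))
		return NULL;	/* hole, shadow or retry entry: caller decides */

	/* Pin the compound head, not the (possibly tail) page in the slot */
	head = compound_head(page);
	if (!page_cache_get_speculative(head))
		goto repeat;

	/* The page was split under us? */
	if (compound_head(page) != head) {
		put_page(head);
		goto repeat;
	}

	/* Has the page moved? */
	if (unlikely(page != *slot)) {
		put_page(head);
		goto repeat;
	}
	return page;
}

The split and slot re-checks are needed because the huge page may be split, or the slot replaced, between dereferencing the slot and pinning the head; once both re-checks pass, the caller holds a stable reference via the head page.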
diff --git a/mm/filemap.c b/mm/filemap.c
index 1efd2994dccf..21508ea25717 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -114,14 +114,14 @@ static void page_cache_tree_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
 	struct radix_tree_node *node;
+	int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
 
-	VM_BUG_ON(!PageLocked(page));
-
-	node = radix_tree_replace_clear_tags(&mapping->page_tree, page->index,
-							    shadow);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	VM_BUG_ON_PAGE(PageTail(page), page);
+	VM_BUG_ON_PAGE(nr != 1 && shadow, page);
 
 	if (shadow) {
-		mapping->nrexceptional++;
+		mapping->nrexceptional += nr;
 		/*
 		 * Make sure the nrexceptional update is committed before
 		 * the nrpages update so that final truncate racing
@@ -130,31 +130,38 @@ static void page_cache_tree_delete(struct address_space *mapping,
 		 */
 		smp_wmb();
 	}
-	mapping->nrpages--;
+	mapping->nrpages -= nr;
 
-	if (!node)
-		return;
-
-	workingset_node_pages_dec(node);
-	if (shadow)
-		workingset_node_shadows_inc(node);
-	else
-		if (__radix_tree_delete_node(&mapping->page_tree, node))
-			return;
+	for (i = 0; i < nr; i++) {
+		node = radix_tree_replace_clear_tags(&mapping->page_tree,
+				page->index + i, shadow);
+		if (!node) {
+			VM_BUG_ON_PAGE(nr != 1, page);
+			return;
+		}
 
-	/*
-	 * Track node that only contains shadow entries. DAX mappings contain
-	 * no shadow entries and may contain other exceptional entries so skip
-	 * those.
-	 *
-	 * Avoid acquiring the list_lru lock if already tracked.  The
-	 * list_empty() test is safe as node->private_list is
-	 * protected by mapping->tree_lock.
-	 */
-	if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
-				list_empty(&node->private_list)) {
-		node->private_data = mapping;
-		list_lru_add(&workingset_shadow_nodes, &node->private_list);
+		workingset_node_pages_dec(node);
+		if (shadow)
+			workingset_node_shadows_inc(node);
+		else
+			if (__radix_tree_delete_node(&mapping->page_tree, node))
+				continue;
+
+		/*
+		 * Track node that only contains shadow entries. DAX mappings
+		 * contain no shadow entries and may contain other exceptional
+		 * entries so skip those.
+		 *
+		 * Avoid acquiring the list_lru lock if already tracked.
+		 * The list_empty() test is safe as node->private_list is
+		 * protected by mapping->tree_lock.
+		 */
+		if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
+				list_empty(&node->private_list)) {
+			node->private_data = mapping;
+			list_lru_add(&workingset_shadow_nodes,
+					&node->private_list);
+		}
 	}
 }
 
@@ -166,6 +173,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
 void __delete_from_page_cache(struct page *page, void *shadow)
 {
 	struct address_space *mapping = page->mapping;
+	int nr = hpage_nr_pages(page);
 
 	trace_mm_filemap_delete_from_page_cache(page);
 	/*
@@ -178,6 +186,7 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 	else
 		cleancache_invalidate_page(mapping, page);
 
+	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(page_mapped(page), page);
 	if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
 		int mapcount;
@@ -209,9 +218,9 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 
 	/* hugetlb pages do not participate in page cache accounting. */
 	if (!PageHuge(page))
-		__dec_zone_page_state(page, NR_FILE_PAGES);
+		__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, -nr);
 	if (PageSwapBacked(page))
-		__dec_zone_page_state(page, NR_SHMEM);
+		__mod_zone_page_state(page_zone(page), NR_SHMEM, -nr);
 
 	/*
 	 * At this point page must be either written or cleaned by truncate.
@@ -235,9 +244,8 @@ void __delete_from_page_cache(struct page *page, void *shadow)
  */
 void delete_from_page_cache(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = page_mapping(page);
 	unsigned long flags;
-
 	void (*freepage)(struct page *);
 
 	BUG_ON(!PageLocked(page));
@@ -250,7 +258,13 @@ void delete_from_page_cache(struct page *page)
 
 	if (freepage)
 		freepage(page);
-	put_page(page);
+
+	if (PageTransHuge(page) && !PageHuge(page)) {
+		page_ref_sub(page, HPAGE_PMD_NR);
+		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
+	} else {
+		put_page(page);
+	}
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
@@ -1053,7 +1067,7 @@ EXPORT_SYMBOL(page_cache_prev_hole);
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
 	void **pagep;
-	struct page *page;
+	struct page *head, *page;
 
 	rcu_read_lock();
 repeat:
@@ -1073,8 +1087,16 @@ repeat:
 			 */
 			goto out;
 		}
-		if (!page_cache_get_speculative(page))
+
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
+			goto repeat;
+
+		/* The page was split under us? */
+		if (compound_head(page) != head) {
+			put_page(head);
 			goto repeat;
+		}
 
 		/*
 		 * Has the page moved?
@@ -1082,7 +1104,7 @@ repeat:
 		 * include/linux/pagemap.h for details.
 		 */
 		if (unlikely(page != *pagep)) {
-			put_page(page);
+			put_page(head);
 			goto repeat;
 		}
 	}
@@ -1118,12 +1140,12 @@ repeat:
 	if (page && !radix_tree_exception(page)) {
 		lock_page(page);
 		/* Has the page been truncated? */
-		if (unlikely(page->mapping != mapping)) {
+		if (unlikely(page_mapping(page) != mapping)) {
 			unlock_page(page);
 			put_page(page);
 			goto repeat;
 		}
-		VM_BUG_ON_PAGE(page->index != offset, page);
+		VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
 	}
 	return page;
 }
@@ -1255,7 +1277,7 @@ unsigned find_get_entries(struct address_space *mapping,
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-		struct page *page;
+		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1272,12 +1294,20 @@ repeat:
 			 */
 			goto export;
 		}
-		if (!page_cache_get_speculative(page))
+
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
+			goto repeat;
+
+		/* The page was split under us? */
+		if (compound_head(page) != head) {
+			put_page(head);
 			goto repeat;
+		}
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(page);
+			put_page(head);
 			goto repeat;
 		}
 export:
@@ -1318,7 +1348,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-		struct page *page;
+		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1337,12 +1367,19 @@ repeat:
 			continue;
 		}
 
-		if (!page_cache_get_speculative(page))
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
+			goto repeat;
+
+		/* The page was split under us? */
+		if (compound_head(page) != head) {
+			put_page(head);
 			goto repeat;
+		}
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(page);
+			put_page(head);
 			goto repeat;
 		}
 
@@ -1379,7 +1416,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 
 	rcu_read_lock();
 	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
-		struct page *page;
+		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		/* The hole, there no reason to continue */
@@ -1399,12 +1436,19 @@ repeat:
 			break;
 		}
 
-		if (!page_cache_get_speculative(page))
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
+			goto repeat;
+
+		/* The page was split under us? */
+		if (compound_head(page) != head) {
+			put_page(head);
 			goto repeat;
+		}
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(page);
+			put_page(head);
 			goto repeat;
 		}
 
@@ -1413,7 +1457,7 @@ repeat:
 		 * otherwise we can get both false positives and false
 		 * negatives, which is just confusing to the caller.
 		 */
-		if (page->mapping == NULL || page->index != iter.index) {
+		if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
 			put_page(page);
 			break;
 		}
@@ -1451,7 +1495,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 	rcu_read_lock();
 	radix_tree_for_each_tagged(slot, &mapping->page_tree,
 				&iter, *index, tag) {
-		struct page *page;
+		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1476,12 +1520,19 @@ repeat:
 			continue;
 		}
 
-		if (!page_cache_get_speculative(page))
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
 			goto repeat;
 
+		/* The page was split under us? */
+		if (compound_head(page) != head) {
+			put_page(head);
+			goto repeat;
+		}
+
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(page);
+			put_page(head);
 			goto repeat;
 		}
 
@@ -1525,7 +1576,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
 	rcu_read_lock();
 	radix_tree_for_each_tagged(slot, &mapping->page_tree,
 				&iter, start, tag) {
-		struct page *page;
+		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1543,12 +1594,20 @@ repeat:
 			 */
 			goto export;
 		}
-		if (!page_cache_get_speculative(page))
+
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
 			goto repeat;
 
+		/* The page was split under us? */
+		if (compound_head(page) != head) {
+			put_page(head);
+			goto repeat;
+		}
+
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(page);
+			put_page(head);
 			goto repeat;
 		}
 export:
@@ -2137,7 +2196,7 @@ void filemap_map_pages(struct fault_env *fe,
 	struct address_space *mapping = file->f_mapping;
 	pgoff_t last_pgoff = start_pgoff;
 	loff_t size;
-	struct page *page;
+	struct page *head, *page;
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
@@ -2156,12 +2215,19 @@ repeat:
 			goto next;
 		}
 
-		if (!page_cache_get_speculative(page))
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
 			goto repeat;
 
+		/* The page was split under us? */
+		if (compound_head(page) != head) {
+			put_page(head);
+			goto repeat;
+		}
+
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(page);
+			put_page(head);
 			goto repeat;
 		}
 