Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  680
1 file changed, 420 insertions, 260 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 26f4a8a4e0c7..2880396f7953 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -103,8 +103,11 @@ struct scan_control {
103 */ 103 */
104 reclaim_mode_t reclaim_mode; 104 reclaim_mode_t reclaim_mode;
105 105
106 /* Which cgroup do we reclaim from */ 106 /*
107 struct mem_cgroup *mem_cgroup; 107 * The memory cgroup that hit its limit and as a result is the
108 * primary target of this reclaim invocation.
109 */
110 struct mem_cgroup *target_mem_cgroup;
108 111
109 /* 112 /*
110 * Nodemask of nodes allowed by the caller. If NULL, all nodes 113 * Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -113,6 +116,11 @@ struct scan_control {
113 nodemask_t *nodemask; 116 nodemask_t *nodemask;
114}; 117};
115 118
119struct mem_cgroup_zone {
120 struct mem_cgroup *mem_cgroup;
121 struct zone *zone;
122};
123
116#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) 124#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
117 125
118#ifdef ARCH_HAS_PREFETCH 126#ifdef ARCH_HAS_PREFETCH
@@ -153,28 +161,45 @@ static LIST_HEAD(shrinker_list);
153static DECLARE_RWSEM(shrinker_rwsem); 161static DECLARE_RWSEM(shrinker_rwsem);
154 162
155#ifdef CONFIG_CGROUP_MEM_RES_CTLR 163#ifdef CONFIG_CGROUP_MEM_RES_CTLR
156#define scanning_global_lru(sc) (!(sc)->mem_cgroup) 164static bool global_reclaim(struct scan_control *sc)
165{
166 return !sc->target_mem_cgroup;
167}
168
169static bool scanning_global_lru(struct mem_cgroup_zone *mz)
170{
171 return !mz->mem_cgroup;
172}
157#else 173#else
158#define scanning_global_lru(sc) (1) 174static bool global_reclaim(struct scan_control *sc)
175{
176 return true;
177}
178
179static bool scanning_global_lru(struct mem_cgroup_zone *mz)
180{
181 return true;
182}
159#endif 183#endif
160 184
161static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone, 185static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
162 struct scan_control *sc)
163{ 186{
164 if (!scanning_global_lru(sc)) 187 if (!scanning_global_lru(mz))
165 return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone); 188 return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
166 189
167 return &zone->reclaim_stat; 190 return &mz->zone->reclaim_stat;
168} 191}
169 192
170static unsigned long zone_nr_lru_pages(struct zone *zone, 193static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
171 struct scan_control *sc, enum lru_list lru) 194 enum lru_list lru)
172{ 195{
173 if (!scanning_global_lru(sc)) 196 if (!scanning_global_lru(mz))
174 return mem_cgroup_zone_nr_lru_pages(sc->mem_cgroup, 197 return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
175 zone_to_nid(zone), zone_idx(zone), BIT(lru)); 198 zone_to_nid(mz->zone),
199 zone_idx(mz->zone),
200 BIT(lru));
176 201
177 return zone_page_state(zone, NR_LRU_BASE + lru); 202 return zone_page_state(mz->zone, NR_LRU_BASE + lru);
178} 203}
179 204
180 205
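
The new helpers separate two questions that the old scanning_global_lru(sc) macro conflated: global_reclaim() asks whether this reclaim invocation was triggered by the global allocator rather than by a memcg hitting its limit, while scanning_global_lru() asks whether the particular (memcg, zone) pair being scanned is backed by a cgroup at all. As a minimal sketch of how a caller pairs a zone with an optional memcg (the function below is illustrative and not part of the patch; a NULL mem_cgroup makes the helpers fall back to the zone's own counters):

static unsigned long example_inactive_file_pages(struct zone *zone,
						 struct mem_cgroup *memcg)
{
	/* NULL memcg means the zone's global LRU */
	struct mem_cgroup_zone mz = {
		.mem_cgroup = memcg,
		.zone = zone,
	};

	return zone_nr_lru_pages(&mz, LRU_INACTIVE_FILE);
}
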
@@ -677,12 +702,13 @@ enum page_references {
677}; 702};
678 703
679static enum page_references page_check_references(struct page *page, 704static enum page_references page_check_references(struct page *page,
705 struct mem_cgroup_zone *mz,
680 struct scan_control *sc) 706 struct scan_control *sc)
681{ 707{
682 int referenced_ptes, referenced_page; 708 int referenced_ptes, referenced_page;
683 unsigned long vm_flags; 709 unsigned long vm_flags;
684 710
685 referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags); 711 referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
686 referenced_page = TestClearPageReferenced(page); 712 referenced_page = TestClearPageReferenced(page);
687 713
688 /* Lumpy reclaim - ignore references */ 714 /* Lumpy reclaim - ignore references */
@@ -738,7 +764,7 @@ static enum page_references page_check_references(struct page *page,
738 * shrink_page_list() returns the number of reclaimed pages 764 * shrink_page_list() returns the number of reclaimed pages
739 */ 765 */
740static unsigned long shrink_page_list(struct list_head *page_list, 766static unsigned long shrink_page_list(struct list_head *page_list,
741 struct zone *zone, 767 struct mem_cgroup_zone *mz,
742 struct scan_control *sc, 768 struct scan_control *sc,
743 int priority, 769 int priority,
744 unsigned long *ret_nr_dirty, 770 unsigned long *ret_nr_dirty,
@@ -769,7 +795,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
769 goto keep; 795 goto keep;
770 796
771 VM_BUG_ON(PageActive(page)); 797 VM_BUG_ON(PageActive(page));
772 VM_BUG_ON(page_zone(page) != zone); 798 VM_BUG_ON(page_zone(page) != mz->zone);
773 799
774 sc->nr_scanned++; 800 sc->nr_scanned++;
775 801
@@ -803,7 +829,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
803 } 829 }
804 } 830 }
805 831
806 references = page_check_references(page, sc); 832 references = page_check_references(page, mz, sc);
807 switch (references) { 833 switch (references) {
808 case PAGEREF_ACTIVATE: 834 case PAGEREF_ACTIVATE:
809 goto activate_locked; 835 goto activate_locked;
@@ -994,8 +1020,8 @@ keep_lumpy:
994 * back off and wait for congestion to clear because further reclaim 1020 * back off and wait for congestion to clear because further reclaim
995 * will encounter the same problem 1021 * will encounter the same problem
996 */ 1022 */
997 if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc)) 1023 if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
998 zone_set_flag(zone, ZONE_CONGESTED); 1024 zone_set_flag(mz->zone, ZONE_CONGESTED);
999 1025
1000 free_hot_cold_page_list(&free_pages, 1); 1026 free_hot_cold_page_list(&free_pages, 1);
1001 1027
@@ -1049,8 +1075,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
1049 1075
1050 ret = -EBUSY; 1076 ret = -EBUSY;
1051 1077
1052 if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page))) 1078 /*
1053 return ret; 1079 * To minimise LRU disruption, the caller can indicate that it only
1080 * wants to isolate pages it will be able to operate on without
1081 * blocking - clean pages for the most part.
1082 *
1083 * ISOLATE_CLEAN means that only clean pages should be isolated. This
1084 * is used by reclaim when it cannot write to backing storage
1085 *
1086 * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants pages
1087 * that it is possible to migrate without blocking
1088 */
1089 if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
1090 /* All the caller can do on PageWriteback is block */
1091 if (PageWriteback(page))
1092 return ret;
1093
1094 if (PageDirty(page)) {
1095 struct address_space *mapping;
1096
1097 /* ISOLATE_CLEAN means only clean pages */
1098 if (mode & ISOLATE_CLEAN)
1099 return ret;
1100
1101 /*
1102 * Only pages without mappings or that have a
1103 * ->migratepage callback are possible to migrate
1104 * without blocking
1105 */
1106 mapping = page_mapping(page);
1107 if (mapping && !mapping->a_ops->migratepage)
1108 return ret;
1109 }
1110 }
1054 1111
1055 if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) 1112 if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
1056 return ret; 1113 return ret;
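
In practice the mode bits are combined by the isolation callers according to what they can afford to block on: reclaim that is not allowed to write pages back ORs in ISOLATE_CLEAN (visible in the shrink_active_list() hunk further down), and asynchronous migration ORs in ISOLATE_ASYNC_MIGRATE. A rough sketch of that selection, with the helper name being illustrative rather than taken from this patch:

static isolate_mode_t example_pick_isolate_mode(bool may_writepage,
						bool sync_migration)
{
	isolate_mode_t mode = ISOLATE_INACTIVE;

	if (!may_writepage)
		mode |= ISOLATE_CLEAN;		/* skip dirty and writeback pages */
	if (!sync_migration)
		mode |= ISOLATE_ASYNC_MIGRATE;	/* only pages migratable without blocking */

	return mode;
}
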
@@ -1079,25 +1136,36 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
1079 * Appropriate locks must be held before calling this function. 1136 * Appropriate locks must be held before calling this function.
1080 * 1137 *
1081 * @nr_to_scan: The number of pages to look through on the list. 1138 * @nr_to_scan: The number of pages to look through on the list.
1082 * @src: The LRU list to pull pages off. 1139 * @mz: The mem_cgroup_zone to pull pages from.
1083 * @dst: The temp list to put pages on to. 1140 * @dst: The temp list to put pages on to.
1084 * @scanned: The number of pages that were scanned. 1141 * @nr_scanned: The number of pages that were scanned.
1085 * @order: The caller's attempted allocation order 1142 * @order: The caller's attempted allocation order
1086 * @mode: One of the LRU isolation modes 1143 * @mode: One of the LRU isolation modes
1144 * @active: True [1] if isolating active pages
1087 * @file: True [1] if isolating file [!anon] pages 1145 * @file: True [1] if isolating file [!anon] pages
1088 * 1146 *
1089 * returns how many pages were moved onto *@dst. 1147 * returns how many pages were moved onto *@dst.
1090 */ 1148 */
1091static unsigned long isolate_lru_pages(unsigned long nr_to_scan, 1149static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1092 struct list_head *src, struct list_head *dst, 1150 struct mem_cgroup_zone *mz, struct list_head *dst,
1093 unsigned long *scanned, int order, isolate_mode_t mode, 1151 unsigned long *nr_scanned, int order, isolate_mode_t mode,
1094 int file) 1152 int active, int file)
1095{ 1153{
1154 struct lruvec *lruvec;
1155 struct list_head *src;
1096 unsigned long nr_taken = 0; 1156 unsigned long nr_taken = 0;
1097 unsigned long nr_lumpy_taken = 0; 1157 unsigned long nr_lumpy_taken = 0;
1098 unsigned long nr_lumpy_dirty = 0; 1158 unsigned long nr_lumpy_dirty = 0;
1099 unsigned long nr_lumpy_failed = 0; 1159 unsigned long nr_lumpy_failed = 0;
1100 unsigned long scan; 1160 unsigned long scan;
1161 int lru = LRU_BASE;
1162
1163 lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
1164 if (active)
1165 lru += LRU_ACTIVE;
1166 if (file)
1167 lru += LRU_FILE;
1168 src = &lruvec->lists[lru];
1101 1169
1102 for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { 1170 for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
1103 struct page *page; 1171 struct page *page;
@@ -1113,15 +1181,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1113 1181
1114 switch (__isolate_lru_page(page, mode, file)) { 1182 switch (__isolate_lru_page(page, mode, file)) {
1115 case 0: 1183 case 0:
1184 mem_cgroup_lru_del(page);
1116 list_move(&page->lru, dst); 1185 list_move(&page->lru, dst);
1117 mem_cgroup_del_lru(page);
1118 nr_taken += hpage_nr_pages(page); 1186 nr_taken += hpage_nr_pages(page);
1119 break; 1187 break;
1120 1188
1121 case -EBUSY: 1189 case -EBUSY:
1122 /* else it is being freed elsewhere */ 1190 /* else it is being freed elsewhere */
1123 list_move(&page->lru, src); 1191 list_move(&page->lru, src);
1124 mem_cgroup_rotate_lru_list(page, page_lru(page));
1125 continue; 1192 continue;
1126 1193
1127 default: 1194 default:
@@ -1171,13 +1238,17 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1171 break; 1238 break;
1172 1239
1173 if (__isolate_lru_page(cursor_page, mode, file) == 0) { 1240 if (__isolate_lru_page(cursor_page, mode, file) == 0) {
1241 unsigned int isolated_pages;
1242
1243 mem_cgroup_lru_del(cursor_page);
1174 list_move(&cursor_page->lru, dst); 1244 list_move(&cursor_page->lru, dst);
1175 mem_cgroup_del_lru(cursor_page); 1245 isolated_pages = hpage_nr_pages(cursor_page);
1176 nr_taken += hpage_nr_pages(cursor_page); 1246 nr_taken += isolated_pages;
1177 nr_lumpy_taken++; 1247 nr_lumpy_taken += isolated_pages;
1178 if (PageDirty(cursor_page)) 1248 if (PageDirty(cursor_page))
1179 nr_lumpy_dirty++; 1249 nr_lumpy_dirty += isolated_pages;
1180 scan++; 1250 scan++;
1251 pfn += isolated_pages - 1;
1181 } else { 1252 } else {
1182 /* 1253 /*
1183 * Check if the page is freed already. 1254 * Check if the page is freed already.
@@ -1203,57 +1274,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1203 nr_lumpy_failed++; 1274 nr_lumpy_failed++;
1204 } 1275 }
1205 1276
1206 *scanned = scan; 1277 *nr_scanned = scan;
1207 1278
1208 trace_mm_vmscan_lru_isolate(order, 1279 trace_mm_vmscan_lru_isolate(order,
1209 nr_to_scan, scan, 1280 nr_to_scan, scan,
1210 nr_taken, 1281 nr_taken,
1211 nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, 1282 nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
1212 mode); 1283 mode, file);
1213 return nr_taken; 1284 return nr_taken;
1214} 1285}
1215 1286
1216static unsigned long isolate_pages_global(unsigned long nr,
1217 struct list_head *dst,
1218 unsigned long *scanned, int order,
1219 isolate_mode_t mode,
1220 struct zone *z, int active, int file)
1221{
1222 int lru = LRU_BASE;
1223 if (active)
1224 lru += LRU_ACTIVE;
1225 if (file)
1226 lru += LRU_FILE;
1227 return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
1228 mode, file);
1229}
1230
1231/*
1232 * clear_active_flags() is a helper for shrink_active_list(), clearing
1233 * any active bits from the pages in the list.
1234 */
1235static unsigned long clear_active_flags(struct list_head *page_list,
1236 unsigned int *count)
1237{
1238 int nr_active = 0;
1239 int lru;
1240 struct page *page;
1241
1242 list_for_each_entry(page, page_list, lru) {
1243 int numpages = hpage_nr_pages(page);
1244 lru = page_lru_base_type(page);
1245 if (PageActive(page)) {
1246 lru += LRU_ACTIVE;
1247 ClearPageActive(page);
1248 nr_active += numpages;
1249 }
1250 if (count)
1251 count[lru] += numpages;
1252 }
1253
1254 return nr_active;
1255}
1256
1257/** 1287/**
1258 * isolate_lru_page - tries to isolate a page from its LRU list 1288 * isolate_lru_page - tries to isolate a page from its LRU list
1259 * @page: page to isolate from its LRU list 1289 * @page: page to isolate from its LRU list
@@ -1313,7 +1343,7 @@ static int too_many_isolated(struct zone *zone, int file,
1313 if (current_is_kswapd()) 1343 if (current_is_kswapd())
1314 return 0; 1344 return 0;
1315 1345
1316 if (!scanning_global_lru(sc)) 1346 if (!global_reclaim(sc))
1317 return 0; 1347 return 0;
1318 1348
1319 if (file) { 1349 if (file) {
@@ -1327,27 +1357,21 @@ static int too_many_isolated(struct zone *zone, int file,
1327 return isolated > inactive; 1357 return isolated > inactive;
1328} 1358}
1329 1359
1330/*
1331 * TODO: Try merging with migrations version of putback_lru_pages
1332 */
1333static noinline_for_stack void 1360static noinline_for_stack void
1334putback_lru_pages(struct zone *zone, struct scan_control *sc, 1361putback_inactive_pages(struct mem_cgroup_zone *mz,
1335 unsigned long nr_anon, unsigned long nr_file, 1362 struct list_head *page_list)
1336 struct list_head *page_list)
1337{ 1363{
1338 struct page *page; 1364 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
1339 struct pagevec pvec; 1365 struct zone *zone = mz->zone;
1340 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); 1366 LIST_HEAD(pages_to_free);
1341
1342 pagevec_init(&pvec, 1);
1343 1367
1344 /* 1368 /*
1345 * Put back any unfreeable pages. 1369 * Put back any unfreeable pages.
1346 */ 1370 */
1347 spin_lock(&zone->lru_lock);
1348 while (!list_empty(page_list)) { 1371 while (!list_empty(page_list)) {
1372 struct page *page = lru_to_page(page_list);
1349 int lru; 1373 int lru;
1350 page = lru_to_page(page_list); 1374
1351 VM_BUG_ON(PageLRU(page)); 1375 VM_BUG_ON(PageLRU(page));
1352 list_del(&page->lru); 1376 list_del(&page->lru);
1353 if (unlikely(!page_evictable(page, NULL))) { 1377 if (unlikely(!page_evictable(page, NULL))) {
@@ -1364,30 +1388,53 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
1364 int numpages = hpage_nr_pages(page); 1388 int numpages = hpage_nr_pages(page);
1365 reclaim_stat->recent_rotated[file] += numpages; 1389 reclaim_stat->recent_rotated[file] += numpages;
1366 } 1390 }
1367 if (!pagevec_add(&pvec, page)) { 1391 if (put_page_testzero(page)) {
1368 spin_unlock_irq(&zone->lru_lock); 1392 __ClearPageLRU(page);
1369 __pagevec_release(&pvec); 1393 __ClearPageActive(page);
1370 spin_lock_irq(&zone->lru_lock); 1394 del_page_from_lru_list(zone, page, lru);
1395
1396 if (unlikely(PageCompound(page))) {
1397 spin_unlock_irq(&zone->lru_lock);
1398 (*get_compound_page_dtor(page))(page);
1399 spin_lock_irq(&zone->lru_lock);
1400 } else
1401 list_add(&page->lru, &pages_to_free);
1371 } 1402 }
1372 } 1403 }
1373 __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
1374 __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
1375 1404
1376 spin_unlock_irq(&zone->lru_lock); 1405 /*
1377 pagevec_release(&pvec); 1406 * To save our caller's stack, now use input list for pages to free.
1407 */
1408 list_splice(&pages_to_free, page_list);
1378} 1409}
1379 1410
1380static noinline_for_stack void update_isolated_counts(struct zone *zone, 1411static noinline_for_stack void
1381 struct scan_control *sc, 1412update_isolated_counts(struct mem_cgroup_zone *mz,
1382 unsigned long *nr_anon, 1413 struct list_head *page_list,
1383 unsigned long *nr_file, 1414 unsigned long *nr_anon,
1384 struct list_head *isolated_list) 1415 unsigned long *nr_file)
1385{ 1416{
1386 unsigned long nr_active; 1417 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
1418 struct zone *zone = mz->zone;
1387 unsigned int count[NR_LRU_LISTS] = { 0, }; 1419 unsigned int count[NR_LRU_LISTS] = { 0, };
1388 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); 1420 unsigned long nr_active = 0;
1421 struct page *page;
1422 int lru;
1423
1424 /*
1425 * Count pages and clear active flags
1426 */
1427 list_for_each_entry(page, page_list, lru) {
1428 int numpages = hpage_nr_pages(page);
1429 lru = page_lru_base_type(page);
1430 if (PageActive(page)) {
1431 lru += LRU_ACTIVE;
1432 ClearPageActive(page);
1433 nr_active += numpages;
1434 }
1435 count[lru] += numpages;
1436 }
1389 1437
1390 nr_active = clear_active_flags(isolated_list, count);
1391 __count_vm_events(PGDEACTIVATE, nr_active); 1438 __count_vm_events(PGDEACTIVATE, nr_active);
1392 1439
1393 __mod_zone_page_state(zone, NR_ACTIVE_FILE, 1440 __mod_zone_page_state(zone, NR_ACTIVE_FILE,
@@ -1401,8 +1448,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone,
1401 1448
1402 *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; 1449 *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
1403 *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; 1450 *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
1404 __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
1405 __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
1406 1451
1407 reclaim_stat->recent_scanned[0] += *nr_anon; 1452 reclaim_stat->recent_scanned[0] += *nr_anon;
1408 reclaim_stat->recent_scanned[1] += *nr_file; 1453 reclaim_stat->recent_scanned[1] += *nr_file;
@@ -1454,8 +1499,8 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
1454 * of reclaimed pages 1499 * of reclaimed pages
1455 */ 1500 */
1456static noinline_for_stack unsigned long 1501static noinline_for_stack unsigned long
1457shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, 1502shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
1458 struct scan_control *sc, int priority, int file) 1503 struct scan_control *sc, int priority, int file)
1459{ 1504{
1460 LIST_HEAD(page_list); 1505 LIST_HEAD(page_list);
1461 unsigned long nr_scanned; 1506 unsigned long nr_scanned;
@@ -1466,6 +1511,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1466 unsigned long nr_dirty = 0; 1511 unsigned long nr_dirty = 0;
1467 unsigned long nr_writeback = 0; 1512 unsigned long nr_writeback = 0;
1468 isolate_mode_t reclaim_mode = ISOLATE_INACTIVE; 1513 isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
1514 struct zone *zone = mz->zone;
1469 1515
1470 while (unlikely(too_many_isolated(zone, file, sc))) { 1516 while (unlikely(too_many_isolated(zone, file, sc))) {
1471 congestion_wait(BLK_RW_ASYNC, HZ/10); 1517 congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1488,9 +1534,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1488 1534
1489 spin_lock_irq(&zone->lru_lock); 1535 spin_lock_irq(&zone->lru_lock);
1490 1536
1491 if (scanning_global_lru(sc)) { 1537 nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list,
1492 nr_taken = isolate_pages_global(nr_to_scan, &page_list, 1538 &nr_scanned, sc->order,
1493 &nr_scanned, sc->order, reclaim_mode, zone, 0, file); 1539 reclaim_mode, 0, file);
1540 if (global_reclaim(sc)) {
1494 zone->pages_scanned += nr_scanned; 1541 zone->pages_scanned += nr_scanned;
1495 if (current_is_kswapd()) 1542 if (current_is_kswapd())
1496 __count_zone_vm_events(PGSCAN_KSWAPD, zone, 1543 __count_zone_vm_events(PGSCAN_KSWAPD, zone,
@@ -1498,14 +1545,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1498 else 1545 else
1499 __count_zone_vm_events(PGSCAN_DIRECT, zone, 1546 __count_zone_vm_events(PGSCAN_DIRECT, zone,
1500 nr_scanned); 1547 nr_scanned);
1501 } else {
1502 nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
1503 &nr_scanned, sc->order, reclaim_mode, zone,
1504 sc->mem_cgroup, 0, file);
1505 /*
1506 * mem_cgroup_isolate_pages() keeps track of
1507 * scanned pages on its own.
1508 */
1509 } 1548 }
1510 1549
1511 if (nr_taken == 0) { 1550 if (nr_taken == 0) {
@@ -1513,26 +1552,37 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1513 return 0; 1552 return 0;
1514 } 1553 }
1515 1554
1516 update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list); 1555 update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
1556
1557 __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
1558 __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
1517 1559
1518 spin_unlock_irq(&zone->lru_lock); 1560 spin_unlock_irq(&zone->lru_lock);
1519 1561
1520 nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority, 1562 nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
1521 &nr_dirty, &nr_writeback); 1563 &nr_dirty, &nr_writeback);
1522 1564
1523 /* Check if we should synchronously wait for writeback */ 1565 /* Check if we should synchronously wait for writeback */
1524 if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { 1566 if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
1525 set_reclaim_mode(priority, sc, true); 1567 set_reclaim_mode(priority, sc, true);
1526 nr_reclaimed += shrink_page_list(&page_list, zone, sc, 1568 nr_reclaimed += shrink_page_list(&page_list, mz, sc,
1527 priority, &nr_dirty, &nr_writeback); 1569 priority, &nr_dirty, &nr_writeback);
1528 } 1570 }
1529 1571
1530 local_irq_disable(); 1572 spin_lock_irq(&zone->lru_lock);
1573
1531 if (current_is_kswapd()) 1574 if (current_is_kswapd())
1532 __count_vm_events(KSWAPD_STEAL, nr_reclaimed); 1575 __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
1533 __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed); 1576 __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
1534 1577
1535 putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list); 1578 putback_inactive_pages(mz, &page_list);
1579
1580 __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
1581 __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
1582
1583 spin_unlock_irq(&zone->lru_lock);
1584
1585 free_hot_cold_page_list(&page_list, 1);
1536 1586
1537 /* 1587 /*
1538 * If reclaim is isolating dirty pages under writeback, it implies 1588 * If reclaim is isolating dirty pages under writeback, it implies
@@ -1588,30 +1638,47 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1588 1638
1589static void move_active_pages_to_lru(struct zone *zone, 1639static void move_active_pages_to_lru(struct zone *zone,
1590 struct list_head *list, 1640 struct list_head *list,
1641 struct list_head *pages_to_free,
1591 enum lru_list lru) 1642 enum lru_list lru)
1592{ 1643{
1593 unsigned long pgmoved = 0; 1644 unsigned long pgmoved = 0;
1594 struct pagevec pvec;
1595 struct page *page; 1645 struct page *page;
1596 1646
1597 pagevec_init(&pvec, 1); 1647 if (buffer_heads_over_limit) {
1648 spin_unlock_irq(&zone->lru_lock);
1649 list_for_each_entry(page, list, lru) {
1650 if (page_has_private(page) && trylock_page(page)) {
1651 if (page_has_private(page))
1652 try_to_release_page(page, 0);
1653 unlock_page(page);
1654 }
1655 }
1656 spin_lock_irq(&zone->lru_lock);
1657 }
1598 1658
1599 while (!list_empty(list)) { 1659 while (!list_empty(list)) {
1660 struct lruvec *lruvec;
1661
1600 page = lru_to_page(list); 1662 page = lru_to_page(list);
1601 1663
1602 VM_BUG_ON(PageLRU(page)); 1664 VM_BUG_ON(PageLRU(page));
1603 SetPageLRU(page); 1665 SetPageLRU(page);
1604 1666
1605 list_move(&page->lru, &zone->lru[lru].list); 1667 lruvec = mem_cgroup_lru_add_list(zone, page, lru);
1606 mem_cgroup_add_lru_list(page, lru); 1668 list_move(&page->lru, &lruvec->lists[lru]);
1607 pgmoved += hpage_nr_pages(page); 1669 pgmoved += hpage_nr_pages(page);
1608 1670
1609 if (!pagevec_add(&pvec, page) || list_empty(list)) { 1671 if (put_page_testzero(page)) {
1610 spin_unlock_irq(&zone->lru_lock); 1672 __ClearPageLRU(page);
1611 if (buffer_heads_over_limit) 1673 __ClearPageActive(page);
1612 pagevec_strip(&pvec); 1674 del_page_from_lru_list(zone, page, lru);
1613 __pagevec_release(&pvec); 1675
1614 spin_lock_irq(&zone->lru_lock); 1676 if (unlikely(PageCompound(page))) {
1677 spin_unlock_irq(&zone->lru_lock);
1678 (*get_compound_page_dtor(page))(page);
1679 spin_lock_irq(&zone->lru_lock);
1680 } else
1681 list_add(&page->lru, pages_to_free);
1615 } 1682 }
1616 } 1683 }
1617 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); 1684 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1619,19 +1686,22 @@ static void move_active_pages_to_lru(struct zone *zone,
1619 __count_vm_events(PGDEACTIVATE, pgmoved); 1686 __count_vm_events(PGDEACTIVATE, pgmoved);
1620} 1687}
1621 1688
1622static void shrink_active_list(unsigned long nr_pages, struct zone *zone, 1689static void shrink_active_list(unsigned long nr_to_scan,
1623 struct scan_control *sc, int priority, int file) 1690 struct mem_cgroup_zone *mz,
1691 struct scan_control *sc,
1692 int priority, int file)
1624{ 1693{
1625 unsigned long nr_taken; 1694 unsigned long nr_taken;
1626 unsigned long pgscanned; 1695 unsigned long nr_scanned;
1627 unsigned long vm_flags; 1696 unsigned long vm_flags;
1628 LIST_HEAD(l_hold); /* The pages which were snipped off */ 1697 LIST_HEAD(l_hold); /* The pages which were snipped off */
1629 LIST_HEAD(l_active); 1698 LIST_HEAD(l_active);
1630 LIST_HEAD(l_inactive); 1699 LIST_HEAD(l_inactive);
1631 struct page *page; 1700 struct page *page;
1632 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); 1701 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
1633 unsigned long nr_rotated = 0; 1702 unsigned long nr_rotated = 0;
1634 isolate_mode_t reclaim_mode = ISOLATE_ACTIVE; 1703 isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
1704 struct zone *zone = mz->zone;
1635 1705
1636 lru_add_drain(); 1706 lru_add_drain();
1637 1707
@@ -1641,26 +1711,16 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1641 reclaim_mode |= ISOLATE_CLEAN; 1711 reclaim_mode |= ISOLATE_CLEAN;
1642 1712
1643 spin_lock_irq(&zone->lru_lock); 1713 spin_lock_irq(&zone->lru_lock);
1644 if (scanning_global_lru(sc)) { 1714
1645 nr_taken = isolate_pages_global(nr_pages, &l_hold, 1715 nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold,
1646 &pgscanned, sc->order, 1716 &nr_scanned, sc->order,
1647 reclaim_mode, zone, 1717 reclaim_mode, 1, file);
1648 1, file); 1718 if (global_reclaim(sc))
1649 zone->pages_scanned += pgscanned; 1719 zone->pages_scanned += nr_scanned;
1650 } else {
1651 nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
1652 &pgscanned, sc->order,
1653 reclaim_mode, zone,
1654 sc->mem_cgroup, 1, file);
1655 /*
1656 * mem_cgroup_isolate_pages() keeps track of
1657 * scanned pages on its own.
1658 */
1659 }
1660 1720
1661 reclaim_stat->recent_scanned[file] += nr_taken; 1721 reclaim_stat->recent_scanned[file] += nr_taken;
1662 1722
1663 __count_zone_vm_events(PGREFILL, zone, pgscanned); 1723 __count_zone_vm_events(PGREFILL, zone, nr_scanned);
1664 if (file) 1724 if (file)
1665 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken); 1725 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
1666 else 1726 else
@@ -1678,7 +1738,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1678 continue; 1738 continue;
1679 } 1739 }
1680 1740
1681 if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) { 1741 if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
1682 nr_rotated += hpage_nr_pages(page); 1742 nr_rotated += hpage_nr_pages(page);
1683 /* 1743 /*
1684 * Identify referenced, file-backed active pages and 1744 * Identify referenced, file-backed active pages and
@@ -1711,12 +1771,14 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1711 */ 1771 */
1712 reclaim_stat->recent_rotated[file] += nr_rotated; 1772 reclaim_stat->recent_rotated[file] += nr_rotated;
1713 1773
1714 move_active_pages_to_lru(zone, &l_active, 1774 move_active_pages_to_lru(zone, &l_active, &l_hold,
1715 LRU_ACTIVE + file * LRU_FILE); 1775 LRU_ACTIVE + file * LRU_FILE);
1716 move_active_pages_to_lru(zone, &l_inactive, 1776 move_active_pages_to_lru(zone, &l_inactive, &l_hold,
1717 LRU_BASE + file * LRU_FILE); 1777 LRU_BASE + file * LRU_FILE);
1718 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); 1778 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
1719 spin_unlock_irq(&zone->lru_lock); 1779 spin_unlock_irq(&zone->lru_lock);
1780
1781 free_hot_cold_page_list(&l_hold, 1);
1720} 1782}
1721 1783
1722#ifdef CONFIG_SWAP 1784#ifdef CONFIG_SWAP
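
Both putback_inactive_pages() and move_active_pages_to_lru() now release isolated pages the same way rather than batching them through a pagevec: the isolation reference is dropped with put_page_testzero() under zone->lru_lock, compound pages go straight to their destructor with the lock temporarily dropped, and everything else is collected on a local list so a single free_hot_cold_page_list() call can free it once the lock is released. A condensed sketch of that shared step, assuming it is called with zone->lru_lock held (the helper name is illustrative, not part of the patch):

static void example_release_isolated(struct zone *zone, struct page *page,
				     enum lru_list lru,
				     struct list_head *pages_to_free)
{
	if (!put_page_testzero(page))
		return;	/* another holder keeps the page; it stays on the LRU */

	__ClearPageLRU(page);
	__ClearPageActive(page);
	del_page_from_lru_list(zone, page, lru);

	if (unlikely(PageCompound(page))) {
		/* drop the LRU lock while the compound page is freed */
		spin_unlock_irq(&zone->lru_lock);
		(*get_compound_page_dtor(page))(page);
		spin_lock_irq(&zone->lru_lock);
	} else
		list_add(&page->lru, pages_to_free);
}
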
@@ -1741,10 +1803,8 @@ static int inactive_anon_is_low_global(struct zone *zone)
1741 * Returns true if the zone does not have enough inactive anon pages, 1803 * Returns true if the zone does not have enough inactive anon pages,
1742 * meaning some active anon pages need to be deactivated. 1804 * meaning some active anon pages need to be deactivated.
1743 */ 1805 */
1744static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc) 1806static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
1745{ 1807{
1746 int low;
1747
1748 /* 1808 /*
1749 * If we don't have swap space, anonymous page deactivation 1809 * If we don't have swap space, anonymous page deactivation
1750 * is pointless. 1810 * is pointless.
@@ -1752,15 +1812,14 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
1752 if (!total_swap_pages) 1812 if (!total_swap_pages)
1753 return 0; 1813 return 0;
1754 1814
1755 if (scanning_global_lru(sc)) 1815 if (!scanning_global_lru(mz))
1756 low = inactive_anon_is_low_global(zone); 1816 return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
1757 else 1817 mz->zone);
1758 low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone); 1818
1759 return low; 1819 return inactive_anon_is_low_global(mz->zone);
1760} 1820}
1761#else 1821#else
1762static inline int inactive_anon_is_low(struct zone *zone, 1822static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
1763 struct scan_control *sc)
1764{ 1823{
1765 return 0; 1824 return 0;
1766} 1825}
@@ -1778,8 +1837,7 @@ static int inactive_file_is_low_global(struct zone *zone)
1778 1837
1779/** 1838/**
1780 * inactive_file_is_low - check if file pages need to be deactivated 1839 * inactive_file_is_low - check if file pages need to be deactivated
1781 * @zone: zone to check 1840 * @mz: memory cgroup and zone to check
1782 * @sc: scan control of this context
1783 * 1841 *
1784 * When the system is doing streaming IO, memory pressure here 1842 * When the system is doing streaming IO, memory pressure here
1785 * ensures that active file pages get deactivated, until more 1843 * ensures that active file pages get deactivated, until more
@@ -1791,45 +1849,44 @@ static int inactive_file_is_low_global(struct zone *zone)
1791 * This uses a different ratio than the anonymous pages, because 1849 * This uses a different ratio than the anonymous pages, because
1792 * the page cache uses a use-once replacement algorithm. 1850 * the page cache uses a use-once replacement algorithm.
1793 */ 1851 */
1794static int inactive_file_is_low(struct zone *zone, struct scan_control *sc) 1852static int inactive_file_is_low(struct mem_cgroup_zone *mz)
1795{ 1853{
1796 int low; 1854 if (!scanning_global_lru(mz))
1855 return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
1856 mz->zone);
1797 1857
1798 if (scanning_global_lru(sc)) 1858 return inactive_file_is_low_global(mz->zone);
1799 low = inactive_file_is_low_global(zone);
1800 else
1801 low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
1802 return low;
1803} 1859}
1804 1860
1805static int inactive_list_is_low(struct zone *zone, struct scan_control *sc, 1861static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
1806 int file)
1807{ 1862{
1808 if (file) 1863 if (file)
1809 return inactive_file_is_low(zone, sc); 1864 return inactive_file_is_low(mz);
1810 else 1865 else
1811 return inactive_anon_is_low(zone, sc); 1866 return inactive_anon_is_low(mz);
1812} 1867}
1813 1868
1814static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, 1869static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1815 struct zone *zone, struct scan_control *sc, int priority) 1870 struct mem_cgroup_zone *mz,
1871 struct scan_control *sc, int priority)
1816{ 1872{
1817 int file = is_file_lru(lru); 1873 int file = is_file_lru(lru);
1818 1874
1819 if (is_active_lru(lru)) { 1875 if (is_active_lru(lru)) {
1820 if (inactive_list_is_low(zone, sc, file)) 1876 if (inactive_list_is_low(mz, file))
1821 shrink_active_list(nr_to_scan, zone, sc, priority, file); 1877 shrink_active_list(nr_to_scan, mz, sc, priority, file);
1822 return 0; 1878 return 0;
1823 } 1879 }
1824 1880
1825 return shrink_inactive_list(nr_to_scan, zone, sc, priority, file); 1881 return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
1826} 1882}
1827 1883
1828static int vmscan_swappiness(struct scan_control *sc) 1884static int vmscan_swappiness(struct mem_cgroup_zone *mz,
1885 struct scan_control *sc)
1829{ 1886{
1830 if (scanning_global_lru(sc)) 1887 if (global_reclaim(sc))
1831 return vm_swappiness; 1888 return vm_swappiness;
1832 return mem_cgroup_swappiness(sc->mem_cgroup); 1889 return mem_cgroup_swappiness(mz->mem_cgroup);
1833} 1890}
1834 1891
1835/* 1892/*
@@ -1840,15 +1897,15 @@ static int vmscan_swappiness(struct scan_control *sc)
1840 * 1897 *
1841 * nr[0] = anon pages to scan; nr[1] = file pages to scan 1898 * nr[0] = anon pages to scan; nr[1] = file pages to scan
1842 */ 1899 */
1843static void get_scan_count(struct zone *zone, struct scan_control *sc, 1900static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
1844 unsigned long *nr, int priority) 1901 unsigned long *nr, int priority)
1845{ 1902{
1846 unsigned long anon, file, free; 1903 unsigned long anon, file, free;
1847 unsigned long anon_prio, file_prio; 1904 unsigned long anon_prio, file_prio;
1848 unsigned long ap, fp; 1905 unsigned long ap, fp;
1849 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); 1906 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
1850 u64 fraction[2], denominator; 1907 u64 fraction[2], denominator;
1851 enum lru_list l; 1908 enum lru_list lru;
1852 int noswap = 0; 1909 int noswap = 0;
1853 bool force_scan = false; 1910 bool force_scan = false;
1854 1911
@@ -1862,9 +1919,9 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
1862 * latencies, so it's better to scan a minimum amount there as 1919 * latencies, so it's better to scan a minimum amount there as
1863 * well. 1920 * well.
1864 */ 1921 */
1865 if (scanning_global_lru(sc) && current_is_kswapd()) 1922 if (current_is_kswapd() && mz->zone->all_unreclaimable)
1866 force_scan = true; 1923 force_scan = true;
1867 if (!scanning_global_lru(sc)) 1924 if (!global_reclaim(sc))
1868 force_scan = true; 1925 force_scan = true;
1869 1926
1870 /* If we have no swap space, do not bother scanning anon pages. */ 1927 /* If we have no swap space, do not bother scanning anon pages. */
@@ -1876,16 +1933,16 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
1876 goto out; 1933 goto out;
1877 } 1934 }
1878 1935
1879 anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + 1936 anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
1880 zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); 1937 zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
1881 file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + 1938 file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
1882 zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); 1939 zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
1883 1940
1884 if (scanning_global_lru(sc)) { 1941 if (global_reclaim(sc)) {
1885 free = zone_page_state(zone, NR_FREE_PAGES); 1942 free = zone_page_state(mz->zone, NR_FREE_PAGES);
1886 /* If we have very few page cache pages, 1943 /* If we have very few page cache pages,
1887 force-scan anon pages. */ 1944 force-scan anon pages. */
1888 if (unlikely(file + free <= high_wmark_pages(zone))) { 1945 if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
1889 fraction[0] = 1; 1946 fraction[0] = 1;
1890 fraction[1] = 0; 1947 fraction[1] = 0;
1891 denominator = 1; 1948 denominator = 1;
@@ -1897,8 +1954,8 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
1897 * With swappiness at 100, anonymous and file have the same priority. 1954 * With swappiness at 100, anonymous and file have the same priority.
1898 * This scanning priority is essentially the inverse of IO cost. 1955 * This scanning priority is essentially the inverse of IO cost.
1899 */ 1956 */
1900 anon_prio = vmscan_swappiness(sc); 1957 anon_prio = vmscan_swappiness(mz, sc);
1901 file_prio = 200 - vmscan_swappiness(sc); 1958 file_prio = 200 - vmscan_swappiness(mz, sc);
1902 1959
1903 /* 1960 /*
1904 * OK, so we have swap space and a fair amount of page cache 1961 * OK, so we have swap space and a fair amount of page cache
@@ -1911,7 +1968,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
1911 * 1968 *
1912 * anon in [0], file in [1] 1969 * anon in [0], file in [1]
1913 */ 1970 */
1914 spin_lock_irq(&zone->lru_lock); 1971 spin_lock_irq(&mz->zone->lru_lock);
1915 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { 1972 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
1916 reclaim_stat->recent_scanned[0] /= 2; 1973 reclaim_stat->recent_scanned[0] /= 2;
1917 reclaim_stat->recent_rotated[0] /= 2; 1974 reclaim_stat->recent_rotated[0] /= 2;
@@ -1932,24 +1989,24 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
1932 1989
1933 fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1); 1990 fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
1934 fp /= reclaim_stat->recent_rotated[1] + 1; 1991 fp /= reclaim_stat->recent_rotated[1] + 1;
1935 spin_unlock_irq(&zone->lru_lock); 1992 spin_unlock_irq(&mz->zone->lru_lock);
1936 1993
1937 fraction[0] = ap; 1994 fraction[0] = ap;
1938 fraction[1] = fp; 1995 fraction[1] = fp;
1939 denominator = ap + fp + 1; 1996 denominator = ap + fp + 1;
1940out: 1997out:
1941 for_each_evictable_lru(l) { 1998 for_each_evictable_lru(lru) {
1942 int file = is_file_lru(l); 1999 int file = is_file_lru(lru);
1943 unsigned long scan; 2000 unsigned long scan;
1944 2001
1945 scan = zone_nr_lru_pages(zone, sc, l); 2002 scan = zone_nr_lru_pages(mz, lru);
1946 if (priority || noswap) { 2003 if (priority || noswap) {
1947 scan >>= priority; 2004 scan >>= priority;
1948 if (!scan && force_scan) 2005 if (!scan && force_scan)
1949 scan = SWAP_CLUSTER_MAX; 2006 scan = SWAP_CLUSTER_MAX;
1950 scan = div64_u64(scan * fraction[file], denominator); 2007 scan = div64_u64(scan * fraction[file], denominator);
1951 } 2008 }
1952 nr[l] = scan; 2009 nr[lru] = scan;
1953 } 2010 }
1954} 2011}
1955 2012
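
As a worked example of the arithmetic above, with purely illustrative numbers: at swappiness 60, anon_prio is 60 and file_prio is 140; if recent_scanned/recent_rotated are 1000/100 for anon and 10000/1000 for file, then ap = 61 * 1001 / 101 = 604 and fp = 141 * 10001 / 1001 = 1408, giving a denominator of 2013. An inactive file list of 100000 pages scanned at priority 12 starts from 100000 >> 12 = 24 pages, which the file fraction then scales to 24 * 1408 / 2013 = 16 pages for that pass.
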
@@ -1960,7 +2017,7 @@ out:
1960 * back to the allocator and call try_to_compact_zone(), we ensure that 2017 * back to the allocator and call try_to_compact_zone(), we ensure that
1961 * there are enough free pages for it to be likely successful 2018 * there are enough free pages for it to be likely successful
1962 */ 2019 */
1963static inline bool should_continue_reclaim(struct zone *zone, 2020static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
1964 unsigned long nr_reclaimed, 2021 unsigned long nr_reclaimed,
1965 unsigned long nr_scanned, 2022 unsigned long nr_scanned,
1966 struct scan_control *sc) 2023 struct scan_control *sc)
@@ -2000,15 +2057,15 @@ static inline bool should_continue_reclaim(struct zone *zone,
2000 * inactive lists are large enough, continue reclaiming 2057 * inactive lists are large enough, continue reclaiming
2001 */ 2058 */
2002 pages_for_compaction = (2UL << sc->order); 2059 pages_for_compaction = (2UL << sc->order);
2003 inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); 2060 inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
2004 if (nr_swap_pages > 0) 2061 if (nr_swap_pages > 0)
2005 inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); 2062 inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
2006 if (sc->nr_reclaimed < pages_for_compaction && 2063 if (sc->nr_reclaimed < pages_for_compaction &&
2007 inactive_lru_pages > pages_for_compaction) 2064 inactive_lru_pages > pages_for_compaction)
2008 return true; 2065 return true;
2009 2066
2010 /* If compaction would go ahead or the allocation would succeed, stop */ 2067 /* If compaction would go ahead or the allocation would succeed, stop */
2011 switch (compaction_suitable(zone, sc->order)) { 2068 switch (compaction_suitable(mz->zone, sc->order)) {
2012 case COMPACT_PARTIAL: 2069 case COMPACT_PARTIAL:
2013 case COMPACT_CONTINUE: 2070 case COMPACT_CONTINUE:
2014 return false; 2071 return false;
@@ -2020,12 +2077,12 @@ static inline bool should_continue_reclaim(struct zone *zone,
2020/* 2077/*
2021 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 2078 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
2022 */ 2079 */
2023static void shrink_zone(int priority, struct zone *zone, 2080static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
2024 struct scan_control *sc) 2081 struct scan_control *sc)
2025{ 2082{
2026 unsigned long nr[NR_LRU_LISTS]; 2083 unsigned long nr[NR_LRU_LISTS];
2027 unsigned long nr_to_scan; 2084 unsigned long nr_to_scan;
2028 enum lru_list l; 2085 enum lru_list lru;
2029 unsigned long nr_reclaimed, nr_scanned; 2086 unsigned long nr_reclaimed, nr_scanned;
2030 unsigned long nr_to_reclaim = sc->nr_to_reclaim; 2087 unsigned long nr_to_reclaim = sc->nr_to_reclaim;
2031 struct blk_plug plug; 2088 struct blk_plug plug;
@@ -2033,19 +2090,19 @@ static void shrink_zone(int priority, struct zone *zone,
2033restart: 2090restart:
2034 nr_reclaimed = 0; 2091 nr_reclaimed = 0;
2035 nr_scanned = sc->nr_scanned; 2092 nr_scanned = sc->nr_scanned;
2036 get_scan_count(zone, sc, nr, priority); 2093 get_scan_count(mz, sc, nr, priority);
2037 2094
2038 blk_start_plug(&plug); 2095 blk_start_plug(&plug);
2039 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || 2096 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
2040 nr[LRU_INACTIVE_FILE]) { 2097 nr[LRU_INACTIVE_FILE]) {
2041 for_each_evictable_lru(l) { 2098 for_each_evictable_lru(lru) {
2042 if (nr[l]) { 2099 if (nr[lru]) {
2043 nr_to_scan = min_t(unsigned long, 2100 nr_to_scan = min_t(unsigned long,
2044 nr[l], SWAP_CLUSTER_MAX); 2101 nr[lru], SWAP_CLUSTER_MAX);
2045 nr[l] -= nr_to_scan; 2102 nr[lru] -= nr_to_scan;
2046 2103
2047 nr_reclaimed += shrink_list(l, nr_to_scan, 2104 nr_reclaimed += shrink_list(lru, nr_to_scan,
2048 zone, sc, priority); 2105 mz, sc, priority);
2049 } 2106 }
2050 } 2107 }
2051 /* 2108 /*
@@ -2066,17 +2123,89 @@ restart:
2066 * Even if we did not try to evict anon pages at all, we want to 2123 * Even if we did not try to evict anon pages at all, we want to
2067 * rebalance the anon lru active/inactive ratio. 2124 * rebalance the anon lru active/inactive ratio.
2068 */ 2125 */
2069 if (inactive_anon_is_low(zone, sc)) 2126 if (inactive_anon_is_low(mz))
2070 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); 2127 shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
2071 2128
2072 /* reclaim/compaction might need reclaim to continue */ 2129 /* reclaim/compaction might need reclaim to continue */
2073 if (should_continue_reclaim(zone, nr_reclaimed, 2130 if (should_continue_reclaim(mz, nr_reclaimed,
2074 sc->nr_scanned - nr_scanned, sc)) 2131 sc->nr_scanned - nr_scanned, sc))
2075 goto restart; 2132 goto restart;
2076 2133
2077 throttle_vm_writeout(sc->gfp_mask); 2134 throttle_vm_writeout(sc->gfp_mask);
2078} 2135}
2079 2136
2137static void shrink_zone(int priority, struct zone *zone,
2138 struct scan_control *sc)
2139{
2140 struct mem_cgroup *root = sc->target_mem_cgroup;
2141 struct mem_cgroup_reclaim_cookie reclaim = {
2142 .zone = zone,
2143 .priority = priority,
2144 };
2145 struct mem_cgroup *memcg;
2146
2147 memcg = mem_cgroup_iter(root, NULL, &reclaim);
2148 do {
2149 struct mem_cgroup_zone mz = {
2150 .mem_cgroup = memcg,
2151 .zone = zone,
2152 };
2153
2154 shrink_mem_cgroup_zone(priority, &mz, sc);
2155 /*
2156 * Limit reclaim has historically picked one memcg and
2157 * scanned it with decreasing priority levels until
2158 * nr_to_reclaim had been reclaimed. This priority
2159 * cycle is thus over after a single memcg.
2160 *
2161 * Direct reclaim and kswapd, on the other hand, have
2162 * to scan all memory cgroups to fulfill the overall
2163 * scan target for the zone.
2164 */
2165 if (!global_reclaim(sc)) {
2166 mem_cgroup_iter_break(root, memcg);
2167 break;
2168 }
2169 memcg = mem_cgroup_iter(root, memcg, &reclaim);
2170 } while (memcg);
2171}
2172
2173/* Returns true if compaction should go ahead for a high-order request */
2174static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
2175{
2176 unsigned long balance_gap, watermark;
2177 bool watermark_ok;
2178
2179 /* Do not consider compaction for orders reclaim is meant to satisfy */
2180 if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
2181 return false;
2182
2183 /*
2184 * Compaction takes time to run and there are potentially other
2185 * callers using the pages just freed. Continue reclaiming until
2186 * there is a buffer of free pages available to give compaction
2187 * a reasonable chance of completing and allocating the page
2188 */
2189 balance_gap = min(low_wmark_pages(zone),
2190 (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
2191 KSWAPD_ZONE_BALANCE_GAP_RATIO);
2192 watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
2193 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
2194
2195 /*
2196 * If compaction is deferred, reclaim up to a point where
2197 * compaction will have a chance of success when re-enabled
2198 */
2199 if (compaction_deferred(zone))
2200 return watermark_ok;
2201
2202 /* If compaction is not ready to start, keep reclaiming */
2203 if (!compaction_suitable(zone, sc->order))
2204 return false;
2205
2206 return watermark_ok;
2207}
2208
2080/* 2209/*
2081 * This is the direct reclaim path, for page-allocating processes. We only 2210 * This is the direct reclaim path, for page-allocating processes. We only
2082 * try to reclaim pages from zones which will satisfy the caller's allocation 2211 * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -2094,8 +2223,9 @@ restart:
2094 * scan then give up on it. 2223 * scan then give up on it.
2095 * 2224 *
2096 * This function returns true if a zone is being reclaimed for a costly 2225 * This function returns true if a zone is being reclaimed for a costly
2097 * high-order allocation and compaction is either ready to begin or deferred. 2226 * high-order allocation and compaction is ready to begin. This indicates to
2098 * This indicates to the caller that it should retry the allocation or fail. 2227 * the caller that it should consider retrying the allocation instead of
2228 * further reclaim.
2099 */ 2229 */
2100static bool shrink_zones(int priority, struct zonelist *zonelist, 2230static bool shrink_zones(int priority, struct zonelist *zonelist,
2101 struct scan_control *sc) 2231 struct scan_control *sc)
@@ -2104,7 +2234,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
2104 struct zone *zone; 2234 struct zone *zone;
2105 unsigned long nr_soft_reclaimed; 2235 unsigned long nr_soft_reclaimed;
2106 unsigned long nr_soft_scanned; 2236 unsigned long nr_soft_scanned;
2107 bool should_abort_reclaim = false; 2237 bool aborted_reclaim = false;
2108 2238
2109 for_each_zone_zonelist_nodemask(zone, z, zonelist, 2239 for_each_zone_zonelist_nodemask(zone, z, zonelist,
2110 gfp_zone(sc->gfp_mask), sc->nodemask) { 2240 gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -2114,7 +2244,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
2114 * Take care memory controller reclaiming has small influence 2244 * Take care memory controller reclaiming has small influence
2115 * to global LRU. 2245 * to global LRU.
2116 */ 2246 */
2117 if (scanning_global_lru(sc)) { 2247 if (global_reclaim(sc)) {
2118 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 2248 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2119 continue; 2249 continue;
2120 if (zone->all_unreclaimable && priority != DEF_PRIORITY) 2250 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
@@ -2129,10 +2259,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
2129 * noticeable problem, like transparent huge page 2259 * noticeable problem, like transparent huge page
2130 * allocations. 2260 * allocations.
2131 */ 2261 */
2132 if (sc->order > PAGE_ALLOC_COSTLY_ORDER && 2262 if (compaction_ready(zone, sc)) {
2133 (compaction_suitable(zone, sc->order) || 2263 aborted_reclaim = true;
2134 compaction_deferred(zone))) {
2135 should_abort_reclaim = true;
2136 continue; 2264 continue;
2137 } 2265 }
2138 } 2266 }
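
To give the compaction_ready() threshold above a concrete scale, using an illustrative zone of 262144 present pages: with KSWAPD_ZONE_BALANCE_GAP_RATIO at its in-tree value of 100, balance_gap is min(low_wmark_pages(zone), 2622), and for an order-9 (THP-sized) request the 2UL << sc->order term adds 1024 pages, so reclaim keeps going until roughly high_wmark_pages(zone) + balance_gap + 1024 free pages are available before aborting in favour of compaction.
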
@@ -2154,7 +2282,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
2154 shrink_zone(priority, zone, sc); 2282 shrink_zone(priority, zone, sc);
2155 } 2283 }
2156 2284
2157 return should_abort_reclaim; 2285 return aborted_reclaim;
2158} 2286}
2159 2287
2160static bool zone_reclaimable(struct zone *zone) 2288static bool zone_reclaimable(struct zone *zone)
@@ -2208,25 +2336,25 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2208 struct zoneref *z; 2336 struct zoneref *z;
2209 struct zone *zone; 2337 struct zone *zone;
2210 unsigned long writeback_threshold; 2338 unsigned long writeback_threshold;
2339 bool aborted_reclaim;
2211 2340
2212 get_mems_allowed(); 2341 get_mems_allowed();
2213 delayacct_freepages_start(); 2342 delayacct_freepages_start();
2214 2343
2215 if (scanning_global_lru(sc)) 2344 if (global_reclaim(sc))
2216 count_vm_event(ALLOCSTALL); 2345 count_vm_event(ALLOCSTALL);
2217 2346
2218 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 2347 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
2219 sc->nr_scanned = 0; 2348 sc->nr_scanned = 0;
2220 if (!priority) 2349 if (!priority)
2221 disable_swap_token(sc->mem_cgroup); 2350 disable_swap_token(sc->target_mem_cgroup);
2222 if (shrink_zones(priority, zonelist, sc)) 2351 aborted_reclaim = shrink_zones(priority, zonelist, sc);
2223 break;
2224 2352
2225 /* 2353 /*
2226 * Don't shrink slabs when reclaiming memory from 2354 * Don't shrink slabs when reclaiming memory from
2227 * over limit cgroups 2355 * over limit cgroups
2228 */ 2356 */
2229 if (scanning_global_lru(sc)) { 2357 if (global_reclaim(sc)) {
2230 unsigned long lru_pages = 0; 2358 unsigned long lru_pages = 0;
2231 for_each_zone_zonelist(zone, z, zonelist, 2359 for_each_zone_zonelist(zone, z, zonelist,
2232 gfp_zone(sc->gfp_mask)) { 2360 gfp_zone(sc->gfp_mask)) {
@@ -2287,8 +2415,12 @@ out:
2287 if (oom_killer_disabled) 2415 if (oom_killer_disabled)
2288 return 0; 2416 return 0;
2289 2417
2418 /* Aborted reclaim to try compaction? don't OOM, then */
2419 if (aborted_reclaim)
2420 return 1;
2421
2290 /* top priority shrink_zones still had more to do? don't OOM, then */ 2422 /* top priority shrink_zones still had more to do? don't OOM, then */
2291 if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) 2423 if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
2292 return 1; 2424 return 1;
2293 2425
2294 return 0; 2426 return 0;
@@ -2305,7 +2437,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2305 .may_unmap = 1, 2437 .may_unmap = 1,
2306 .may_swap = 1, 2438 .may_swap = 1,
2307 .order = order, 2439 .order = order,
2308 .mem_cgroup = NULL, 2440 .target_mem_cgroup = NULL,
2309 .nodemask = nodemask, 2441 .nodemask = nodemask,
2310 }; 2442 };
2311 struct shrink_control shrink = { 2443 struct shrink_control shrink = {
@@ -2325,7 +2457,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2325 2457
2326#ifdef CONFIG_CGROUP_MEM_RES_CTLR 2458#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2327 2459
2328unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, 2460unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
2329 gfp_t gfp_mask, bool noswap, 2461 gfp_t gfp_mask, bool noswap,
2330 struct zone *zone, 2462 struct zone *zone,
2331 unsigned long *nr_scanned) 2463 unsigned long *nr_scanned)
@@ -2337,7 +2469,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2337 .may_unmap = 1, 2469 .may_unmap = 1,
2338 .may_swap = !noswap, 2470 .may_swap = !noswap,
2339 .order = 0, 2471 .order = 0,
2340 .mem_cgroup = mem, 2472 .target_mem_cgroup = memcg,
2473 };
2474 struct mem_cgroup_zone mz = {
2475 .mem_cgroup = memcg,
2476 .zone = zone,
2341 }; 2477 };
2342 2478
2343 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2479 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2354,7 +2490,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2354 * will pick up pages from other mem cgroups as well. We hack 2490 * will pick up pages from other mem cgroups as well. We hack
2355 * the priority and make it zero. 2491 * the priority and make it zero.
2356 */ 2492 */
2357 shrink_zone(0, zone, &sc); 2493 shrink_mem_cgroup_zone(0, &mz, &sc);
2358 2494
2359 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); 2495 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
2360 2496
@@ -2362,7 +2498,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2362 return sc.nr_reclaimed; 2498 return sc.nr_reclaimed;
2363} 2499}
2364 2500
2365unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, 2501unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
2366 gfp_t gfp_mask, 2502 gfp_t gfp_mask,
2367 bool noswap) 2503 bool noswap)
2368{ 2504{
@@ -2375,7 +2511,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2375 .may_swap = !noswap, 2511 .may_swap = !noswap,
2376 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2512 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2377 .order = 0, 2513 .order = 0,
2378 .mem_cgroup = mem_cont, 2514 .target_mem_cgroup = memcg,
2379 .nodemask = NULL, /* we don't care the placement */ 2515 .nodemask = NULL, /* we don't care the placement */
2380 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2516 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2381 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), 2517 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2389,7 +2525,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2389 * take care of from where we get pages. So the node where we start the 2525 * take care of from where we get pages. So the node where we start the
2390 * scan does not need to be the current node. 2526 * scan does not need to be the current node.
2391 */ 2527 */
2392 nid = mem_cgroup_select_victim_node(mem_cont); 2528 nid = mem_cgroup_select_victim_node(memcg);
2393 2529
2394 zonelist = NODE_DATA(nid)->node_zonelists; 2530 zonelist = NODE_DATA(nid)->node_zonelists;
2395 2531
@@ -2405,6 +2541,29 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2405} 2541}
2406#endif 2542#endif
2407 2543
2544static void age_active_anon(struct zone *zone, struct scan_control *sc,
2545 int priority)
2546{
2547 struct mem_cgroup *memcg;
2548
2549 if (!total_swap_pages)
2550 return;
2551
2552 memcg = mem_cgroup_iter(NULL, NULL, NULL);
2553 do {
2554 struct mem_cgroup_zone mz = {
2555 .mem_cgroup = memcg,
2556 .zone = zone,
2557 };
2558
2559 if (inactive_anon_is_low(&mz))
2560 shrink_active_list(SWAP_CLUSTER_MAX, &mz,
2561 sc, priority, 0);
2562
2563 memcg = mem_cgroup_iter(NULL, memcg, NULL);
2564 } while (memcg);
2565}
2566
2408/* 2567/*
2409 * pgdat_balanced is used when checking if a node is balanced for high-order 2568 * pgdat_balanced is used when checking if a node is balanced for high-order
2410 * allocations. Only zones that meet watermarks and are in a zone allowed 2569 * allocations. Only zones that meet watermarks and are in a zone allowed
@@ -2525,7 +2684,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
2525 */ 2684 */
2526 .nr_to_reclaim = ULONG_MAX, 2685 .nr_to_reclaim = ULONG_MAX,
2527 .order = order, 2686 .order = order,
2528 .mem_cgroup = NULL, 2687 .target_mem_cgroup = NULL,
2529 }; 2688 };
2530 struct shrink_control shrink = { 2689 struct shrink_control shrink = {
2531 .gfp_mask = sc.gfp_mask, 2690 .gfp_mask = sc.gfp_mask,
@@ -2564,9 +2723,7 @@ loop_again:
2564 * Do some background aging of the anon list, to give 2723 * Do some background aging of the anon list, to give
2565 * pages a chance to be referenced before reclaiming. 2724 * pages a chance to be referenced before reclaiming.
2566 */ 2725 */
2567 if (inactive_anon_is_low(zone, &sc)) 2726 age_active_anon(zone, &sc, priority);
2568 shrink_active_list(SWAP_CLUSTER_MAX, zone,
2569 &sc, priority, 0);
2570 2727
2571 if (!zone_watermark_ok_safe(zone, order, 2728 if (!zone_watermark_ok_safe(zone, order,
2572 high_wmark_pages(zone), 0, 0)) { 2729 high_wmark_pages(zone), 0, 0)) {
@@ -3355,16 +3512,18 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
3355 */ 3512 */
3356static void check_move_unevictable_page(struct page *page, struct zone *zone) 3513static void check_move_unevictable_page(struct page *page, struct zone *zone)
3357{ 3514{
3358 VM_BUG_ON(PageActive(page)); 3515 struct lruvec *lruvec;
3359 3516
3517 VM_BUG_ON(PageActive(page));
3360retry: 3518retry:
3361 ClearPageUnevictable(page); 3519 ClearPageUnevictable(page);
3362 if (page_evictable(page, NULL)) { 3520 if (page_evictable(page, NULL)) {
3363 enum lru_list l = page_lru_base_type(page); 3521 enum lru_list l = page_lru_base_type(page);
3364 3522
3365 __dec_zone_state(zone, NR_UNEVICTABLE); 3523 __dec_zone_state(zone, NR_UNEVICTABLE);
3366 list_move(&page->lru, &zone->lru[l].list); 3524 lruvec = mem_cgroup_lru_move_lists(zone, page,
3367 mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l); 3525 LRU_UNEVICTABLE, l);
3526 list_move(&page->lru, &lruvec->lists[l]);
3368 __inc_zone_state(zone, NR_INACTIVE_ANON + l); 3527 __inc_zone_state(zone, NR_INACTIVE_ANON + l);
3369 __count_vm_event(UNEVICTABLE_PGRESCUED); 3528 __count_vm_event(UNEVICTABLE_PGRESCUED);
3370 } else { 3529 } else {
@@ -3372,8 +3531,9 @@ retry:
3372 * rotate unevictable list 3531 * rotate unevictable list
3373 */ 3532 */
3374 SetPageUnevictable(page); 3533 SetPageUnevictable(page);
3375 list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list); 3534 lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
3376 mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE); 3535 LRU_UNEVICTABLE);
3536 list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
3377 if (page_evictable(page, NULL)) 3537 if (page_evictable(page, NULL))
3378 goto retry; 3538 goto retry;
3379 } 3539 }