Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile     |   1
-rw-r--r--  mm/filemap.c    |  15
-rw-r--r--  mm/memcontrol.c | 419
-rw-r--r--  mm/memory.c     |   2
-rw-r--r--  mm/oom_kill.c   |  75
-rw-r--r--  mm/page_alloc.c |  22
-rw-r--r--  mm/rmap.c       |   4
-rw-r--r--  mm/shmem.c      |  71
-rw-r--r--  mm/shmem_acl.c  | 171
-rw-r--r--  mm/truncate.c   |   6
10 files changed, 433 insertions(+), 353 deletions(-)
diff --git a/mm/Makefile b/mm/Makefile
index 82131d0f8d8..7a68d2ab556 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -22,7 +22,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlb.o
22obj-$(CONFIG_NUMA) += mempolicy.o 22obj-$(CONFIG_NUMA) += mempolicy.o
23obj-$(CONFIG_SPARSEMEM) += sparse.o 23obj-$(CONFIG_SPARSEMEM) += sparse.o
24obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o 24obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
26obj-$(CONFIG_SLOB) += slob.o 25obj-$(CONFIG_SLOB) += slob.o
27obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o 26obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
28obj-$(CONFIG_KSM) += ksm.o 27obj-$(CONFIG_KSM) += ksm.o
diff --git a/mm/filemap.c b/mm/filemap.c
index 8b4d88f9249..96ac6b0eb6c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2240,7 +2240,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2240 size_t count, ssize_t written) 2240 size_t count, ssize_t written)
2241{ 2241{
2242 struct file *file = iocb->ki_filp; 2242 struct file *file = iocb->ki_filp;
2243 struct address_space *mapping = file->f_mapping;
2244 ssize_t status; 2243 ssize_t status;
2245 struct iov_iter i; 2244 struct iov_iter i;
2246 2245
@@ -2252,15 +2251,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2252 *ppos = pos + status; 2251 *ppos = pos + status;
2253 } 2252 }
2254 2253
2255 /*
2256 * If we get here for O_DIRECT writes then we must have fallen through
2257 * to buffered writes (block instantiation inside i_size). So we sync
2258 * the file data here, to try to honour O_DIRECT expectations.
2259 */
2260 if (unlikely(file->f_flags & O_DIRECT) && written)
2261 status = filemap_write_and_wait_range(mapping,
2262 pos, pos + written - 1);
2263
2264 return written ? written : status; 2254 return written ? written : status;
2265} 2255}
2266EXPORT_SYMBOL(generic_file_buffered_write); 2256EXPORT_SYMBOL(generic_file_buffered_write);
@@ -2359,10 +2349,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2359 * semantics. 2349 * semantics.
2360 */ 2350 */
2361 endbyte = pos + written_buffered - written - 1; 2351 endbyte = pos + written_buffered - written - 1;
2362 err = do_sync_mapping_range(file->f_mapping, pos, endbyte, 2352 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
2363 SYNC_FILE_RANGE_WAIT_BEFORE|
2364 SYNC_FILE_RANGE_WRITE|
2365 SYNC_FILE_RANGE_WAIT_AFTER);
2366 if (err == 0) { 2353 if (err == 0) {
2367 written = written_buffered; 2354 written = written_buffered;
2368 invalidate_mapping_pages(mapping, 2355 invalidate_mapping_pages(mapping,
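Taken together, the filemap.c hunks drop the per-call O_DIRECT sync from generic_file_buffered_write() and let the one caller that needs it, __generic_file_aio_write(), flush the byte range itself with filemap_write_and_wait_range(). A condensed sketch of the resulting fallback path, assembled from the hunks above (error handling and unrelated branches trimmed, not a verbatim copy):

/* O_DIRECT write fell back to buffered I/O (block instantiation inside
 * i_size): flush and wait on just the bytes the fallback produced, then
 * drop them from the page cache so O_DIRECT semantics still hold. */
written_buffered = generic_file_buffered_write(iocb, iov, nr_segs, pos,
                                               ppos, count, written);
endbyte = pos + written_buffered - written - 1;
err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
if (err == 0) {
        written = written_buffered;
        /* ...and invalidate_mapping_pages() drops the fallback pages. */
}
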
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9eee80d6d49..488b644e0e8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -38,6 +38,7 @@
38#include <linux/vmalloc.h> 38#include <linux/vmalloc.h>
39#include <linux/mm_inline.h> 39#include <linux/mm_inline.h>
40#include <linux/page_cgroup.h> 40#include <linux/page_cgroup.h>
41#include <linux/cpu.h>
41#include "internal.h" 42#include "internal.h"
42 43
43#include <asm/uaccess.h> 44#include <asm/uaccess.h>
@@ -54,7 +55,6 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/
54#define do_swap_account (0) 55#define do_swap_account (0)
55#endif 56#endif
56 57
57static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */
58#define SOFTLIMIT_EVENTS_THRESH (1000) 58#define SOFTLIMIT_EVENTS_THRESH (1000)
59 59
60/* 60/*
@@ -66,7 +66,7 @@ enum mem_cgroup_stat_index {
66 */ 66 */
67 MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ 67 MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
68 MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ 68 MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */
69 MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */ 69 MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */
70 MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ 70 MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */
71 MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ 71 MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */
72 MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ 72 MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
@@ -275,6 +275,7 @@ enum charge_type {
275static void mem_cgroup_get(struct mem_cgroup *mem); 275static void mem_cgroup_get(struct mem_cgroup *mem);
276static void mem_cgroup_put(struct mem_cgroup *mem); 276static void mem_cgroup_put(struct mem_cgroup *mem);
277static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); 277static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
278static void drain_all_stock_async(void);
278 279
279static struct mem_cgroup_per_zone * 280static struct mem_cgroup_per_zone *
280mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) 281mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
@@ -763,7 +764,13 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
763 task_unlock(task); 764 task_unlock(task);
764 if (!curr) 765 if (!curr)
765 return 0; 766 return 0;
766 if (curr->use_hierarchy) 767 /*
768 * We should check use_hierarchy of "mem", not "curr". Checking
769 * use_hierarchy of "curr" here would make this function return true if
770 * hierarchy is enabled in "curr" and "curr" is a child of "mem" in the
771 * *cgroup* hierarchy (even if use_hierarchy is disabled in "mem").
772 */
773 if (mem->use_hierarchy)
767 ret = css_is_ancestor(&curr->css, &mem->css); 774 ret = css_is_ancestor(&curr->css, &mem->css);
768 else 775 else
769 ret = (curr == mem); 776 ret = (curr == mem);
@@ -1012,7 +1019,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
1012 static char memcg_name[PATH_MAX]; 1019 static char memcg_name[PATH_MAX];
1013 int ret; 1020 int ret;
1014 1021
1015 if (!memcg) 1022 if (!memcg || !p)
1016 return; 1023 return;
1017 1024
1018 1025
@@ -1142,6 +1149,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1142 victim = mem_cgroup_select_victim(root_mem); 1149 victim = mem_cgroup_select_victim(root_mem);
1143 if (victim == root_mem) { 1150 if (victim == root_mem) {
1144 loop++; 1151 loop++;
1152 if (loop >= 1)
1153 drain_all_stock_async();
1145 if (loop >= 2) { 1154 if (loop >= 2) {
1146 /* 1155 /*
1147 * If we have not been able to reclaim 1156 * If we have not been able to reclaim
@@ -1228,7 +1237,7 @@ static void record_last_oom(struct mem_cgroup *mem)
1228 * Currently used to update mapped file statistics, but the routine can be 1237 * Currently used to update mapped file statistics, but the routine can be
1229 * generalized to update other statistics as well. 1238 * generalized to update other statistics as well.
1230 */ 1239 */
1231void mem_cgroup_update_mapped_file_stat(struct page *page, int val) 1240void mem_cgroup_update_file_mapped(struct page *page, int val)
1232{ 1241{
1233 struct mem_cgroup *mem; 1242 struct mem_cgroup *mem;
1234 struct mem_cgroup_stat *stat; 1243 struct mem_cgroup_stat *stat;
@@ -1236,9 +1245,6 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
1236 int cpu; 1245 int cpu;
1237 struct page_cgroup *pc; 1246 struct page_cgroup *pc;
1238 1247
1239 if (!page_is_file_cache(page))
1240 return;
1241
1242 pc = lookup_page_cgroup(page); 1248 pc = lookup_page_cgroup(page);
1243 if (unlikely(!pc)) 1249 if (unlikely(!pc))
1244 return; 1250 return;
@@ -1258,12 +1264,139 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
1258 stat = &mem->stat; 1264 stat = &mem->stat;
1259 cpustat = &stat->cpustat[cpu]; 1265 cpustat = &stat->cpustat[cpu];
1260 1266
1261 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val); 1267 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, val);
1262done: 1268done:
1263 unlock_page_cgroup(pc); 1269 unlock_page_cgroup(pc);
1264} 1270}
1265 1271
1266/* 1272/*
1273 * size of first charge trial. "32" comes from vmscan.c's magic value.
1274 * TODO: maybe necessary to use big numbers in big irons.
1275 */
1276#define CHARGE_SIZE (32 * PAGE_SIZE)
1277struct memcg_stock_pcp {
1278 struct mem_cgroup *cached; /* this never be root cgroup */
1279 int charge;
1280 struct work_struct work;
1281};
1282static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
1283static atomic_t memcg_drain_count;
1284
1285/*
1286 * Try to consume stocked charge on this cpu. On success, PAGE_SIZE is consumed
1287 * from the local stock and true is returned. If the stock is empty or holds
1288 * charges from a cgroup other than the current target, false is returned and
1289 * the stock will be refilled.
1290 */
1291static bool consume_stock(struct mem_cgroup *mem)
1292{
1293 struct memcg_stock_pcp *stock;
1294 bool ret = true;
1295
1296 stock = &get_cpu_var(memcg_stock);
1297 if (mem == stock->cached && stock->charge)
1298 stock->charge -= PAGE_SIZE;
1299 else /* need to call res_counter_charge */
1300 ret = false;
1301 put_cpu_var(memcg_stock);
1302 return ret;
1303}
1304
1305/*
1306 * Return the charges cached in the per-cpu stock to the res_counter and reset the cached information.
1307 */
1308static void drain_stock(struct memcg_stock_pcp *stock)
1309{
1310 struct mem_cgroup *old = stock->cached;
1311
1312 if (stock->charge) {
1313 res_counter_uncharge(&old->res, stock->charge);
1314 if (do_swap_account)
1315 res_counter_uncharge(&old->memsw, stock->charge);
1316 }
1317 stock->cached = NULL;
1318 stock->charge = 0;
1319}
1320
1321/*
1322 * This must be called with preemption disabled, or by a thread
1323 * which is pinned to the local cpu.
1324 */
1325static void drain_local_stock(struct work_struct *dummy)
1326{
1327 struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock);
1328 drain_stock(stock);
1329}
1330
1331/*
1332 * Cache charges (val) taken from the res_counter in the local per-cpu area.
1333 * They will be consumed later by consume_stock().
1334 */
1335static void refill_stock(struct mem_cgroup *mem, int val)
1336{
1337 struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
1338
1339 if (stock->cached != mem) { /* reset if necessary */
1340 drain_stock(stock);
1341 stock->cached = mem;
1342 }
1343 stock->charge += val;
1344 put_cpu_var(memcg_stock);
1345}
1346
1347/*
1348 * Tries to drain stocked charges on other cpus. This function is asynchronous
1349 * and just queues a work item per cpu to drain the stock locally. The caller
1350 * can expect some charges to return to the res_counter later, but cannot wait
1351 * for that.
1352 */
1353static void drain_all_stock_async(void)
1354{
1355 int cpu;
1356 /* This function schedules "drain" asynchronously.
1357 * The result of "drain" is not directly handled by callers, so if
1358 * a drain is already in flight we don't need to schedule another one.
1359 * The WORK_STRUCT_PENDING check in queue_work_on() catches any race;
1360 * we only do a loose check here.
1361 */
1362 if (atomic_read(&memcg_drain_count))
1363 return;
1364 /* Notify other cpus that system-wide "drain" is running */
1365 atomic_inc(&memcg_drain_count);
1366 get_online_cpus();
1367 for_each_online_cpu(cpu) {
1368 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
1369 schedule_work_on(cpu, &stock->work);
1370 }
1371 put_online_cpus();
1372 atomic_dec(&memcg_drain_count);
1373 /* We don't wait for flush_work */
1374}
1375
1376/* This is a synchronous drain interface. */
1377static void drain_all_stock_sync(void)
1378{
1379 /* called when force_empty is called */
1380 atomic_inc(&memcg_drain_count);
1381 schedule_on_each_cpu(drain_local_stock);
1382 atomic_dec(&memcg_drain_count);
1383}
1384
1385static int __cpuinit memcg_stock_cpu_callback(struct notifier_block *nb,
1386 unsigned long action,
1387 void *hcpu)
1388{
1389 int cpu = (unsigned long)hcpu;
1390 struct memcg_stock_pcp *stock;
1391
1392 if (action != CPU_DEAD)
1393 return NOTIFY_OK;
1394 stock = &per_cpu(memcg_stock, cpu);
1395 drain_stock(stock);
1396 return NOTIFY_OK;
1397}
1398
1399/*
1267 * Unlike exported interface, "oom" parameter is added. if oom==true, 1400 * Unlike exported interface, "oom" parameter is added. if oom==true,
1268 * oom-killer can be invoked. 1401 * oom-killer can be invoked.
1269 */ 1402 */
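The stock code above gives each cpu a small cache of pre-charged bytes so the common charge path stays off the shared res_counter; the hierarchical-reclaim hunk earlier schedules drain_all_stock_async() once reclaim starts looping, and force_empty drains synchronously. As a rough illustration of the caching idea only, here is a minimal user-space model with a thread-local stock and an atomic counter standing in for the res_counter (every name and size below is invented for the example):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SZ  4096
#define BATCH    (32 * PAGE_SZ)          /* mirrors CHARGE_SIZE above */

static atomic_long counter_usage;        /* stands in for the res_counter */
static _Thread_local long stock;         /* the thread-local "per-cpu" stock */

/* consume_stock(): fast path, no shared-counter traffic. */
static bool consume_stock(void)
{
        if (stock >= PAGE_SZ) {
                stock -= PAGE_SZ;
                return true;
        }
        return false;
}

/* refill_stock(): keep the unused part of a batch for later fast paths. */
static void refill_stock(long val)
{
        stock += val;
}

/* drain_stock(): hand the cached remainder back to the shared counter,
 * as drain_local_stock()/drain_all_stock_*() do above. */
static void drain_stock(void)
{
        atomic_fetch_sub(&counter_usage, stock);
        stock = 0;
}

static void charge_one_page(void)
{
        if (consume_stock())
                return;
        /* Slow path: charge a whole batch once, stock the remainder. */
        atomic_fetch_add(&counter_usage, BATCH);
        refill_stock(BATCH - PAGE_SZ);
}

int main(void)
{
        for (int i = 0; i < 100; i++)
                charge_one_page();
        printf("before drain: usage=%ld stock=%ld\n",
               (long)atomic_load(&counter_usage), stock);
        drain_stock();
        printf("after drain:  usage=%ld stock=%ld\n",
               (long)atomic_load(&counter_usage), stock);
        return 0;
}

Running this shows the usage overshooting by the stocked remainder until drain_stock() returns it, which is exactly why looping reclaim and force_empty drain the real per-cpu stocks.
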
@@ -1274,6 +1407,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1274 struct mem_cgroup *mem, *mem_over_limit; 1407 struct mem_cgroup *mem, *mem_over_limit;
1275 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 1408 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
1276 struct res_counter *fail_res; 1409 struct res_counter *fail_res;
1410 int csize = CHARGE_SIZE;
1277 1411
1278 if (unlikely(test_thread_flag(TIF_MEMDIE))) { 1412 if (unlikely(test_thread_flag(TIF_MEMDIE))) {
1279 /* Don't account this! */ 1413 /* Don't account this! */
@@ -1298,23 +1432,25 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1298 return 0; 1432 return 0;
1299 1433
1300 VM_BUG_ON(css_is_removed(&mem->css)); 1434 VM_BUG_ON(css_is_removed(&mem->css));
1435 if (mem_cgroup_is_root(mem))
1436 goto done;
1301 1437
1302 while (1) { 1438 while (1) {
1303 int ret = 0; 1439 int ret = 0;
1304 unsigned long flags = 0; 1440 unsigned long flags = 0;
1305 1441
1306 if (mem_cgroup_is_root(mem)) 1442 if (consume_stock(mem))
1307 goto done; 1443 goto charged;
1308 ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); 1444
1445 ret = res_counter_charge(&mem->res, csize, &fail_res);
1309 if (likely(!ret)) { 1446 if (likely(!ret)) {
1310 if (!do_swap_account) 1447 if (!do_swap_account)
1311 break; 1448 break;
1312 ret = res_counter_charge(&mem->memsw, PAGE_SIZE, 1449 ret = res_counter_charge(&mem->memsw, csize, &fail_res);
1313 &fail_res);
1314 if (likely(!ret)) 1450 if (likely(!ret))
1315 break; 1451 break;
1316 /* mem+swap counter fails */ 1452 /* mem+swap counter fails */
1317 res_counter_uncharge(&mem->res, PAGE_SIZE); 1453 res_counter_uncharge(&mem->res, csize);
1318 flags |= MEM_CGROUP_RECLAIM_NOSWAP; 1454 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
1319 mem_over_limit = mem_cgroup_from_res_counter(fail_res, 1455 mem_over_limit = mem_cgroup_from_res_counter(fail_res,
1320 memsw); 1456 memsw);
@@ -1323,6 +1459,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1323 mem_over_limit = mem_cgroup_from_res_counter(fail_res, 1459 mem_over_limit = mem_cgroup_from_res_counter(fail_res,
1324 res); 1460 res);
1325 1461
1462 /* reduce request size and retry */
1463 if (csize > PAGE_SIZE) {
1464 csize = PAGE_SIZE;
1465 continue;
1466 }
1326 if (!(gfp_mask & __GFP_WAIT)) 1467 if (!(gfp_mask & __GFP_WAIT))
1327 goto nomem; 1468 goto nomem;
1328 1469
@@ -1344,14 +1485,15 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1344 1485
1345 if (!nr_retries--) { 1486 if (!nr_retries--) {
1346 if (oom) { 1487 if (oom) {
1347 mutex_lock(&memcg_tasklist);
1348 mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); 1488 mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
1349 mutex_unlock(&memcg_tasklist);
1350 record_last_oom(mem_over_limit); 1489 record_last_oom(mem_over_limit);
1351 } 1490 }
1352 goto nomem; 1491 goto nomem;
1353 } 1492 }
1354 } 1493 }
1494 if (csize > PAGE_SIZE)
1495 refill_stock(mem, csize - PAGE_SIZE);
1496charged:
1355 /* 1497 /*
1356 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. 1498 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
1357 * if they exceeds softlimit. 1499 * if they exceeds softlimit.
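Because the hunks above interleave the removed and added lines, the reshaped charge loop in __mem_cgroup_try_charge() is easier to read in one piece. The fragment below is assembled from the added lines (reclaim, retry and OOM handling elided) and is illustrative rather than verbatim:

int csize = CHARGE_SIZE;                 /* try a 32-page batch first */

while (1) {
        int ret = 0;

        if (consume_stock(mem))
                goto charged;            /* fast path: per-cpu stock had a page */

        ret = res_counter_charge(&mem->res, csize, &fail_res);
        if (likely(!ret)) {
                if (!do_swap_account)
                        break;
                ret = res_counter_charge(&mem->memsw, csize, &fail_res);
                if (likely(!ret))
                        break;
                /* mem+swap counter failed: roll back the mem charge */
                res_counter_uncharge(&mem->res, csize);
        }

        /* Over a limit: reduce the request to one page and retry before
         * falling into the reclaim / OOM path (unchanged, not shown). */
        if (csize > PAGE_SIZE) {
                csize = PAGE_SIZE;
                continue;
        }
        /* ... reclaim, retry, possibly OOM ... */
}

if (csize > PAGE_SIZE)
        refill_stock(mem, csize - PAGE_SIZE);   /* stock the unused 31 pages */
charged:
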
@@ -1366,6 +1508,21 @@ nomem:
1366} 1508}
1367 1509
1368/* 1510/*
1511 * Sometimes we have to undo a charge we got by try_charge().
1512 * This function does the uncharge and drops the css refcount
1513 * taken by try_charge().
1514 */
1515static void mem_cgroup_cancel_charge(struct mem_cgroup *mem)
1516{
1517 if (!mem_cgroup_is_root(mem)) {
1518 res_counter_uncharge(&mem->res, PAGE_SIZE);
1519 if (do_swap_account)
1520 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1521 }
1522 css_put(&mem->css);
1523}
1524
1525/*
1369 * A helper function to get mem_cgroup from ID. must be called under 1526 * A helper function to get mem_cgroup from ID. must be called under
1370 * rcu_read_lock(). The caller must check css_is_removed() or some if 1527 * rcu_read_lock(). The caller must check css_is_removed() or some if
1371 * it's concern. (dropping refcnt from swap can be called against removed 1528 * it's concern. (dropping refcnt from swap can be called against removed
@@ -1428,12 +1585,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
1428 lock_page_cgroup(pc); 1585 lock_page_cgroup(pc);
1429 if (unlikely(PageCgroupUsed(pc))) { 1586 if (unlikely(PageCgroupUsed(pc))) {
1430 unlock_page_cgroup(pc); 1587 unlock_page_cgroup(pc);
1431 if (!mem_cgroup_is_root(mem)) { 1588 mem_cgroup_cancel_charge(mem);
1432 res_counter_uncharge(&mem->res, PAGE_SIZE);
1433 if (do_swap_account)
1434 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1435 }
1436 css_put(&mem->css);
1437 return; 1589 return;
1438 } 1590 }
1439 1591
@@ -1466,27 +1618,22 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
1466} 1618}
1467 1619
1468/** 1620/**
1469 * mem_cgroup_move_account - move account of the page 1621 * __mem_cgroup_move_account - move account of the page
1470 * @pc: page_cgroup of the page. 1622 * @pc: page_cgroup of the page.
1471 * @from: mem_cgroup which the page is moved from. 1623 * @from: mem_cgroup which the page is moved from.
1472 * @to: mem_cgroup which the page is moved to. @from != @to. 1624 * @to: mem_cgroup which the page is moved to. @from != @to.
1473 * 1625 *
1474 * The caller must confirm following. 1626 * The caller must confirm following.
1475 * - page is not on LRU (isolate_page() is useful.) 1627 * - page is not on LRU (isolate_page() is useful.)
1476 * 1628 * - the pc is locked, used, and ->mem_cgroup points to @from.
1477 * returns 0 at success,
1478 * returns -EBUSY when lock is busy or "pc" is unstable.
1479 * 1629 *
1480 * This function does "uncharge" from old cgroup but doesn't do "charge" to 1630 * This function does "uncharge" from old cgroup but doesn't do "charge" to
1481 * new cgroup. It should be done by a caller. 1631 * new cgroup. It should be done by a caller.
1482 */ 1632 */
1483 1633
1484static int mem_cgroup_move_account(struct page_cgroup *pc, 1634static void __mem_cgroup_move_account(struct page_cgroup *pc,
1485 struct mem_cgroup *from, struct mem_cgroup *to) 1635 struct mem_cgroup *from, struct mem_cgroup *to)
1486{ 1636{
1487 struct mem_cgroup_per_zone *from_mz, *to_mz;
1488 int nid, zid;
1489 int ret = -EBUSY;
1490 struct page *page; 1637 struct page *page;
1491 int cpu; 1638 int cpu;
1492 struct mem_cgroup_stat *stat; 1639 struct mem_cgroup_stat *stat;
@@ -1494,38 +1641,27 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
1494 1641
1495 VM_BUG_ON(from == to); 1642 VM_BUG_ON(from == to);
1496 VM_BUG_ON(PageLRU(pc->page)); 1643 VM_BUG_ON(PageLRU(pc->page));
1497 1644 VM_BUG_ON(!PageCgroupLocked(pc));
1498 nid = page_cgroup_nid(pc); 1645 VM_BUG_ON(!PageCgroupUsed(pc));
1499 zid = page_cgroup_zid(pc); 1646 VM_BUG_ON(pc->mem_cgroup != from);
1500 from_mz = mem_cgroup_zoneinfo(from, nid, zid);
1501 to_mz = mem_cgroup_zoneinfo(to, nid, zid);
1502
1503 if (!trylock_page_cgroup(pc))
1504 return ret;
1505
1506 if (!PageCgroupUsed(pc))
1507 goto out;
1508
1509 if (pc->mem_cgroup != from)
1510 goto out;
1511 1647
1512 if (!mem_cgroup_is_root(from)) 1648 if (!mem_cgroup_is_root(from))
1513 res_counter_uncharge(&from->res, PAGE_SIZE); 1649 res_counter_uncharge(&from->res, PAGE_SIZE);
1514 mem_cgroup_charge_statistics(from, pc, false); 1650 mem_cgroup_charge_statistics(from, pc, false);
1515 1651
1516 page = pc->page; 1652 page = pc->page;
1517 if (page_is_file_cache(page) && page_mapped(page)) { 1653 if (page_mapped(page) && !PageAnon(page)) {
1518 cpu = smp_processor_id(); 1654 cpu = smp_processor_id();
1519 /* Update mapped_file data for mem_cgroup "from" */ 1655 /* Update mapped_file data for mem_cgroup "from" */
1520 stat = &from->stat; 1656 stat = &from->stat;
1521 cpustat = &stat->cpustat[cpu]; 1657 cpustat = &stat->cpustat[cpu];
1522 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, 1658 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED,
1523 -1); 1659 -1);
1524 1660
1525 /* Update mapped_file data for mem_cgroup "to" */ 1661 /* Update mapped_file data for mem_cgroup "to" */
1526 stat = &to->stat; 1662 stat = &to->stat;
1527 cpustat = &stat->cpustat[cpu]; 1663 cpustat = &stat->cpustat[cpu];
1528 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, 1664 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED,
1529 1); 1665 1);
1530 } 1666 }
1531 1667
@@ -1536,15 +1672,28 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
1536 css_get(&to->css); 1672 css_get(&to->css);
1537 pc->mem_cgroup = to; 1673 pc->mem_cgroup = to;
1538 mem_cgroup_charge_statistics(to, pc, true); 1674 mem_cgroup_charge_statistics(to, pc, true);
1539 ret = 0;
1540out:
1541 unlock_page_cgroup(pc);
1542 /* 1675 /*
1543 * We charges against "to" which may not have any tasks. Then, "to" 1676 * We charges against "to" which may not have any tasks. Then, "to"
1544 * can be under rmdir(). But in current implementation, caller of 1677 * can be under rmdir(). But in current implementation, caller of
1545 * this function is just force_empty() and it's garanteed that 1678 * this function is just force_empty() and it's garanteed that
1546 * "to" is never removed. So, we don't check rmdir status here. 1679 * "to" is never removed. So, we don't check rmdir status here.
1547 */ 1680 */
1681}
1682
1683/*
1684 * check whether the @pc is valid for moving account and call
1685 * __mem_cgroup_move_account()
1686 */
1687static int mem_cgroup_move_account(struct page_cgroup *pc,
1688 struct mem_cgroup *from, struct mem_cgroup *to)
1689{
1690 int ret = -EINVAL;
1691 lock_page_cgroup(pc);
1692 if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
1693 __mem_cgroup_move_account(pc, from, to);
1694 ret = 0;
1695 }
1696 unlock_page_cgroup(pc);
1548 return ret; 1697 return ret;
1549} 1698}
1550 1699
@@ -1566,45 +1715,27 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
1566 if (!pcg) 1715 if (!pcg)
1567 return -EINVAL; 1716 return -EINVAL;
1568 1717
1718 ret = -EBUSY;
1719 if (!get_page_unless_zero(page))
1720 goto out;
1721 if (isolate_lru_page(page))
1722 goto put;
1569 1723
1570 parent = mem_cgroup_from_cont(pcg); 1724 parent = mem_cgroup_from_cont(pcg);
1571
1572
1573 ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page); 1725 ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page);
1574 if (ret || !parent) 1726 if (ret || !parent)
1575 return ret; 1727 goto put_back;
1576
1577 if (!get_page_unless_zero(page)) {
1578 ret = -EBUSY;
1579 goto uncharge;
1580 }
1581
1582 ret = isolate_lru_page(page);
1583
1584 if (ret)
1585 goto cancel;
1586 1728
1587 ret = mem_cgroup_move_account(pc, child, parent); 1729 ret = mem_cgroup_move_account(pc, child, parent);
1588 1730 if (!ret)
1731 css_put(&parent->css); /* drop extra refcnt by try_charge() */
1732 else
1733 mem_cgroup_cancel_charge(parent); /* does css_put */
1734put_back:
1589 putback_lru_page(page); 1735 putback_lru_page(page);
1590 if (!ret) { 1736put:
1591 put_page(page);
1592 /* drop extra refcnt by try_charge() */
1593 css_put(&parent->css);
1594 return 0;
1595 }
1596
1597cancel:
1598 put_page(page); 1737 put_page(page);
1599uncharge: 1738out:
1600 /* drop extra refcnt by try_charge() */
1601 css_put(&parent->css);
1602 /* uncharge if move fails */
1603 if (!mem_cgroup_is_root(parent)) {
1604 res_counter_uncharge(&parent->res, PAGE_SIZE);
1605 if (do_swap_account)
1606 res_counter_uncharge(&parent->memsw, PAGE_SIZE);
1607 }
1608 return ret; 1739 return ret;
1609} 1740}
1610 1741
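Because the old and new bodies of mem_cgroup_move_parent() are interleaved above, here is the reshaped flow collected from the added lines only (an illustrative consolidation, not verbatim): the page is pinned and isolated first, so every later failure unwinds through one chain of labels, and a failed move is rolled back with mem_cgroup_cancel_charge().

ret = -EBUSY;
if (!get_page_unless_zero(page))
        goto out;
if (isolate_lru_page(page))
        goto put;

parent = mem_cgroup_from_cont(pcg);
ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page);
if (ret || !parent)
        goto put_back;

ret = mem_cgroup_move_account(pc, child, parent);
if (!ret)
        css_put(&parent->css);           /* drop extra refcnt from try_charge() */
else
        mem_cgroup_cancel_charge(parent);/* does css_put() */
put_back:
        putback_lru_page(page);
put:
        put_page(page);
out:
        return ret;
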
@@ -1821,14 +1952,53 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
1821 return; 1952 return;
1822 if (!mem) 1953 if (!mem)
1823 return; 1954 return;
1824 if (!mem_cgroup_is_root(mem)) { 1955 mem_cgroup_cancel_charge(mem);
1825 res_counter_uncharge(&mem->res, PAGE_SIZE);
1826 if (do_swap_account)
1827 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1828 }
1829 css_put(&mem->css);
1830} 1956}
1831 1957
1958static void
1959__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
1960{
1961 struct memcg_batch_info *batch = NULL;
1962 bool uncharge_memsw = true;
1963 /* If swapout, usage of swap doesn't decrease */
1964 if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
1965 uncharge_memsw = false;
1966 /*
1967 * do_batch > 0 when unmapping pages or inode invalidate/truncate.
1968 * In those cases, all pages freed continuously can be expected to be in
1969 * the same cgroup and we have a chance to coalesce uncharges.
1970 * But we uncharge one by one if the task is being OOM-killed (TIF_MEMDIE)
1971 * because we want to uncharge as soon as possible.
1972 */
1973 if (!current->memcg_batch.do_batch || test_thread_flag(TIF_MEMDIE))
1974 goto direct_uncharge;
1975
1976 batch = &current->memcg_batch;
1977 /*
1978 * Usually, we do css_get() when we remember a memcg pointer.
1979 * But in this case, we keep res->usage until the end of a series of
1980 * uncharges, so it's ok to ignore the memcg's refcnt.
1981 */
1982 if (!batch->memcg)
1983 batch->memcg = mem;
1984 /*
1985 * In the typical case, batch->memcg == mem. This means we can
1986 * merge a series of uncharges into one uncharge of the res_counter.
1987 * If not, we uncharge the res_counter one by one.
1988 */
1989 if (batch->memcg != mem)
1990 goto direct_uncharge;
1991 /* remember freed charge and uncharge it later */
1992 batch->bytes += PAGE_SIZE;
1993 if (uncharge_memsw)
1994 batch->memsw_bytes += PAGE_SIZE;
1995 return;
1996direct_uncharge:
1997 res_counter_uncharge(&mem->res, PAGE_SIZE);
1998 if (uncharge_memsw)
1999 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
2000 return;
2001}
1832 2002
1833/* 2003/*
1834 * uncharge if !page_mapped(page) 2004 * uncharge if !page_mapped(page)
@@ -1877,12 +2047,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
1877 break; 2047 break;
1878 } 2048 }
1879 2049
1880 if (!mem_cgroup_is_root(mem)) { 2050 if (!mem_cgroup_is_root(mem))
1881 res_counter_uncharge(&mem->res, PAGE_SIZE); 2051 __do_uncharge(mem, ctype);
1882 if (do_swap_account &&
1883 (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
1884 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1885 }
1886 if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) 2052 if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
1887 mem_cgroup_swap_statistics(mem, true); 2053 mem_cgroup_swap_statistics(mem, true);
1888 mem_cgroup_charge_statistics(mem, pc, false); 2054 mem_cgroup_charge_statistics(mem, pc, false);
@@ -1928,6 +2094,50 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
1928 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); 2094 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
1929} 2095}
1930 2096
2097/*
2098 * uncharge_start/end are called in unmap_page_range/invalidate/truncate.
2099 * In those cases, pages are freed continuously and we can expect that they
2100 * are in the same memcg. Each of those callers limits the number of pages
2101 * freed at once, so uncharge_start/end() pair up properly.
2102 * The pair may be called several (nested) times in one context.
2103 */
2104
2105void mem_cgroup_uncharge_start(void)
2106{
2107 current->memcg_batch.do_batch++;
2108 /* We can do nest. */
2109 if (current->memcg_batch.do_batch == 1) {
2110 current->memcg_batch.memcg = NULL;
2111 current->memcg_batch.bytes = 0;
2112 current->memcg_batch.memsw_bytes = 0;
2113 }
2114}
2115
2116void mem_cgroup_uncharge_end(void)
2117{
2118 struct memcg_batch_info *batch = &current->memcg_batch;
2119
2120 if (!batch->do_batch)
2121 return;
2122
2123 batch->do_batch--;
2124 if (batch->do_batch) /* If stacked, do nothing. */
2125 return;
2126
2127 if (!batch->memcg)
2128 return;
2129 /*
2130 * This "batch->memcg" is valid without any css_get/put etc...
2131 * bacause we hide charges behind us.
2132 */
2133 if (batch->bytes)
2134 res_counter_uncharge(&batch->memcg->res, batch->bytes);
2135 if (batch->memsw_bytes)
2136 res_counter_uncharge(&batch->memcg->memsw, batch->memsw_bytes);
2137 /* forget this pointer (for sanity check) */
2138 batch->memcg = NULL;
2139}
2140
1931#ifdef CONFIG_SWAP 2141#ifdef CONFIG_SWAP
1932/* 2142/*
1933 * called after __delete_from_swap_cache() and drop "page" account. 2143 * called after __delete_from_swap_cache() and drop "page" account.
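The uncharge batching above hangs off a small per-task structure: callers bracket a page-freeing loop with mem_cgroup_uncharge_start()/mem_cgroup_uncharge_end(), individual uncharges accumulate bytes instead of hitting the res_counter, and the outermost end() flushes the totals in one operation (the memory.c and truncate.c hunks below convert their loops to this pattern). A minimal user-space model of the bracket-and-coalesce idea, with invented names and a plain counter standing in for the res_counter:

#include <stdio.h>

#define PAGE_SZ 4096

static long res_usage = 300 * PAGE_SZ;   /* stands in for res_counter usage */
static long res_uncharge_calls;          /* how often we hit the shared counter */

/* Per-task batch state, analogous to current->memcg_batch. */
static struct {
        int  do_batch;                   /* nesting depth */
        long bytes;                      /* coalesced uncharge amount */
} batch;

static void uncharge_start(void)
{
        if (batch.do_batch++ == 0)
                batch.bytes = 0;         /* outermost start resets the batch */
}

static void uncharge_page(void)
{
        if (batch.do_batch) {
                batch.bytes += PAGE_SZ;  /* remember it, flush later */
                return;
        }
        res_usage -= PAGE_SZ;            /* direct uncharge */
        res_uncharge_calls++;
}

static void uncharge_end(void)
{
        if (--batch.do_batch)            /* still nested: do nothing */
                return;
        if (batch.bytes) {
                res_usage -= batch.bytes;
                res_uncharge_calls++;    /* one hit for the whole series */
        }
}

int main(void)
{
        uncharge_start();
        for (int i = 0; i < 256; i++)    /* e.g. truncating 256 pages */
                uncharge_page();
        uncharge_end();
        printf("usage %ld bytes after %ld counter operations\n",
               res_usage, res_uncharge_calls);
        return 0;
}
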
@@ -2103,7 +2313,6 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
2103 unsigned long long val) 2313 unsigned long long val)
2104{ 2314{
2105 int retry_count; 2315 int retry_count;
2106 int progress;
2107 u64 memswlimit; 2316 u64 memswlimit;
2108 int ret = 0; 2317 int ret = 0;
2109 int children = mem_cgroup_count_children(memcg); 2318 int children = mem_cgroup_count_children(memcg);
@@ -2147,8 +2356,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
2147 if (!ret) 2356 if (!ret)
2148 break; 2357 break;
2149 2358
2150 progress = mem_cgroup_hierarchical_reclaim(memcg, NULL, 2359 mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
2151 GFP_KERNEL,
2152 MEM_CGROUP_RECLAIM_SHRINK); 2360 MEM_CGROUP_RECLAIM_SHRINK);
2153 curusage = res_counter_read_u64(&memcg->res, RES_USAGE); 2361 curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
2154 /* Usage is reduced ? */ 2362 /* Usage is reduced ? */
@@ -2387,6 +2595,7 @@ move_account:
2387 goto out; 2595 goto out;
2388 /* This is for making all *used* pages to be on LRU. */ 2596 /* This is for making all *used* pages to be on LRU. */
2389 lru_add_drain_all(); 2597 lru_add_drain_all();
2598 drain_all_stock_sync();
2390 ret = 0; 2599 ret = 0;
2391 for_each_node_state(node, N_HIGH_MEMORY) { 2600 for_each_node_state(node, N_HIGH_MEMORY) {
2392 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { 2601 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
@@ -2544,6 +2753,7 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
2544 val += idx_val; 2753 val += idx_val;
2545 mem_cgroup_get_recursive_idx_stat(mem, 2754 mem_cgroup_get_recursive_idx_stat(mem,
2546 MEM_CGROUP_STAT_SWAPOUT, &idx_val); 2755 MEM_CGROUP_STAT_SWAPOUT, &idx_val);
2756 val += idx_val;
2547 val <<= PAGE_SHIFT; 2757 val <<= PAGE_SHIFT;
2548 } else 2758 } else
2549 val = res_counter_read_u64(&mem->memsw, name); 2759 val = res_counter_read_u64(&mem->memsw, name);
@@ -2663,7 +2873,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
2663enum { 2873enum {
2664 MCS_CACHE, 2874 MCS_CACHE,
2665 MCS_RSS, 2875 MCS_RSS,
2666 MCS_MAPPED_FILE, 2876 MCS_FILE_MAPPED,
2667 MCS_PGPGIN, 2877 MCS_PGPGIN,
2668 MCS_PGPGOUT, 2878 MCS_PGPGOUT,
2669 MCS_SWAP, 2879 MCS_SWAP,
@@ -2707,8 +2917,8 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
2707 s->stat[MCS_CACHE] += val * PAGE_SIZE; 2917 s->stat[MCS_CACHE] += val * PAGE_SIZE;
2708 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); 2918 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
2709 s->stat[MCS_RSS] += val * PAGE_SIZE; 2919 s->stat[MCS_RSS] += val * PAGE_SIZE;
2710 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_MAPPED_FILE); 2920 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_FILE_MAPPED);
2711 s->stat[MCS_MAPPED_FILE] += val * PAGE_SIZE; 2921 s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
2712 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT); 2922 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT);
2713 s->stat[MCS_PGPGIN] += val; 2923 s->stat[MCS_PGPGIN] += val;
2714 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); 2924 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
@@ -3100,11 +3310,18 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
3100 3310
3101 /* root ? */ 3311 /* root ? */
3102 if (cont->parent == NULL) { 3312 if (cont->parent == NULL) {
3313 int cpu;
3103 enable_swap_cgroup(); 3314 enable_swap_cgroup();
3104 parent = NULL; 3315 parent = NULL;
3105 root_mem_cgroup = mem; 3316 root_mem_cgroup = mem;
3106 if (mem_cgroup_soft_limit_tree_init()) 3317 if (mem_cgroup_soft_limit_tree_init())
3107 goto free_out; 3318 goto free_out;
3319 for_each_possible_cpu(cpu) {
3320 struct memcg_stock_pcp *stock =
3321 &per_cpu(memcg_stock, cpu);
3322 INIT_WORK(&stock->work, drain_local_stock);
3323 }
3324 hotcpu_notifier(memcg_stock_cpu_callback, 0);
3108 3325
3109 } else { 3326 } else {
3110 parent = mem_cgroup_from_cont(cont->parent); 3327 parent = mem_cgroup_from_cont(cont->parent);
@@ -3173,12 +3390,10 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
3173 struct task_struct *p, 3390 struct task_struct *p,
3174 bool threadgroup) 3391 bool threadgroup)
3175{ 3392{
3176 mutex_lock(&memcg_tasklist);
3177 /* 3393 /*
3178 * FIXME: It's better to move charges of this process from old 3394 * FIXME: It's better to move charges of this process from old
3179 * memcg to new memcg. But it's just on TODO-List now. 3395 * memcg to new memcg. But it's just on TODO-List now.
3180 */ 3396 */
3181 mutex_unlock(&memcg_tasklist);
3182} 3397}
3183 3398
3184struct cgroup_subsys mem_cgroup_subsys = { 3399struct cgroup_subsys mem_cgroup_subsys = {
diff --git a/mm/memory.c b/mm/memory.c
index db09106ed44..09e4b1be7b6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -956,6 +956,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
956 details = NULL; 956 details = NULL;
957 957
958 BUG_ON(addr >= end); 958 BUG_ON(addr >= end);
959 mem_cgroup_uncharge_start();
959 tlb_start_vma(tlb, vma); 960 tlb_start_vma(tlb, vma);
960 pgd = pgd_offset(vma->vm_mm, addr); 961 pgd = pgd_offset(vma->vm_mm, addr);
961 do { 962 do {
@@ -968,6 +969,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
968 zap_work, details); 969 zap_work, details);
969 } while (pgd++, addr = next, (addr != end && *zap_work > 0)); 970 } while (pgd++, addr = next, (addr != end && *zap_work > 0));
970 tlb_end_vma(tlb, vma); 971 tlb_end_vma(tlb, vma);
972 mem_cgroup_uncharge_end();
971 973
972 return addr; 974 return addr;
973} 975}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 492c98624fc..f52481b1c1e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -196,27 +196,46 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
196/* 196/*
197 * Determine the type of allocation constraint. 197 * Determine the type of allocation constraint.
198 */ 198 */
199static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
200 gfp_t gfp_mask)
201{
202#ifdef CONFIG_NUMA 199#ifdef CONFIG_NUMA
200static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
201 gfp_t gfp_mask, nodemask_t *nodemask)
202{
203 struct zone *zone; 203 struct zone *zone;
204 struct zoneref *z; 204 struct zoneref *z;
205 enum zone_type high_zoneidx = gfp_zone(gfp_mask); 205 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
206 nodemask_t nodes = node_states[N_HIGH_MEMORY];
207 206
208 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) 207 /*
209 if (cpuset_zone_allowed_softwall(zone, gfp_mask)) 208 * We reach here only when __GFP_NOFAIL is used, so we should avoid
210 node_clear(zone_to_nid(zone), nodes); 209 * killing current. We have to kill a random task in this case.
211 else 210 * Hopefully CONSTRAINT_THISNODE... but there is no way to handle it now.
212 return CONSTRAINT_CPUSET; 211 */
212 if (gfp_mask & __GFP_THISNODE)
213 return CONSTRAINT_NONE;
213 214
214 if (!nodes_empty(nodes)) 215 /*
216 * The nodemask here is a nodemask passed to alloc_pages(). Now,
217 * cpuset doesn't use this nodemask for its hardwall/softwall/hierarchy
218 * feature. mempolicy is an only user of nodemask here.
219 * check mempolicy's nodemask contains all N_HIGH_MEMORY
220 */
221 if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask))
215 return CONSTRAINT_MEMORY_POLICY; 222 return CONSTRAINT_MEMORY_POLICY;
216#endif 223
224 /* Check this allocation failure is caused by cpuset's wall function */
225 for_each_zone_zonelist_nodemask(zone, z, zonelist,
226 high_zoneidx, nodemask)
227 if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
228 return CONSTRAINT_CPUSET;
217 229
218 return CONSTRAINT_NONE; 230 return CONSTRAINT_NONE;
219} 231}
232#else
233static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
234 gfp_t gfp_mask, nodemask_t *nodemask)
235{
236 return CONSTRAINT_NONE;
237}
238#endif
220 239
221/* 240/*
222 * Simple selection loop. We chose the process with the highest 241 * Simple selection loop. We chose the process with the highest
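The rewritten constrained_alloc() classifies an allocation failure before anyone is killed: __GFP_THISNODE requests (which only reach the OOM path with __GFP_NOFAIL) are reported as CONSTRAINT_NONE, a mempolicy nodemask that does not cover every memory node yields CONSTRAINT_MEMORY_POLICY, and a candidate zone rejected by the cpuset softwall yields CONSTRAINT_CPUSET. A small user-space model of that decision order, using plain bitmasks in place of nodemask_t (all names and values below are invented for the example):

#include <stdbool.h>
#include <stdio.h>

enum constraint { CONSTRAINT_NONE, CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY };

typedef unsigned long nodemask;          /* bit i set == node i usable */

static bool nodes_subset(nodemask a, nodemask b)   /* is a contained in b? */
{
        return (a & ~b) == 0;
}

static enum constraint classify(bool gfp_thisnode,
                                const nodemask *policy_nodes,
                                nodemask all_memory_nodes,
                                nodemask cpuset_allowed_nodes)
{
        /* __GFP_THISNODE: never blame a constraint, handled as NONE. */
        if (gfp_thisnode)
                return CONSTRAINT_NONE;

        /* mempolicy restricted the allocation to a proper subset of nodes. */
        if (policy_nodes && !nodes_subset(all_memory_nodes, *policy_nodes))
                return CONSTRAINT_MEMORY_POLICY;

        /* some candidate zone was rejected by the cpuset softwall. */
        if (!nodes_subset(all_memory_nodes, cpuset_allowed_nodes))
                return CONSTRAINT_CPUSET;

        return CONSTRAINT_NONE;
}

int main(void)
{
        static const char * const name[] =
                { "NONE", "CPUSET", "MEMORY_POLICY" };
        nodemask all = 0xf;              /* four nodes with memory */
        nodemask policy = 0x3;           /* mempolicy bound to nodes 0-1 */

        printf("%s\n", name[classify(false, &policy, all, all)]);
        printf("%s\n", name[classify(false, NULL, all, 0x3)]);
        printf("%s\n", name[classify(true, NULL, all, all)]);
        return 0;
}
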
@@ -337,7 +356,8 @@ static void dump_tasks(const struct mem_cgroup *mem)
337 } while_each_thread(g, p); 356 } while_each_thread(g, p);
338} 357}
339 358
340static void dump_header(gfp_t gfp_mask, int order, struct mem_cgroup *mem) 359static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
360 struct mem_cgroup *mem)
341{ 361{
342 pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " 362 pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
343 "oom_adj=%d\n", 363 "oom_adj=%d\n",
@@ -346,12 +366,14 @@ static void dump_header(gfp_t gfp_mask, int order, struct mem_cgroup *mem)
346 cpuset_print_task_mems_allowed(current); 366 cpuset_print_task_mems_allowed(current);
347 task_unlock(current); 367 task_unlock(current);
348 dump_stack(); 368 dump_stack();
349 mem_cgroup_print_oom_info(mem, current); 369 mem_cgroup_print_oom_info(mem, p);
350 show_mem(); 370 show_mem();
351 if (sysctl_oom_dump_tasks) 371 if (sysctl_oom_dump_tasks)
352 dump_tasks(mem); 372 dump_tasks(mem);
353} 373}
354 374
375#define K(x) ((x) << (PAGE_SHIFT-10))
376
355/* 377/*
356 * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO 378 * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO
357 * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO 379 * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO
@@ -365,15 +387,23 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
365 return; 387 return;
366 } 388 }
367 389
390 task_lock(p);
368 if (!p->mm) { 391 if (!p->mm) {
369 WARN_ON(1); 392 WARN_ON(1);
370 printk(KERN_WARNING "tried to kill an mm-less task!\n"); 393 printk(KERN_WARNING "tried to kill an mm-less task %d (%s)!\n",
394 task_pid_nr(p), p->comm);
395 task_unlock(p);
371 return; 396 return;
372 } 397 }
373 398
374 if (verbose) 399 if (verbose)
375 printk(KERN_ERR "Killed process %d (%s)\n", 400 printk(KERN_ERR "Killed process %d (%s) "
376 task_pid_nr(p), p->comm); 401 "vsz:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
402 task_pid_nr(p), p->comm,
403 K(p->mm->total_vm),
404 K(get_mm_counter(p->mm, anon_rss)),
405 K(get_mm_counter(p->mm, file_rss)));
406 task_unlock(p);
377 407
378 /* 408 /*
379 * We give our sacrificial lamb high priority and access to 409 * We give our sacrificial lamb high priority and access to
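The new K() macro converts a page count to kilobytes for the kill message: a page is 2^PAGE_SHIFT bytes and a kilobyte is 2^10, so shifting left by PAGE_SHIFT - 10 does the conversion. A tiny stand-alone check with 4 KiB pages (the counter values are made up):

#include <stdio.h>

#define PAGE_SHIFT 12                    /* 4 KiB pages, e.g. x86 */
#define K(x) ((x) << (PAGE_SHIFT - 10))  /* pages -> kB, as in the hunk above */

int main(void)
{
        unsigned long total_vm = 52000;  /* made-up counters, in pages */
        unsigned long anon_rss = 31000;
        unsigned long file_rss = 1200;

        printf("vsz:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
               K(total_vm), K(anon_rss), K(file_rss));
        return 0;
}
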
@@ -411,7 +441,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
411 struct task_struct *c; 441 struct task_struct *c;
412 442
413 if (printk_ratelimit()) 443 if (printk_ratelimit())
414 dump_header(gfp_mask, order, mem); 444 dump_header(p, gfp_mask, order, mem);
415 445
416 /* 446 /*
417 * If the task is already exiting, don't alarm the sysadmin or kill 447 * If the task is already exiting, don't alarm the sysadmin or kill
@@ -547,7 +577,7 @@ retry:
547 /* Found nothing?!?! Either we hang forever, or we panic. */ 577 /* Found nothing?!?! Either we hang forever, or we panic. */
548 if (!p) { 578 if (!p) {
549 read_unlock(&tasklist_lock); 579 read_unlock(&tasklist_lock);
550 dump_header(gfp_mask, order, NULL); 580 dump_header(NULL, gfp_mask, order, NULL);
551 panic("Out of memory and no killable processes...\n"); 581 panic("Out of memory and no killable processes...\n");
552 } 582 }
553 583
@@ -603,7 +633,8 @@ rest_and_return:
603 * OR try to be smart about which process to kill. Note that we 633 * OR try to be smart about which process to kill. Note that we
604 * don't have to be perfect here, we just have to be good. 634 * don't have to be perfect here, we just have to be good.
605 */ 635 */
606void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) 636void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
637 int order, nodemask_t *nodemask)
607{ 638{
608 unsigned long freed = 0; 639 unsigned long freed = 0;
609 enum oom_constraint constraint; 640 enum oom_constraint constraint;
@@ -614,7 +645,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
614 return; 645 return;
615 646
616 if (sysctl_panic_on_oom == 2) { 647 if (sysctl_panic_on_oom == 2) {
617 dump_header(gfp_mask, order, NULL); 648 dump_header(NULL, gfp_mask, order, NULL);
618 panic("out of memory. Compulsory panic_on_oom is selected.\n"); 649 panic("out of memory. Compulsory panic_on_oom is selected.\n");
619 } 650 }
620 651
@@ -622,7 +653,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
622 * Check if there were limitations on the allocation (only relevant for 653 * Check if there were limitations on the allocation (only relevant for
623 * NUMA) that may require different handling. 654 * NUMA) that may require different handling.
624 */ 655 */
625 constraint = constrained_alloc(zonelist, gfp_mask); 656 constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
626 read_lock(&tasklist_lock); 657 read_lock(&tasklist_lock);
627 658
628 switch (constraint) { 659 switch (constraint) {
@@ -633,7 +664,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
633 664
634 case CONSTRAINT_NONE: 665 case CONSTRAINT_NONE:
635 if (sysctl_panic_on_oom) { 666 if (sysctl_panic_on_oom) {
636 dump_header(gfp_mask, order, NULL); 667 dump_header(NULL, gfp_mask, order, NULL);
637 panic("out of memory. panic_on_oom is selected\n"); 668 panic("out of memory. panic_on_oom is selected\n");
638 } 669 }
639 /* Fall-through */ 670 /* Fall-through */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6867b4d391f..74af449b1f1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1654,12 +1654,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
1654 if (page) 1654 if (page)
1655 goto out; 1655 goto out;
1656 1656
1657 /* The OOM killer will not help higher order allocs */ 1657 if (!(gfp_mask & __GFP_NOFAIL)) {
1658 if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL)) 1658 /* The OOM killer will not help higher order allocs */
1659 goto out; 1659 if (order > PAGE_ALLOC_COSTLY_ORDER)
1660 1660 goto out;
1661 /*
1662 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
1663 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
1664 * The caller should handle page allocation failure by itself if
1665 * it specifies __GFP_THISNODE.
1666 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
1667 */
1668 if (gfp_mask & __GFP_THISNODE)
1669 goto out;
1670 }
1661 /* Exhausted what can be done so it's blamo time */ 1671 /* Exhausted what can be done so it's blamo time */
1662 out_of_memory(zonelist, gfp_mask, order); 1672 out_of_memory(zonelist, gfp_mask, order, nodemask);
1663 1673
1664out: 1674out:
1665 clear_zonelist_oom(zonelist, gfp_mask); 1675 clear_zonelist_oom(zonelist, gfp_mask);
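The page_alloc.c hunk reorganizes the bail-out checks in __alloc_pages_may_oom(): __GFP_NOFAIL always proceeds to out_of_memory(), otherwise costly-order and __GFP_THISNODE requests fail without invoking the OOM killer. The decision, pulled out as a stand-alone predicate for illustration (the flag bit values below are invented for the model; only the shape of the checks matches the hunk):

#include <stdbool.h>
#include <stdio.h>

#define GFP_NOFAIL    0x1u               /* invented bit values for the model */
#define GFP_THISNODE  0x2u
#define PAGE_ALLOC_COSTLY_ORDER 3

/* Should a failed allocation of this order/gfp invoke the OOM killer? */
static bool should_oom_kill(unsigned int gfp_mask, int order)
{
        if (!(gfp_mask & GFP_NOFAIL)) {
                /* The OOM killer will not help higher-order allocations. */
                if (order > PAGE_ALLOC_COSTLY_ORDER)
                        return false;
                /* __GFP_THISNODE callers must handle failure themselves. */
                if (gfp_mask & GFP_THISNODE)
                        return false;
        }
        return true;                     /* exhausted what can be done */
}

int main(void)
{
        printf("%d\n", should_oom_kill(0, 0));            /* 1 */
        printf("%d\n", should_oom_kill(0, 4));            /* 0: costly order */
        printf("%d\n", should_oom_kill(GFP_THISNODE, 0)); /* 0 */
        printf("%d\n", should_oom_kill(GFP_NOFAIL, 4));   /* 1 */
        return 0;
}
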
@@ -3123,7 +3133,7 @@ static int __cpuinit process_zones(int cpu)
3123 3133
3124 if (percpu_pagelist_fraction) 3134 if (percpu_pagelist_fraction)
3125 setup_pagelist_highmark(zone_pcp(zone, cpu), 3135 setup_pagelist_highmark(zone_pcp(zone, cpu),
3126 (zone->present_pages / percpu_pagelist_fraction)); 3136 (zone->present_pages / percpu_pagelist_fraction));
3127 } 3137 }
3128 3138
3129 return 0; 3139 return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index 98135dbd25b..278cd277bde 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -721,7 +721,7 @@ void page_add_file_rmap(struct page *page)
721{ 721{
722 if (atomic_inc_and_test(&page->_mapcount)) { 722 if (atomic_inc_and_test(&page->_mapcount)) {
723 __inc_zone_page_state(page, NR_FILE_MAPPED); 723 __inc_zone_page_state(page, NR_FILE_MAPPED);
724 mem_cgroup_update_mapped_file_stat(page, 1); 724 mem_cgroup_update_file_mapped(page, 1);
725 } 725 }
726} 726}
727 727
@@ -753,8 +753,8 @@ void page_remove_rmap(struct page *page)
753 __dec_zone_page_state(page, NR_ANON_PAGES); 753 __dec_zone_page_state(page, NR_ANON_PAGES);
754 } else { 754 } else {
755 __dec_zone_page_state(page, NR_FILE_MAPPED); 755 __dec_zone_page_state(page, NR_FILE_MAPPED);
756 mem_cgroup_update_file_mapped(page, -1);
756 } 757 }
757 mem_cgroup_update_mapped_file_stat(page, -1);
758 /* 758 /*
759 * It would be tidy to reset the PageAnon mapping here, 759 * It would be tidy to reset the PageAnon mapping here,
760 * but that might overwrite a racing page_add_anon_rmap 760 * but that might overwrite a racing page_add_anon_rmap
diff --git a/mm/shmem.c b/mm/shmem.c
index 4fb41c83dac..f8485062f3b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -29,7 +29,6 @@
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/swap.h> 31#include <linux/swap.h>
32#include <linux/ima.h>
33 32
34static struct vfsmount *shm_mnt; 33static struct vfsmount *shm_mnt;
35 34
@@ -42,6 +41,7 @@ static struct vfsmount *shm_mnt;
42 41
43#include <linux/xattr.h> 42#include <linux/xattr.h>
44#include <linux/exportfs.h> 43#include <linux/exportfs.h>
44#include <linux/posix_acl.h>
45#include <linux/generic_acl.h> 45#include <linux/generic_acl.h>
46#include <linux/mman.h> 46#include <linux/mman.h>
47#include <linux/string.h> 47#include <linux/string.h>
@@ -810,7 +810,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
810 error = inode_setattr(inode, attr); 810 error = inode_setattr(inode, attr);
811#ifdef CONFIG_TMPFS_POSIX_ACL 811#ifdef CONFIG_TMPFS_POSIX_ACL
812 if (!error && (attr->ia_valid & ATTR_MODE)) 812 if (!error && (attr->ia_valid & ATTR_MODE))
813 error = generic_acl_chmod(inode, &shmem_acl_ops); 813 error = generic_acl_chmod(inode);
814#endif 814#endif
815 if (page) 815 if (page)
816 page_cache_release(page); 816 page_cache_release(page);
@@ -1824,11 +1824,13 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1824 return error; 1824 return error;
1825 } 1825 }
1826 } 1826 }
1827 error = shmem_acl_init(inode, dir); 1827#ifdef CONFIG_TMPFS_POSIX_ACL
1828 error = generic_acl_init(inode, dir);
1828 if (error) { 1829 if (error) {
1829 iput(inode); 1830 iput(inode);
1830 return error; 1831 return error;
1831 } 1832 }
1833#endif
1832 if (dir->i_mode & S_ISGID) { 1834 if (dir->i_mode & S_ISGID) {
1833 inode->i_gid = dir->i_gid; 1835 inode->i_gid = dir->i_gid;
1834 if (S_ISDIR(mode)) 1836 if (S_ISDIR(mode))
@@ -2043,27 +2045,28 @@ static const struct inode_operations shmem_symlink_inode_operations = {
2043 * filesystem level, though. 2045 * filesystem level, though.
2044 */ 2046 */
2045 2047
2046static size_t shmem_xattr_security_list(struct inode *inode, char *list, 2048static size_t shmem_xattr_security_list(struct dentry *dentry, char *list,
2047 size_t list_len, const char *name, 2049 size_t list_len, const char *name,
2048 size_t name_len) 2050 size_t name_len, int handler_flags)
2049{ 2051{
2050 return security_inode_listsecurity(inode, list, list_len); 2052 return security_inode_listsecurity(dentry->d_inode, list, list_len);
2051} 2053}
2052 2054
2053static int shmem_xattr_security_get(struct inode *inode, const char *name, 2055static int shmem_xattr_security_get(struct dentry *dentry, const char *name,
2054 void *buffer, size_t size) 2056 void *buffer, size_t size, int handler_flags)
2055{ 2057{
2056 if (strcmp(name, "") == 0) 2058 if (strcmp(name, "") == 0)
2057 return -EINVAL; 2059 return -EINVAL;
2058 return xattr_getsecurity(inode, name, buffer, size); 2060 return xattr_getsecurity(dentry->d_inode, name, buffer, size);
2059} 2061}
2060 2062
2061static int shmem_xattr_security_set(struct inode *inode, const char *name, 2063static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
2062 const void *value, size_t size, int flags) 2064 const void *value, size_t size, int flags, int handler_flags)
2063{ 2065{
2064 if (strcmp(name, "") == 0) 2066 if (strcmp(name, "") == 0)
2065 return -EINVAL; 2067 return -EINVAL;
2066 return security_inode_setsecurity(inode, name, value, size, flags); 2068 return security_inode_setsecurity(dentry->d_inode, name, value,
2069 size, flags);
2067} 2070}
2068 2071
2069static struct xattr_handler shmem_xattr_security_handler = { 2072static struct xattr_handler shmem_xattr_security_handler = {
@@ -2074,8 +2077,8 @@ static struct xattr_handler shmem_xattr_security_handler = {
2074}; 2077};
2075 2078
2076static struct xattr_handler *shmem_xattr_handlers[] = { 2079static struct xattr_handler *shmem_xattr_handlers[] = {
2077 &shmem_xattr_acl_access_handler, 2080 &generic_acl_access_handler,
2078 &shmem_xattr_acl_default_handler, 2081 &generic_acl_default_handler,
2079 &shmem_xattr_security_handler, 2082 &shmem_xattr_security_handler,
2080 NULL 2083 NULL
2081}; 2084};
@@ -2454,7 +2457,7 @@ static const struct inode_operations shmem_inode_operations = {
2454 .getxattr = generic_getxattr, 2457 .getxattr = generic_getxattr,
2455 .listxattr = generic_listxattr, 2458 .listxattr = generic_listxattr,
2456 .removexattr = generic_removexattr, 2459 .removexattr = generic_removexattr,
2457 .check_acl = shmem_check_acl, 2460 .check_acl = generic_check_acl,
2458#endif 2461#endif
2459 2462
2460}; 2463};
@@ -2477,7 +2480,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
2477 .getxattr = generic_getxattr, 2480 .getxattr = generic_getxattr,
2478 .listxattr = generic_listxattr, 2481 .listxattr = generic_listxattr,
2479 .removexattr = generic_removexattr, 2482 .removexattr = generic_removexattr,
2480 .check_acl = shmem_check_acl, 2483 .check_acl = generic_check_acl,
2481#endif 2484#endif
2482}; 2485};
2483 2486
@@ -2488,7 +2491,7 @@ static const struct inode_operations shmem_special_inode_operations = {
2488 .getxattr = generic_getxattr, 2491 .getxattr = generic_getxattr,
2489 .listxattr = generic_listxattr, 2492 .listxattr = generic_listxattr,
2490 .removexattr = generic_removexattr, 2493 .removexattr = generic_removexattr,
2491 .check_acl = shmem_check_acl, 2494 .check_acl = generic_check_acl,
2492#endif 2495#endif
2493}; 2496};
2494 2497
@@ -2626,7 +2629,8 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
2626 int error; 2629 int error;
2627 struct file *file; 2630 struct file *file;
2628 struct inode *inode; 2631 struct inode *inode;
2629 struct dentry *dentry, *root; 2632 struct path path;
2633 struct dentry *root;
2630 struct qstr this; 2634 struct qstr this;
2631 2635
2632 if (IS_ERR(shm_mnt)) 2636 if (IS_ERR(shm_mnt))
@@ -2643,38 +2647,35 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
2643 this.len = strlen(name); 2647 this.len = strlen(name);
2644 this.hash = 0; /* will go */ 2648 this.hash = 0; /* will go */
2645 root = shm_mnt->mnt_root; 2649 root = shm_mnt->mnt_root;
2646 dentry = d_alloc(root, &this); 2650 path.dentry = d_alloc(root, &this);
2647 if (!dentry) 2651 if (!path.dentry)
2648 goto put_memory; 2652 goto put_memory;
2649 2653 path.mnt = mntget(shm_mnt);
2650 error = -ENFILE;
2651 file = get_empty_filp();
2652 if (!file)
2653 goto put_dentry;
2654 2654
2655 error = -ENOSPC; 2655 error = -ENOSPC;
2656 inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags); 2656 inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags);
2657 if (!inode) 2657 if (!inode)
2658 goto close_file; 2658 goto put_dentry;
2659 2659
2660 d_instantiate(dentry, inode); 2660 d_instantiate(path.dentry, inode);
2661 inode->i_size = size; 2661 inode->i_size = size;
2662 inode->i_nlink = 0; /* It is unlinked */ 2662 inode->i_nlink = 0; /* It is unlinked */
2663 init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
2664 &shmem_file_operations);
2665
2666#ifndef CONFIG_MMU 2663#ifndef CONFIG_MMU
2667 error = ramfs_nommu_expand_for_mapping(inode, size); 2664 error = ramfs_nommu_expand_for_mapping(inode, size);
2668 if (error) 2665 if (error)
2669 goto close_file; 2666 goto put_dentry;
2670#endif 2667#endif
2671 ima_counts_get(file); 2668
2669 error = -ENFILE;
2670 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
2671 &shmem_file_operations);
2672 if (!file)
2673 goto put_dentry;
2674
2672 return file; 2675 return file;
2673 2676
2674close_file:
2675 put_filp(file);
2676put_dentry: 2677put_dentry:
2677 dput(dentry); 2678 path_put(&path);
2678put_memory: 2679put_memory:
2679 shmem_unacct_size(flags, size); 2680 shmem_unacct_size(flags, size);
2680 return ERR_PTR(error); 2681 return ERR_PTR(error);
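Because the shmem_file_setup() hunk interleaves the old and new bodies, the reshaped flow is easier to see in one piece: the dentry and vfsmount now live in a struct path, the inode is created and instantiated first, and alloc_file() runs last so there is never a half-built file to unwind (the CONFIG_MMU=n expansion step is omitted here). Collected from the added lines above, as an illustrative consolidation rather than a verbatim copy:

struct path path;

path.dentry = d_alloc(root, &this);
if (!path.dentry)
        goto put_memory;
path.mnt = mntget(shm_mnt);

error = -ENOSPC;
inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags);
if (!inode)
        goto put_dentry;

d_instantiate(path.dentry, inode);
inode->i_size = size;
inode->i_nlink = 0;                      /* it is unlinked */

error = -ENFILE;
file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
                  &shmem_file_operations);
if (!file)
        goto put_dentry;                 /* put_dentry: path_put(&path); */

return file;
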
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
deleted file mode 100644
index df2c87fdae5..00000000000
--- a/mm/shmem_acl.c
+++ /dev/null
@@ -1,171 +0,0 @@
1/*
2 * mm/shmem_acl.c
3 *
4 * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
5 *
6 * This file is released under the GPL.
7 */
8
9#include <linux/fs.h>
10#include <linux/shmem_fs.h>
11#include <linux/xattr.h>
12#include <linux/generic_acl.h>
13
14/**
15 * shmem_get_acl - generic_acl_operations->getacl() operation
16 */
17static struct posix_acl *
18shmem_get_acl(struct inode *inode, int type)
19{
20 struct posix_acl *acl = NULL;
21
22 spin_lock(&inode->i_lock);
23 switch(type) {
24 case ACL_TYPE_ACCESS:
25 acl = posix_acl_dup(inode->i_acl);
26 break;
27
28 case ACL_TYPE_DEFAULT:
29 acl = posix_acl_dup(inode->i_default_acl);
30 break;
31 }
32 spin_unlock(&inode->i_lock);
33
34 return acl;
35}
36
37/**
38 * shmem_set_acl - generic_acl_operations->setacl() operation
39 */
40static void
41shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
42{
43 struct posix_acl *free = NULL;
44
45 spin_lock(&inode->i_lock);
46 switch(type) {
47 case ACL_TYPE_ACCESS:
48 free = inode->i_acl;
49 inode->i_acl = posix_acl_dup(acl);
50 break;
51
52 case ACL_TYPE_DEFAULT:
53 free = inode->i_default_acl;
54 inode->i_default_acl = posix_acl_dup(acl);
55 break;
56 }
57 spin_unlock(&inode->i_lock);
58 posix_acl_release(free);
59}
60
61struct generic_acl_operations shmem_acl_ops = {
62 .getacl = shmem_get_acl,
63 .setacl = shmem_set_acl,
64};
65
66/**
67 * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access,
68 * shmem_xattr_acl_access_handler - plumbing code to implement the
69 * system.posix_acl_access xattr using the generic acl functions.
70 */
71
72static size_t
73shmem_list_acl_access(struct inode *inode, char *list, size_t list_size,
74 const char *name, size_t name_len)
75{
76 return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS,
77 list, list_size);
78}
79
80static int
81shmem_get_acl_access(struct inode *inode, const char *name, void *buffer,
82 size_t size)
83{
84 if (strcmp(name, "") != 0)
85 return -EINVAL;
86 return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer,
87 size);
88}
89
90static int
91shmem_set_acl_access(struct inode *inode, const char *name, const void *value,
92 size_t size, int flags)
93{
94 if (strcmp(name, "") != 0)
95 return -EINVAL;
96 return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value,
97 size);
98}
99
100struct xattr_handler shmem_xattr_acl_access_handler = {
101 .prefix = POSIX_ACL_XATTR_ACCESS,
102 .list = shmem_list_acl_access,
103 .get = shmem_get_acl_access,
104 .set = shmem_set_acl_access,
105};
106
107/**
108 * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default,
109 * shmem_xattr_acl_default_handler - plumbing code to implement the
110 * system.posix_acl_default xattr using the generic acl functions.
111 */
112
113static size_t
114shmem_list_acl_default(struct inode *inode, char *list, size_t list_size,
115 const char *name, size_t name_len)
116{
117 return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT,
118 list, list_size);
119}
120
121static int
122shmem_get_acl_default(struct inode *inode, const char *name, void *buffer,
123 size_t size)
124{
125 if (strcmp(name, "") != 0)
126 return -EINVAL;
127 return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer,
128 size);
129}
130
131static int
132shmem_set_acl_default(struct inode *inode, const char *name, const void *value,
133 size_t size, int flags)
134{
135 if (strcmp(name, "") != 0)
136 return -EINVAL;
137 return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value,
138 size);
139}
140
141struct xattr_handler shmem_xattr_acl_default_handler = {
142 .prefix = POSIX_ACL_XATTR_DEFAULT,
143 .list = shmem_list_acl_default,
144 .get = shmem_get_acl_default,
145 .set = shmem_set_acl_default,
146};
147
148/**
149 * shmem_acl_init - Inizialize the acl(s) of a new inode
150 */
151int
152shmem_acl_init(struct inode *inode, struct inode *dir)
153{
154 return generic_acl_init(inode, dir, &shmem_acl_ops);
155}
156
157/**
158 * shmem_check_acl - check_acl() callback for generic_permission()
159 */
160int
161shmem_check_acl(struct inode *inode, int mask)
162{
163 struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS);
164
165 if (acl) {
166 int error = posix_acl_permission(inode, acl, mask);
167 posix_acl_release(acl);
168 return error;
169 }
170 return -EAGAIN;
171}
diff --git a/mm/truncate.c b/mm/truncate.c
index 2c147a7e5f2..342deee2268 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -272,6 +272,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
272 pagevec_release(&pvec); 272 pagevec_release(&pvec);
273 break; 273 break;
274 } 274 }
275 mem_cgroup_uncharge_start();
275 for (i = 0; i < pagevec_count(&pvec); i++) { 276 for (i = 0; i < pagevec_count(&pvec); i++) {
276 struct page *page = pvec.pages[i]; 277 struct page *page = pvec.pages[i];
277 278
@@ -286,6 +287,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
286 unlock_page(page); 287 unlock_page(page);
287 } 288 }
288 pagevec_release(&pvec); 289 pagevec_release(&pvec);
290 mem_cgroup_uncharge_end();
289 } 291 }
290} 292}
291EXPORT_SYMBOL(truncate_inode_pages_range); 293EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -327,6 +329,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
327 pagevec_init(&pvec, 0); 329 pagevec_init(&pvec, 0);
328 while (next <= end && 330 while (next <= end &&
329 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 331 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
332 mem_cgroup_uncharge_start();
330 for (i = 0; i < pagevec_count(&pvec); i++) { 333 for (i = 0; i < pagevec_count(&pvec); i++) {
331 struct page *page = pvec.pages[i]; 334 struct page *page = pvec.pages[i];
332 pgoff_t index; 335 pgoff_t index;
@@ -354,6 +357,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
354 break; 357 break;
355 } 358 }
356 pagevec_release(&pvec); 359 pagevec_release(&pvec);
360 mem_cgroup_uncharge_end();
357 cond_resched(); 361 cond_resched();
358 } 362 }
359 return ret; 363 return ret;
@@ -428,6 +432,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
428 while (next <= end && !wrapped && 432 while (next <= end && !wrapped &&
429 pagevec_lookup(&pvec, mapping, next, 433 pagevec_lookup(&pvec, mapping, next,
430 min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 434 min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
435 mem_cgroup_uncharge_start();
431 for (i = 0; i < pagevec_count(&pvec); i++) { 436 for (i = 0; i < pagevec_count(&pvec); i++) {
432 struct page *page = pvec.pages[i]; 437 struct page *page = pvec.pages[i];
433 pgoff_t page_index; 438 pgoff_t page_index;
@@ -477,6 +482,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
477 unlock_page(page); 482 unlock_page(page);
478 } 483 }
479 pagevec_release(&pvec); 484 pagevec_release(&pvec);
485 mem_cgroup_uncharge_end();
480 cond_resched(); 486 cond_resched();
481 } 487 }
482 return ret; 488 return ret;