author      Vladimir Davydov <vdavydov@parallels.com>        2015-02-12 17:58:47 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org>   2015-02-12 21:54:08 -0500
commit      503c358cf1925853195ee39ec437e51138bbb7df (patch)
tree        14aebe291975ec4353f21068990ebfec503ed63f /fs/xfs
parent      10c1045f28e86ac90589a188f0be2d7a4347efdf (diff)
list_lru: introduce list_lru_shrink_{count,walk}
Kmem accounting in memcg is currently unusable, because it lacks slab
shrinker support. That means that when we hit the limit we get ENOMEM
without any chance to recover. What we should do then is call
shrink_slab, which would reclaim old inode/dentry caches from this
cgroup. This is what this patch set is intended to do.
Basically, it does two things. First, it introduces the notion of a
per-memcg slab shrinker. A shrinker that wants to reclaim objects per
cgroup should mark itself as SHRINKER_MEMCG_AWARE. It will then be
passed the memory cgroup to scan from in shrink_control->memcg. For
such shrinkers, shrink_slab iterates over the whole cgroup subtree
under the target cgroup and calls the shrinker for each kmem-active
memory cgroup.
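For illustration only - the callback names below are made up and the
exact flag combination is an assumption - a memcg-aware shrinker would
be registered roughly like this:

	static unsigned long my_count(struct shrinker *shrink,
				      struct shrink_control *sc);
	static unsigned long my_scan(struct shrinker *shrink,
				     struct shrink_control *sc);

	static struct shrinker my_shrinker = {
		.count_objects	= my_count,
		.scan_objects	= my_scan,
		.seeks		= DEFAULT_SEEKS,
		/* opt in to per-node and per-memcg reclaim */
		.flags		= SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
	};

	/* at init time */
	register_shrinker(&my_shrinker);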
Secondly, this patch set makes the list_lru structure per-memcg. It's
done transparently to list_lru users - all they have to do is tell
list_lru_init that they want a memcg-aware list_lru. The list_lru will
then automatically distribute objects among per-memcg lists based on
which cgroup each object is accounted to. This way, to make FS
shrinkers (icache, dcache) memcg-aware, we only need to make them use
a memcg-aware list_lru, and this is what this patch set does.
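Roughly, and assuming an init helper along the lines of
list_lru_init_memcg() (the helper name and struct my_object below are
illustrative, not part of this patch), a memcg-aware list_lru user
could look like:

	struct my_object {
		struct list_head	lru_node;
		/* ... payload ... */
	};

	static struct list_lru my_lru;

	static int my_cache_init(void)
	{
		/* ask for a memcg-aware (and NUMA-aware) list_lru */
		return list_lru_init_memcg(&my_lru);
	}

	static void my_object_unused(struct my_object *obj)
	{
		/* queued on the list of the memcg the object is accounted to */
		list_lru_add(&my_lru, &obj->lru_node);
	}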
As before, this patch set only enables per-memcg kmem reclaim when the
pressure comes from memory.limit, not from memory.kmem.limit. Handling
memory.kmem.limit is going to be tricky due to GFP_NOFS allocations,
and it is still unclear whether we will have this knob in the unified
hierarchy.
This patch (of 9):
NUMA aware slab shrinkers use the list_lru structure to distribute
objects coming from different NUMA nodes to different lists. Whenever
such a shrinker needs to count or scan objects from a particular node,
it issues commands like this:
	count = list_lru_count_node(lru, sc->nid);
	freed = list_lru_walk_node(lru, sc->nid, isolate_func,
				   isolate_arg, &sc->nr_to_scan);
where sc is an instance of the shrink_control structure passed to it
from vmscan.
To simplify this, let's add special list_lru functions to be used by
shrinkers, list_lru_shrink_count() and list_lru_shrink_walk(), which
consolidate the nid and nr_to_scan arguments in the shrink_control
structure.
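The header change is outside the fs/xfs diff shown below; conceptually
the new helpers are thin wrappers that pull nid and nr_to_scan out of
shrink_control, roughly:

	static inline unsigned long
	list_lru_shrink_count(struct list_lru *lru, struct shrink_control *sc)
	{
		return list_lru_count_node(lru, sc->nid);
	}

	static inline unsigned long
	list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
			     list_lru_walk_cb isolate, void *cb_arg)
	{
		return list_lru_walk_node(lru, sc->nid, isolate, cb_arg,
					  &sc->nr_to_scan);
	}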
This will also allow us to avoid patching shrinkers that use list_lru
when we make shrink_slab() per-memcg - all we will have to do is extend
the shrink_control structure to include the target memcg and make
list_lru_shrink_{count,walk} handle this appropriately.
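As a sketch of that direction only (the memcg field and the
list_lru_count_one() helper below are assumptions about later patches,
not part of this one):

	struct shrink_control {
		gfp_t		gfp_mask;
		unsigned long	nr_to_scan;
		int		nid;
		/* hypothetical addition: the memcg to reclaim from */
		struct mem_cgroup *memcg;
	};

	static inline unsigned long
	list_lru_shrink_count(struct list_lru *lru, struct shrink_control *sc)
	{
		/* would pick the per-memcg list instead of the whole node */
		return list_lru_count_one(lru, sc->nid, sc->memcg);
	}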
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/xfs')
-rw-r--r--   fs/xfs/xfs_buf.c   7
-rw-r--r--   fs/xfs/xfs_qm.c    7
2 files changed, 6 insertions, 8 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index bb502a391792..15c9d224c721 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1583,10 +1583,9 @@ xfs_buftarg_shrink_scan(
 					struct xfs_buftarg, bt_shrinker);
 	LIST_HEAD(dispose);
 	unsigned long		freed;
-	unsigned long		nr_to_scan = sc->nr_to_scan;
 
-	freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate,
-				       &dispose, &nr_to_scan);
+	freed = list_lru_shrink_walk(&btp->bt_lru, sc,
+				     xfs_buftarg_isolate, &dispose);
 
 	while (!list_empty(&dispose)) {
 		struct xfs_buf *bp;
@@ -1605,7 +1604,7 @@ xfs_buftarg_shrink_count(
 {
 	struct xfs_buftarg	*btp = container_of(shrink,
 					struct xfs_buftarg, bt_shrinker);
-	return list_lru_count_node(&btp->bt_lru, sc->nid);
+	return list_lru_shrink_count(&btp->bt_lru, sc);
 }
 
 void
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 3e8186279541..4f4b1274e144 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -523,7 +523,6 @@ xfs_qm_shrink_scan(
 	struct xfs_qm_isolate	isol;
 	unsigned long		freed;
 	int			error;
-	unsigned long		nr_to_scan = sc->nr_to_scan;
 
 	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
 		return 0;
@@ -531,8 +530,8 @@ xfs_qm_shrink_scan(
 	INIT_LIST_HEAD(&isol.buffers);
 	INIT_LIST_HEAD(&isol.dispose);
 
-	freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol,
-					&nr_to_scan);
+	freed = list_lru_shrink_walk(&qi->qi_lru, sc,
+				     xfs_qm_dquot_isolate, &isol);
 
 	error = xfs_buf_delwri_submit(&isol.buffers);
 	if (error)
@@ -557,7 +556,7 @@ xfs_qm_shrink_count(
 	struct xfs_quotainfo	*qi = container_of(shrink,
 					struct xfs_quotainfo, qi_shrinker);
 
-	return list_lru_count_node(&qi->qi_lru, sc->nid);
+	return list_lru_shrink_count(&qi->qi_lru, sc);
 }
 
 /*