-rw-r--r-- | include/linux/memcontrol.h | 15
-rw-r--r-- | mm/memcontrol.c            |  4
-rw-r--r-- | mm/slab.c                  |  7
-rw-r--r-- | mm/slab.h                  | 29
-rw-r--r-- | mm/slab_common.c           |  6
-rw-r--r-- | mm/slub.c                  | 24
6 files changed, 59 insertions, 26 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b569b8be5c5a..96e5d2573eb0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -506,6 +506,9 @@ void memcg_update_array_size(int num_groups);
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
+
 void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
@@ -583,17 +586,7 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
  * @cachep: the original global kmem cache
  * @gfp: allocation flags.
  *
- * This function assumes that the task allocating, which determines the memcg
- * in the page allocator, belongs to the same cgroup throughout the whole
- * process. Misacounting can happen if the task calls memcg_kmem_get_cache()
- * while belonging to a cgroup, and later on changes. This is considered
- * acceptable, and should only happen upon task migration.
- *
- * Before the cache is created by the memcg core, there is also a possible
- * imbalance: the task belongs to a memcg, but the cache being allocated from
- * is the global cache, since the child cache is not yet guaranteed to be
- * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
- * passed and the page allocator will not attempt any cgroup accounting.
+ * All memory allocated from a per-memcg cache is charged to the owner memcg.
  */
 static __always_inline struct kmem_cache *
 memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
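
The two prototypes added above make memcg_charge_kmem()/memcg_uncharge_kmem() the interface the slab allocators will call: the former attempts to charge size bytes to the given memcg and returns 0 on success (non-zero if the charge cannot be accommodated), the latter releases exactly that many bytes. A minimal caller-side sketch of the intended pairing; alloc_accounted_pages() is illustrative only, the patch itself routes callers through the mm/slab.h helpers shown further down:

static struct page *alloc_accounted_pages(struct mem_cgroup *memcg,
					  gfp_t gfp, int order)
{
	struct page *page;

	/* Charge the whole span up front; fail fast if the limit is hit. */
	if (memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order))
		return NULL;

	page = alloc_pages(gfp, order);
	if (!page)
		/* Allocation failed: give the charge back to keep counters balanced. */
		memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
	return page;
}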
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5177c6d4a2dd..56a768b3d5a8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2953,7 +2953,7 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
 }
 #endif
 
-static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
 	int ret = 0;
@@ -2991,7 +2991,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	return ret;
 }
 
-static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
 {
 	res_counter_uncharge(&memcg->res, size);
 	if (do_swap_account)
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1688,8 +1688,12 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		flags |= __GFP_RECLAIMABLE;
 
+	if (memcg_charge_slab(cachep, flags, cachep->gfporder))
+		return NULL;
+
 	page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
 	if (!page) {
+		memcg_uncharge_slab(cachep, cachep->gfporder);
 		slab_out_of_memory(cachep, flags, nodeid);
 		return NULL;
 	}
@@ -1747,7 +1751,8 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
 	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	__free_memcg_kmem_pages(page, cachep->gfporder);
+	__free_pages(page, cachep->gfporder);
+	memcg_uncharge_slab(cachep, cachep->gfporder);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
diff --git a/mm/slab.h b/mm/slab.h
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -192,6 +192,26 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 		return s;
 	return s->memcg_params->root_cache;
 }
+
+static __always_inline int memcg_charge_slab(struct kmem_cache *s,
+					     gfp_t gfp, int order)
+{
+	if (!memcg_kmem_enabled())
+		return 0;
+	if (is_root_cache(s))
+		return 0;
+	return memcg_charge_kmem(s->memcg_params->memcg, gfp,
+				 PAGE_SIZE << order);
+}
+
+static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
+{
+	if (!memcg_kmem_enabled())
+		return;
+	if (is_root_cache(s))
+		return;
+	memcg_uncharge_kmem(s->memcg_params->memcg, PAGE_SIZE << order);
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -227,6 +247,15 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 {
 	return s;
 }
+
+static inline int memcg_charge_slab(struct kmem_cache *s, gfp_t gfp, int order)
+{
+	return 0;
+}
+
+static inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
+{
+}
 #endif
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
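
Both the CONFIG_MEMCG_KMEM variants and the stubs above share the same prototypes, so the allocators can call memcg_charge_slab()/memcg_uncharge_slab() unconditionally: with kmem accounting compiled out, or for a root (global) cache, the calls reduce to "return 0" / an empty body. On the free side the pattern mirrors allocation, as kmem_freepages() above and __free_slab() below show: the pages go back to the page allocator first, then the cache drops its charge explicitly instead of relying on __free_memcg_kmem_pages(). A condensed sketch of that free path; free_slab_span() is illustrative, not a function from the patch:

/* Return the pages, then drop the per-memcg kmem charge for this span. */
static void free_slab_span(struct kmem_cache *s, struct page *page, int order)
{
	__free_pages(page, order);
	memcg_uncharge_slab(s, order);	/* no-op for root caches */
}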
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 102cc6fca3d3..06f0c6125632 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -290,12 +290,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
 				 root_cache->size, root_cache->align,
 				 root_cache->flags, root_cache->ctor,
 				 memcg, root_cache);
-	if (IS_ERR(s)) {
+	if (IS_ERR(s))
 		kfree(cache_name);
-		goto out_unlock;
-	}
-
-	s->allocflags |= __GFP_KMEMCG;
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1312,17 +1312,26 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
 /*
  * Slab allocation and freeing
  */
-static inline struct page *alloc_slab_page(gfp_t flags, int node,
-					struct kmem_cache_order_objects oo)
+static inline struct page *alloc_slab_page(struct kmem_cache *s,
+		gfp_t flags, int node, struct kmem_cache_order_objects oo)
 {
+	struct page *page;
 	int order = oo_order(oo);
 
 	flags |= __GFP_NOTRACK;
 
+	if (memcg_charge_slab(s, flags, order))
+		return NULL;
+
 	if (node == NUMA_NO_NODE)
-		return alloc_pages(flags, order);
+		page = alloc_pages(flags, order);
 	else
-		return alloc_pages_exact_node(node, flags, order);
+		page = alloc_pages_exact_node(node, flags, order);
+
+	if (!page)
+		memcg_uncharge_slab(s, order);
+
+	return page;
 }
 
 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1344,7 +1353,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	 */
 	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
 
-	page = alloc_slab_page(alloc_gfp, node, oo);
+	page = alloc_slab_page(s, alloc_gfp, node, oo);
 	if (unlikely(!page)) {
 		oo = s->min;
 		alloc_gfp = flags;
@@ -1352,7 +1361,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 		 * Allocation may have failed due to fragmentation.
 		 * Try a lower order alloc if possible
 		 */
-		page = alloc_slab_page(alloc_gfp, node, oo);
+		page = alloc_slab_page(s, alloc_gfp, node, oo);
 
 		if (page)
 			stat(s, ORDER_FALLBACK);
@@ -1468,7 +1477,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	page_mapcount_reset(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-	__free_memcg_kmem_pages(page, order);
+	__free_pages(page, order);
+	memcg_uncharge_slab(s, order);
 }
 
 #define need_reserve_slab_rcu					\
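
Taken together with memcg_kmem_get_cache() (whose comment was trimmed in include/linux/memcontrol.h above), the resulting accounting flow is: the allocating task selects its cgroup's clone of the cache, and every new slab page backing that clone is charged to the clone's owner memcg by memcg_charge_slab(), regardless of which cgroup the task sits in when the page is actually allocated. A condensed, illustrative sketch of that flow; kmalloc_accounted() is not a real entry point, the normal kmem_cache_alloc()/kmalloc() paths do this internally:

static void *kmalloc_accounted(struct kmem_cache *cachep, gfp_t gfp)
{
	/* Pick the per-memcg clone of the cache for the current task. */
	struct kmem_cache *s = memcg_kmem_get_cache(cachep, gfp);

	/*
	 * If the cache has to grow, the new slab page is charged to
	 * s->memcg_params->memcg via memcg_charge_slab(), not to the
	 * current task's cgroup at page-allocation time.
	 */
	return kmem_cache_alloc(s, gfp);
}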