Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--   mm/memcontrol.c   180
1 file changed, 35 insertions(+), 145 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 85df503ec023..ef91e856c7e4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -296,7 +296,6 @@ struct mem_cgroup {
 	 * Should the accounting and control be hierarchical, per subtree?
 	 */
 	bool use_hierarchy;
-	unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */
 
 	bool oom_lock;
 	atomic_t under_oom;
@@ -366,22 +365,11 @@ struct mem_cgroup {
 	/* WARNING: nodeinfo must be the last member here */
 };
 
-/* internal only representation about the status of kmem accounting. */
-enum {
-	KMEM_ACCOUNTED_ACTIVE, /* accounted by this cgroup itself */
-};
-
 #ifdef CONFIG_MEMCG_KMEM
-static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
-{
-	set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
-}
-
 static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 {
-	return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
+	return memcg->kmemcg_id >= 0;
 }
-
 #endif
 
 /* Stuffs for move charges at task migration. */
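
For readers following the hunk above: the KMEM_ACCOUNTED_ACTIVE bit is dropped and "kmem active" is instead inferred from kmemcg_id, which a later hunk initializes to -1 in css_alloc. The standalone sketch below (illustration only, not part of the patch; the struct is a stand-in carrying just the one field this diff relies on) shows the before/after semantics:

/* sketch.c - minimal, user-space illustration of the new activity check */
#include <stdbool.h>
#include <stdio.h>

struct memcg_sketch {
	int kmemcg_id;	/* -1 until memcg_activate_kmem() assigns an id */
};

static bool memcg_kmem_is_active_sketch(const struct memcg_sketch *memcg)
{
	/* replaces test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags) */
	return memcg->kmemcg_id >= 0;
}

int main(void)
{
	struct memcg_sketch m = { .kmemcg_id = -1 };	/* as done in css_alloc */

	printf("before activation: %d\n", memcg_kmem_is_active_sketch(&m));
	m.kmemcg_id = 0;				/* as done at the end of activation */
	printf("after activation:  %d\n", memcg_kmem_is_active_sketch(&m));
	return 0;
}

The real memcg_activate_kmem() (see the hunk at old line 3570 below) assigns the id only after the static key has been enabled, which is what lets the id double as the activity flag.
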
@@ -1571,7 +1559,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	 * select it. The goal is to allow it to allocate so that it may
 	 * quickly exit and free its memory.
 	 */
-	if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
+	if (fatal_signal_pending(current) || task_will_free_mem(current)) {
 		set_thread_flag(TIF_MEMDIE);
 		return;
 	}
@@ -1628,6 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			 NULL, "Memory cgroup out of memory");
 }
 
+#if MAX_NUMNODES > 1
+
 /**
  * test_mem_cgroup_node_reclaimable
  * @memcg: the target memcg
@@ -1650,7 +1640,6 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
 	return false;
 
 }
-#if MAX_NUMNODES > 1
 
 /*
  * Always updating the nodemask is not very good - even if we have an empty
@@ -2646,7 +2635,6 @@ static void memcg_register_cache(struct mem_cgroup *memcg,
 	if (!cachep)
 		return;
 
-	css_get(&memcg->css);
 	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
 
 	/*
@@ -2680,40 +2668,6 @@ static void memcg_unregister_cache(struct kmem_cache *cachep)
 	list_del(&cachep->memcg_params->list);
 
 	kmem_cache_destroy(cachep);
-
-	/* drop the reference taken in memcg_register_cache */
-	css_put(&memcg->css);
-}
-
-/*
- * During the creation a new cache, we need to disable our accounting mechanism
- * altogether. This is true even if we are not creating, but rather just
- * enqueing new caches to be created.
- *
- * This is because that process will trigger allocations; some visible, like
- * explicit kmallocs to auxiliary data structures, name strings and internal
- * cache structures; some well concealed, like INIT_WORK() that can allocate
- * objects during debug.
- *
- * If any allocation happens during memcg_kmem_get_cache, we will recurse back
- * to it. This may not be a bounded recursion: since the first cache creation
- * failed to complete (waiting on the allocation), we'll just try to create the
- * cache again, failing at the same point.
- *
- * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
- * memcg_kmem_skip_account. So we enclose anything that might allocate memory
- * inside the following two functions.
- */
-static inline void memcg_stop_kmem_account(void)
-{
-	VM_BUG_ON(!current->mm);
-	current->memcg_kmem_skip_account++;
-}
-
-static inline void memcg_resume_kmem_account(void)
-{
-	VM_BUG_ON(!current->mm);
-	current->memcg_kmem_skip_account--;
 }
 
 int __memcg_cleanup_cache_params(struct kmem_cache *s)
@@ -2747,9 +2701,7 @@ static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
 	mutex_lock(&memcg_slab_mutex);
 	list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
 		cachep = memcg_params_to_cache(params);
-		kmem_cache_shrink(cachep);
-		if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
-			memcg_unregister_cache(cachep);
+		memcg_unregister_cache(cachep);
 	}
 	mutex_unlock(&memcg_slab_mutex);
 }
@@ -2784,10 +2736,10 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
 	struct memcg_register_cache_work *cw;
 
 	cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
-	if (cw == NULL) {
-		css_put(&memcg->css);
+	if (!cw)
 		return;
-	}
+
+	css_get(&memcg->css);
 
 	cw->memcg = memcg;
 	cw->cachep = cachep;
@@ -2810,20 +2762,16 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
 	 * this point we can't allow ourselves back into memcg_kmem_get_cache,
 	 * the safest choice is to do it like this, wrapping the whole function.
 	 */
-	memcg_stop_kmem_account();
+	current->memcg_kmem_skip_account = 1;
 	__memcg_schedule_register_cache(memcg, cachep);
-	memcg_resume_kmem_account();
+	current->memcg_kmem_skip_account = 0;
 }
 
 int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
 {
 	unsigned int nr_pages = 1 << order;
-	int res;
 
-	res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
-	if (!res)
-		atomic_add(nr_pages, &cachep->memcg_params->nr_pages);
-	return res;
+	return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
 }
 
 void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
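
The hunk above drops the memcg_stop_kmem_account()/memcg_resume_kmem_account() helpers (removed earlier in this patch) in favour of direct writes to current->memcg_kmem_skip_account around the work-item allocation, so cache registration cannot recurse into the memcg cache selector. A standalone user-space sketch of that guard, with a stand-in struct in place of task_struct (illustration only, not part of the patch):

/* skip_account_sketch.c - recursion-guard pattern in miniature */
#include <stdbool.h>
#include <stdio.h>

/* stand-in for task_struct; only the field this patch touches */
struct task_sketch {
	unsigned int memcg_kmem_skip_account;
};

static struct task_sketch current_task;
#define current (&current_task)

/* mirrors the early bail-out in __memcg_kmem_get_cache() */
static bool kmem_get_cache_bypassed(void)
{
	return current->memcg_kmem_skip_account != 0;
}

/* mirrors memcg_schedule_register_cache() after the patch */
static void schedule_register_cache_sketch(void)
{
	current->memcg_kmem_skip_account = 1;	/* was memcg_stop_kmem_account() */
	/* ...kmalloc() the work item and queue it; allocations here are bypassed... */
	printf("bypassed while scheduling: %d\n", kmem_get_cache_bypassed());
	current->memcg_kmem_skip_account = 0;	/* was memcg_resume_kmem_account() */
}

int main(void)
{
	schedule_register_cache_sketch();
	printf("bypassed afterwards: %d\n", kmem_get_cache_bypassed());
	return 0;
}
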
@@ -2831,7 +2779,6 @@ void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
 	unsigned int nr_pages = 1 << order;
 
 	memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages);
-	atomic_sub(nr_pages, &cachep->memcg_params->nr_pages);
 }
 
 /*
@@ -2847,8 +2794,7 @@ void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
  * Can't be called in interrupt context or from kernel threads.
  * This function needs to be called with rcu_read_lock() held.
  */
-struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
-					  gfp_t gfp)
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
 {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *memcg_cachep;
@@ -2856,25 +2802,16 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	VM_BUG_ON(!cachep->memcg_params);
 	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
 
-	if (!current->mm || current->memcg_kmem_skip_account)
+	if (current->memcg_kmem_skip_account)
 		return cachep;
 
-	rcu_read_lock();
-	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
-
+	memcg = get_mem_cgroup_from_mm(current->mm);
 	if (!memcg_kmem_is_active(memcg))
 		goto out;
 
 	memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg));
-	if (likely(memcg_cachep)) {
-		cachep = memcg_cachep;
-		goto out;
-	}
-
-	/* The corresponding put will be done in the workqueue. */
-	if (!css_tryget_online(&memcg->css))
-		goto out;
-	rcu_read_unlock();
+	if (likely(memcg_cachep))
+		return memcg_cachep;
 
 	/*
 	 * If we are in a safe context (can wait, and not in interrupt
@@ -2889,12 +2826,17 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	 * defer everything.
 	 */
 	memcg_schedule_register_cache(memcg, cachep);
-	return cachep;
 out:
-	rcu_read_unlock();
+	css_put(&memcg->css);
 	return cachep;
 }
 
+void __memcg_kmem_put_cache(struct kmem_cache *cachep)
+{
+	if (!is_root_cache(cachep))
+		css_put(&cachep->memcg_params->memcg->css);
+}
+
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll
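
With the two hunks above, __memcg_kmem_get_cache() pins the memcg via get_mem_cgroup_from_mm() and either drops that reference on the out: path or leaves it held for the new __memcg_kmem_put_cache() to release after the allocation. A standalone sketch of the get/put pairing (illustration only; the counter and flags are simplified stand-ins for the css reference and the lookups in the real code):

/* get_put_sketch.c - reference pairing around a per-memcg cache lookup */
#include <stdbool.h>
#include <stdio.h>

struct memcg_sketch {
	int css_refs;		/* stand-in for the css reference count */
	bool kmem_active;	/* stand-in for kmemcg_id >= 0 */
	bool has_cache;		/* stand-in for a cache_from_memcg_idx() hit */
};

static void css_get_sketch(struct memcg_sketch *m) { m->css_refs++; }
static void css_put_sketch(struct memcg_sketch *m) { m->css_refs--; }

/* Returns true if the caller now owns a reference to drop via put_cache_sketch(). */
static bool get_cache_sketch(struct memcg_sketch *m)
{
	css_get_sketch(m);			/* get_mem_cgroup_from_mm() */
	if (!m->kmem_active || !m->has_cache) {
		css_put_sketch(m);		/* the "out:" path */
		return false;			/* caller falls back to the root cache */
	}
	return true;				/* reference stays held across the allocation */
}

static void put_cache_sketch(struct memcg_sketch *m)
{
	css_put_sketch(m);			/* __memcg_kmem_put_cache() */
}

int main(void)
{
	struct memcg_sketch m = { .css_refs = 1, .kmem_active = true, .has_cache = true };

	if (get_cache_sketch(&m))
		put_cache_sketch(&m);		/* after the slab object is allocated */
	printf("css refs back to %d\n", m.css_refs);
	return 0;
}
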
@@ -2917,34 +2859,6 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
 
 	*_memcg = NULL;
 
-	/*
-	 * Disabling accounting is only relevant for some specific memcg
-	 * internal allocations. Therefore we would initially not have such
-	 * check here, since direct calls to the page allocator that are
-	 * accounted to kmemcg (alloc_kmem_pages and friends) only happen
-	 * outside memcg core. We are mostly concerned with cache allocations,
-	 * and by having this test at memcg_kmem_get_cache, we are already able
-	 * to relay the allocation to the root cache and bypass the memcg cache
-	 * altogether.
-	 *
-	 * There is one exception, though: the SLUB allocator does not create
-	 * large order caches, but rather service large kmallocs directly from
-	 * the page allocator. Therefore, the following sequence when backed by
-	 * the SLUB allocator:
-	 *
-	 * memcg_stop_kmem_account();
-	 * kmalloc(<large_number>)
-	 * memcg_resume_kmem_account();
-	 *
-	 * would effectively ignore the fact that we should skip accounting,
-	 * since it will drive us directly to this function without passing
-	 * through the cache selector memcg_kmem_get_cache. Such large
-	 * allocations are extremely rare but can happen, for instance, for the
-	 * cache arrays. We bring this test here.
-	 */
-	if (!current->mm || current->memcg_kmem_skip_account)
-		return true;
-
 	memcg = get_mem_cgroup_from_mm(current->mm);
 
 	if (!memcg_kmem_is_active(memcg)) {
@@ -2985,10 +2899,6 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
 	memcg_uncharge_kmem(memcg, 1 << order);
 	page->mem_cgroup = NULL;
 }
-#else
-static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
-{
-}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -3539,12 +3449,6 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
 		return 0;
 
 	/*
-	 * We are going to allocate memory for data shared by all memory
-	 * cgroups so let's stop accounting here.
-	 */
-	memcg_stop_kmem_account();
-
-	/*
 	 * For simplicity, we won't allow this to be disabled. It also can't
 	 * be changed if the cgroup has children already, or if tasks had
 	 * already joined.
@@ -3570,25 +3474,22 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
 		goto out;
 	}
 
-	memcg->kmemcg_id = memcg_id;
-	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-
 	/*
-	 * We couldn't have accounted to this cgroup, because it hasn't got the
-	 * active bit set yet, so this should succeed.
+	 * We couldn't have accounted to this cgroup, because it hasn't got
+	 * activated yet, so this should succeed.
 	 */
 	err = page_counter_limit(&memcg->kmem, nr_pages);
 	VM_BUG_ON(err);
 
 	static_key_slow_inc(&memcg_kmem_enabled_key);
 	/*
-	 * Setting the active bit after enabling static branching will
+	 * A memory cgroup is considered kmem-active as soon as it gets
+	 * kmemcg_id. Setting the id after enabling static branching will
 	 * guarantee no one starts accounting before all call sites are
 	 * patched.
 	 */
-	memcg_kmem_set_active(memcg);
+	memcg->kmemcg_id = memcg_id;
 out:
-	memcg_resume_kmem_account();
 	return err;
 }
 
@@ -3791,11 +3692,6 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
 }
 #endif /* CONFIG_NUMA */
 
-static inline void mem_cgroup_lru_names_not_uptodate(void)
-{
-	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
-}
-
 static int memcg_stat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
@@ -3803,6 +3699,8 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	struct mem_cgroup *mi;
 	unsigned int i;
 
+	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
+
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
 		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
 			continue;
@@ -4259,7 +4157,6 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
 	int ret;
 
-	memcg->kmemcg_id = -1;
 	ret = memcg_propagate_kmem(memcg);
 	if (ret)
 		return ret;
@@ -4269,6 +4166,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 
 static void memcg_destroy_kmem(struct mem_cgroup *memcg)
 {
+	memcg_unregister_all_caches(memcg);
 	mem_cgroup_sockets_destroy(memcg);
 }
 #else
@@ -4724,17 +4622,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
 	free_percpu(memcg->stat);
 
-	/*
-	 * We need to make sure that (at least for now), the jump label
-	 * destruction code runs outside of the cgroup lock. This is because
-	 * get_online_cpus(), which is called from the static_branch update,
-	 * can't be called inside the cgroup_lock. cpusets are the ones
-	 * enforcing this dependency, so if they ever change, we might as well.
-	 *
-	 * schedule_work() will guarantee this happens. Be careful if you need
-	 * to move this code around, and make sure it is outside
-	 * the cgroup_lock.
-	 */
 	disarm_static_keys(memcg);
 	kfree(memcg);
 }
@@ -4804,6 +4691,10 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	vmpressure_init(&memcg->vmpressure);
 	INIT_LIST_HEAD(&memcg->event_list);
 	spin_lock_init(&memcg->event_list_lock);
+#ifdef CONFIG_MEMCG_KMEM
+	memcg->kmemcg_id = -1;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+#endif
 
 	return &memcg->css;
 
@@ -4885,7 +4776,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	}
 	spin_unlock(&memcg->event_list_lock);
 
-	memcg_unregister_all_caches(memcg);
 	vmpressure_cleanup(&memcg->vmpressure);
 }
 