Diffstat (limited to 'mm/memcontrol.c')

 -rw-r--r--   mm/memcontrol.c | 188
 1 file changed, 164 insertions(+), 24 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f3c15bb07cce..2156ef775d04 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -57,6 +57,7 @@
 #include <linux/lockdep.h>
 #include <linux/file.h>
 #include <linux/tracehook.h>
+#include <linux/psi.h>
 #include <linux/seq_buf.h>
 #include "internal.h"
 #include <net/sock.h>
@@ -317,6 +318,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
 EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 struct workqueue_struct *memcg_kmem_cache_wq;
+#endif
 
 static int memcg_shrinker_map_size;
 static DEFINE_MUTEX(memcg_shrinker_map_mutex);
@@ -440,14 +442,6 @@ void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
         }
 }
 
-#else /* CONFIG_MEMCG_KMEM */
-static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
-{
-        return 0;
-}
-static void memcg_free_shrinker_maps(struct mem_cgroup *memcg) { }
-#endif /* CONFIG_MEMCG_KMEM */
-
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
  * @page: page of interest
@@ -2270,21 +2264,22 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
         for_each_online_cpu(cpu) {
                 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                 struct mem_cgroup *memcg;
+                bool flush = false;
 
+                rcu_read_lock();
                 memcg = stock->cached;
-                if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css))
-                        continue;
-                if (!mem_cgroup_is_descendant(memcg, root_memcg)) {
-                        css_put(&memcg->css);
-                        continue;
-                }
-                if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
+                if (memcg && stock->nr_pages &&
+                    mem_cgroup_is_descendant(memcg, root_memcg))
+                        flush = true;
+                rcu_read_unlock();
+
+                if (flush &&
+                    !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
                         if (cpu == curcpu)
                                 drain_local_stock(&stock->work);
                         else
                                 schedule_work_on(cpu, &stock->work);
                 }
-                css_put(&memcg->css);
         }
         put_cpu();
         mutex_unlock(&percpu_charge_mutex);
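The rework above drops the css_tryget()/css_put() pair: the cached stock is only inspected, never pinned, so the check can run under rcu_read_lock() and merely record a flush decision that is acted on after the read section ends. Below is a minimal single-threaded userspace model of that decision logic; the types and names are hypothetical stand-ins, and RCU plus the per-CPU machinery are elided, so this sketches only the control flow, not the lifetime guarantees.

/*
 * Simplified model of the new flush decision in drain_all_stock().
 * Build with: cc -std=c99 -Wall stock.c
 */
#include <stdbool.h>
#include <stdio.h>

struct memcg { struct memcg *parent; const char *name; };
struct stock { struct memcg *cached; unsigned long nr_pages; };

/* Walk up the hierarchy, as mem_cgroup_is_descendant() does. */
static bool is_descendant(struct memcg *memcg, struct memcg *root)
{
        for (; memcg; memcg = memcg->parent)
                if (memcg == root)
                        return true;
        return false;
}

int main(void)
{
        struct memcg root = { NULL, "root" };
        struct memcg a = { &root, "a" }, b = { &root, "b" };
        struct memcg a1 = { &a, "a/1" };
        struct stock stocks[] = {
                { &a1, 32 },    /* descendant of "a": flush */
                { &b,  16 },    /* not under "a": skip */
                { &a,   0 },    /* nothing stocked: skip */
        };

        for (unsigned int cpu = 0; cpu < 3; cpu++) {
                struct stock *st = &stocks[cpu];
                bool flush = false;

                /* In the kernel this inspection runs under rcu_read_lock(). */
                if (st->cached && st->nr_pages &&
                    is_descendant(st->cached, &a))
                        flush = true;

                printf("cpu%u (%s, %lu pages): %s\n", cpu,
                       st->cached ? st->cached->name : "none",
                       st->nr_pages, flush ? "flush" : "skip");
        }
        return 0;
}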
@@ -2359,11 +2354,67 @@ static void high_work_func(struct work_struct *work)
 }
 
 /*
+ * Clamp the maximum sleep time per allocation batch to 2 seconds. This is
+ * enough to still cause a significant slowdown in most cases, while still
+ * allowing diagnostics and tracing to proceed without becoming stuck.
+ */
+#define MEMCG_MAX_HIGH_DELAY_JIFFIES (2UL*HZ)
+
+/*
+ * When calculating the delay, we use these either side of the exponentiation to
+ * maintain precision and scale to a reasonable number of jiffies (see the table
+ * below).
+ *
+ * - MEMCG_DELAY_PRECISION_SHIFT: Extra precision bits while translating the
+ *   overage ratio to a delay.
+ * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down the
+ *   proposed penalty in order to reduce to a reasonable number of jiffies, and
+ *   to produce a reasonable delay curve.
+ *
+ * MEMCG_DELAY_SCALING_SHIFT just happens to be a number that produces a
+ * reasonable delay curve compared to precision-adjusted overage, not
+ * penalising heavily at first, but still making sure that growth beyond the
+ * limit penalises misbehaving cgroups by slowing them down exponentially. For
+ * example, with a high of 100 megabytes:
+ *
+ *  +-------+------------------------+
+ *  | usage | time to allocate in ms |
+ *  +-------+------------------------+
+ *  | 100M  |                      0 |
+ *  | 101M  |                      6 |
+ *  | 102M  |                     25 |
+ *  | 103M  |                     57 |
+ *  | 104M  |                    102 |
+ *  | 105M  |                    159 |
+ *  | 106M  |                    230 |
+ *  | 107M  |                    313 |
+ *  | 108M  |                    409 |
+ *  | 109M  |                    518 |
+ *  | 110M  |                    639 |
+ *  | 111M  |                    774 |
+ *  | 112M  |                    921 |
+ *  | 113M  |                   1081 |
+ *  | 114M  |                   1254 |
+ *  | 115M  |                   1439 |
+ *  | 116M  |                   1638 |
+ *  | 117M  |                   1849 |
+ *  | 118M  |                   2000 |
+ *  | 119M  |                   2000 |
+ *  | 120M  |                   2000 |
+ *  +-------+------------------------+
+ */
+#define MEMCG_DELAY_PRECISION_SHIFT 20
+#define MEMCG_DELAY_SCALING_SHIFT 14
+
+/*
  * Scheduled by try_charge() to be executed from the userland return path
  * and reclaims memory over the high limit.
  */
 void mem_cgroup_handle_over_high(void)
 {
+        unsigned long usage, high, clamped_high;
+        unsigned long pflags;
+        unsigned long penalty_jiffies, overage;
         unsigned int nr_pages = current->memcg_nr_pages_over_high;
         struct mem_cgroup *memcg;
 
@@ -2372,8 +2423,75 @@ void mem_cgroup_handle_over_high(void)
 
         memcg = get_mem_cgroup_from_mm(current->mm);
         reclaim_high(memcg, nr_pages, GFP_KERNEL);
-        css_put(&memcg->css);
         current->memcg_nr_pages_over_high = 0;
+
+        /*
+         * memory.high is breached and reclaim is unable to keep up. Throttle
+         * allocators proactively to slow down excessive growth.
+         *
+         * We use overage compared to memory.high to calculate the number of
+         * jiffies to sleep (penalty_jiffies). Ideally this value should be
+         * fairly lenient on small overages, and increasingly harsh when the
+         * memcg in question makes it clear that it has no intention of stopping
+         * its crazy behaviour, so we exponentially increase the delay based on
+         * overage amount.
+         */
+
+        usage = page_counter_read(&memcg->memory);
+        high = READ_ONCE(memcg->high);
+
+        if (usage <= high)
+                goto out;
+
+        /*
+         * Prevent division by 0 in overage calculation by acting as if it were
+         * a threshold of 1 page.
+         */
+        clamped_high = max(high, 1UL);
+
+        overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
+                          clamped_high);
+
+        penalty_jiffies = ((u64)overage * overage * HZ)
+                >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+
+        /*
+         * Factor in the task's own contribution to the overage, such that four
+         * N-sized allocations are throttled approximately the same as one
+         * 4N-sized allocation.
+         *
+         * MEMCG_CHARGE_BATCH pages is nominal, so work out how much smaller or
+         * larger the current charge batch is than that.
+         */
+        penalty_jiffies = penalty_jiffies * nr_pages / MEMCG_CHARGE_BATCH;
+
+        /*
+         * Clamp the max delay per usermode return so as to still keep the
+         * application moving forwards and also permit diagnostics, albeit
+         * extremely slowly.
+         */
+        penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+
+        /*
+         * Don't sleep if the amount of jiffies this memcg owes us is so low
+         * that it's not even worth doing, in an attempt to be nice to those who
+         * go only a small amount over their memory.high value and maybe haven't
+         * been aggressively reclaimed enough yet.
+         */
+        if (penalty_jiffies <= HZ / 100)
+                goto out;
+
+        /*
+         * If we exit early, we're guaranteed to die (since
+         * schedule_timeout_killable sets TASK_KILLABLE). This means we don't
+         * need to account for any ill-begotten jiffies to pay them off later.
+         */
+        psi_memstall_enter(&pflags);
+        schedule_timeout_killable(penalty_jiffies);
+        psi_memstall_leave(&pflags);
+
+out:
+        css_put(&memcg->css);
 }
 
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
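The arithmetic in this hunk can be checked outside the kernel. The sketch below reproduces the penalty calculation in userspace, assuming HZ=1000 (so one jiffy is one millisecond) and a 4K page size, and regenerates the delay table from the comment block above. The kernel additionally scales the result by nr_pages/MEMCG_CHARGE_BATCH, which is 1 for a full charge batch, so that step is omitted here.

/*
 * Userspace reproduction of the memory.high penalty curve.
 * Build with: cc -std=c99 -Wall penalty.c
 */
#include <stdio.h>
#include <stdint.h>

#define HZ                              1000UL  /* assumed; config-dependent */
#define MEMCG_MAX_HIGH_DELAY_JIFFIES    (2UL * HZ)
#define MEMCG_DELAY_PRECISION_SHIFT     20
#define MEMCG_DELAY_SCALING_SHIFT       14

static unsigned long penalty_jiffies(unsigned long usage, unsigned long high)
{
        unsigned long clamped_high, overage, penalty;

        if (usage <= high)
                return 0;

        clamped_high = high ? high : 1; /* avoid division by zero */
        overage = ((uint64_t)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT) /
                  clamped_high;
        penalty = ((uint64_t)overage * overage * HZ) >>
                  (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);

        return penalty < MEMCG_MAX_HIGH_DELAY_JIFFIES ?
               penalty : MEMCG_MAX_HIGH_DELAY_JIFFIES;
}

int main(void)
{
        unsigned long high = 100UL << 20 >> 12; /* 100M as 4K pages */

        for (unsigned long mb = 100; mb <= 120; mb++) {
                unsigned long usage = mb << 20 >> 12;

                /* with HZ=1000, one jiffy is one millisecond */
                printf("| %3luM | %4lu |\n", mb, penalty_jiffies(usage, high));
        }
        return 0;
}

Running this prints 0, 6, 25, 57, ... 1849, 2000, 2000, 2000, matching the table in the comment, which confirms the table was generated with a full MEMCG_CHARGE_BATCH and a 1000Hz tick.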
@@ -3512,6 +3630,9 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
                 ret = mem_cgroup_resize_max(memcg, nr_pages, true);
                 break;
         case _KMEM:
+                pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
+                             "Please report your usecase to linux-mm@kvack.org if you "
+                             "depend on this functionality.\n");
                 ret = memcg_update_kmem_max(memcg, nr_pages);
                 break;
         case _TCP:
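pr_warn_once() ensures the deprecation message fires at most once per boot, however many times the limit file is written. A minimal userspace analogue of that behaviour, for illustration only (warn_once is a hypothetical name, not the kernel macro):

#include <stdio.h>

#define warn_once(...)                                          \
        do {                                                    \
                static int warned;                              \
                if (!warned) {                                  \
                        warned = 1;                             \
                        fprintf(stderr, __VA_ARGS__);           \
                }                                               \
        } while (0)

int main(void)
{
        for (int i = 0; i < 3; i++)
                warn_once("kmem.limit_in_bytes is deprecated and will be removed.\n");
        return 0;       /* the warning prints exactly once */
}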
@@ -4805,11 +4926,6 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
         }
 }
 
-static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
-{
-        mem_cgroup_id_get_many(memcg, 1);
-}
-
 static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
 {
         mem_cgroup_id_put_many(memcg, 1);
@@ -4955,6 +5071,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
                 memcg->cgwb_frn[i].done =
                         __WB_COMPLETION_INIT(&memcg_cgwb_frn_waitq);
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        spin_lock_init(&memcg->deferred_split_queue.split_queue_lock);
+        INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
+        memcg->deferred_split_queue.split_queue_len = 0;
+#endif
         idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
         return memcg;
 fail:
@@ -5333,6 +5454,14 @@ static int mem_cgroup_move_account(struct page *page,
                         __mod_memcg_state(to, NR_WRITEBACK, nr_pages);
         }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        if (compound && !list_empty(page_deferred_list(page))) {
+                spin_lock(&from->deferred_split_queue.split_queue_lock);
+                list_del_init(page_deferred_list(page));
+                from->deferred_split_queue.split_queue_len--;
+                spin_unlock(&from->deferred_split_queue.split_queue_lock);
+        }
+#endif
         /*
          * It is safe to change page->mem_cgroup here because the page
          * is referenced, charged, and isolated - we can't race with
@@ -5341,6 +5470,17 @@ static int mem_cgroup_move_account(struct page *page,
 
         /* caller should have done css_get */
         page->mem_cgroup = to;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        if (compound && list_empty(page_deferred_list(page))) {
+                spin_lock(&to->deferred_split_queue.split_queue_lock);
+                list_add_tail(page_deferred_list(page),
+                              &to->deferred_split_queue.split_queue);
+                to->deferred_split_queue.split_queue_len++;
+                spin_unlock(&to->deferred_split_queue.split_queue_lock);
+        }
+#endif
+
         spin_unlock_irqrestore(&from->move_lock, flags);
 
         ret = 0;
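Taken together, the two THP hunks above migrate a compound page's deferred-split entry from the old memcg's queue to the new one, keeping each queue's split_queue_len in step with the list itself. A simplified userspace model of that bookkeeping follows, with minimal stand-ins for the kernel's list helpers; the locking is elided and the names are hypothetical, so this shows only the invariant, not the concurrency.

/*
 * Model of the deferred-split queue migration in mem_cgroup_move_account().
 * Build with: cc -std=c99 -Wall split_move.c
 */
#include <assert.h>
#include <stdbool.h>

struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }
static bool list_empty(const struct list_head *h) { return h->next == h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
        n->prev = h->prev; n->next = h;
        h->prev->next = n; h->prev = n;
}

static void list_del_init(struct list_head *n)
{
        n->prev->next = n->next; n->next->prev = n->prev;
        INIT_LIST_HEAD(n);
}

struct split_queue { struct list_head queue; unsigned long len; };

int main(void)
{
        struct split_queue from, to;
        struct list_head page;  /* stands in for page_deferred_list(page) */

        INIT_LIST_HEAD(&from.queue); from.len = 0;
        INIT_LIST_HEAD(&to.queue);   to.len = 0;

        /* the page starts out queued for deferred split on "from" */
        INIT_LIST_HEAD(&page);
        list_add_tail(&page, &from.queue); from.len++;

        /* first hunk: unhook from the source queue under its lock... */
        if (!list_empty(&page)) {
                list_del_init(&page);
                from.len--;
        }
        /* ...second hunk: requeue on the destination under its lock */
        if (list_empty(&page)) {
                list_add_tail(&page, &to.queue);
                to.len++;
        }

        assert(from.len == 0 && to.len == 1);
        return 0;
}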
@@ -6511,7 +6651,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
                 unsigned int nr_pages = 1;
 
                 if (PageTransHuge(page)) {
-                        nr_pages <<= compound_order(page);
+                        nr_pages = compound_nr(page);
                         ug->nr_huge += nr_pages;
                 }
                 if (PageAnon(page))
@@ -6523,7 +6663,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
                 }
                 ug->pgpgout++;
         } else {
-                ug->nr_kmem += 1 << compound_order(page);
+                ug->nr_kmem += compound_nr(page);
                 __ClearPageKmemcg(page);
         }
 
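Both compound_nr() conversions are behaviour-preserving: at the time of this patch, compound_nr(page) is defined in include/linux/mm.h as 1UL << compound_order(page), so the change is about readability, not semantics. A standalone check of the equivalence, using a stub in place of struct page:

/*
 * Equivalence check for the compound_nr() conversion above.
 * Build with: cc -std=c99 -Wall compound_nr.c
 */
#include <assert.h>

struct page { unsigned char order; };  /* stub: order of a compound page */

static unsigned int compound_order(const struct page *page)
{
        return page->order;
}

/* mirrors the 5.4-era include/linux/mm.h definition */
static unsigned long compound_nr(const struct page *page)
{
        return 1UL << compound_order(page);
}

int main(void)
{
        for (unsigned char order = 0; order <= 10; order++) {
                struct page page = { order };
                unsigned int nr_pages = 1;

                nr_pages <<= compound_order(&page);      /* old idiom */
                assert(nr_pages == compound_nr(&page));  /* new helper */
        }
        return 0;
}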