summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/compaction.h 16
-rw-r--r-- include/linux/mmzone.h 6
-rw-r--r-- include/linux/vm_event_item.h 1
-rw-r--r-- include/trace/events/compaction.h 55
-rw-r--r-- mm/compaction.c 222
-rw-r--r-- mm/memory_hotplug.c 9
-rw-r--r-- mm/page_alloc.c 3
-rw-r--r-- mm/vmstat.c 1
8 files changed, 311 insertions, 2 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 4cd4ddf64cc7..d7c8de583a23 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -52,6 +52,10 @@ extern void compaction_defer_reset(struct zone *zone, int order,
52 bool alloc_success); 52 bool alloc_success);
53extern bool compaction_restarting(struct zone *zone, int order); 53extern bool compaction_restarting(struct zone *zone, int order);
54 54
55extern int kcompactd_run(int nid);
56extern void kcompactd_stop(int nid);
57extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx);
58
55#else 59#else
56static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, 60static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
57 unsigned int order, int alloc_flags, 61 unsigned int order, int alloc_flags,
@@ -84,6 +88,18 @@ static inline bool compaction_deferred(struct zone *zone, int order)
84 return true; 88 return true;
85} 89}
86 90
/* Stub for the branch without CONFIG_COMPACTION: nothing to start, report success. */
static inline int kcompactd_run(int nid)
{
	return 0;
}
/* Stub for the branch without CONFIG_COMPACTION: there is no thread to stop. */
static inline void kcompactd_stop(int nid)
{
}
98
99static inline void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
100{
101}
102
87#endif /* CONFIG_COMPACTION */ 103#endif /* CONFIG_COMPACTION */
88 104
89#if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) 105#if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6de02ac378a0..bdd9a270a813 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -668,6 +668,12 @@ typedef struct pglist_data {
668 mem_hotplug_begin/end() */ 668 mem_hotplug_begin/end() */
669 int kswapd_max_order; 669 int kswapd_max_order;
670 enum zone_type classzone_idx; 670 enum zone_type classzone_idx;
671#ifdef CONFIG_COMPACTION
672 int kcompactd_max_order;
673 enum zone_type kcompactd_classzone_idx;
674 wait_queue_head_t kcompactd_wait;
675 struct task_struct *kcompactd;
676#endif
671#ifdef CONFIG_NUMA_BALANCING 677#ifdef CONFIG_NUMA_BALANCING
672 /* Lock serializing the migrate rate limiting window */ 678 /* Lock serializing the migrate rate limiting window */
673 spinlock_t numabalancing_migrate_lock; 679 spinlock_t numabalancing_migrate_lock;
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 67c1dbd19c6d..58ecc056ee45 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -53,6 +53,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
53 COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED, 53 COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
54 COMPACTISOLATED, 54 COMPACTISOLATED,
55 COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, 55 COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
56 KCOMPACTD_WAKE,
56#endif 57#endif
57#ifdef CONFIG_HUGETLB_PAGE 58#ifdef CONFIG_HUGETLB_PAGE
58 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, 59 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 111e5666e5eb..e215bf68f521 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -350,6 +350,61 @@ DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_reset,
350); 350);
351#endif 351#endif
352 352
/*
 * Tracepoint mm_compaction_kcompactd_sleep.
 *
 * Takes the node id, stores it in the event entry and prints it as
 * "nid=<id>".
 */
353TRACE_EVENT(mm_compaction_kcompactd_sleep,
354
355 TP_PROTO(int nid),
356
357 TP_ARGS(nid),
358
359 TP_STRUCT__entry(
360 __field(int, nid)
361 ),
362
363 TP_fast_assign(
364 __entry->nid = nid;
365 ),
366
367 TP_printk("nid=%d", __entry->nid)
368);
369
/*
 * Event class shared by the kcompactd wake-up tracepoints.
 *
 * Records the node id, the requested order and the classzone index. The
 * classzone index is printed by name through __print_symbolic() using the
 * ZONE_TYPE table.
 */
370DECLARE_EVENT_CLASS(kcompactd_wake_template,
371
372 TP_PROTO(int nid, int order, enum zone_type classzone_idx),
373
374 TP_ARGS(nid, order, classzone_idx),
375
376 TP_STRUCT__entry(
377 __field(int, nid)
378 __field(int, order)
379 __field(enum zone_type, classzone_idx)
380 ),
381
382 TP_fast_assign(
383 __entry->nid = nid;
384 __entry->order = order;
385 __entry->classzone_idx = classzone_idx;
386 ),
387
388 TP_printk("nid=%d order=%d classzone_idx=%-8s",
389 __entry->nid,
390 __entry->order,
391 __print_symbolic(__entry->classzone_idx, ZONE_TYPE))
392);
393
/*
 * Tracepoint mm_compaction_wakeup_kcompactd, an instance of
 * kcompactd_wake_template with the same (nid, order, classzone_idx) arguments.
 */
394DEFINE_EVENT(kcompactd_wake_template, mm_compaction_wakeup_kcompactd,
395
396 TP_PROTO(int nid, int order, enum zone_type classzone_idx),
397
398 TP_ARGS(nid, order, classzone_idx)
399);
400
/*
 * Tracepoint mm_compaction_kcompactd_wake, an instance of
 * kcompactd_wake_template with the same (nid, order, classzone_idx) arguments.
 */
401DEFINE_EVENT(kcompactd_wake_template, mm_compaction_kcompactd_wake,
402
403 TP_PROTO(int nid, int order, enum zone_type classzone_idx),
404
405 TP_ARGS(nid, order, classzone_idx)
406);
407
353#endif /* _TRACE_COMPACTION_H */ 408#endif /* _TRACE_COMPACTION_H */
354 409
355/* This part must be outside protection */ 410/* This part must be outside protection */
diff --git a/mm/compaction.c b/mm/compaction.c
index 93f71d968098..5b2bfbaa821a 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -7,6 +7,7 @@
7 * 7 *
8 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie> 8 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
9 */ 9 */
10#include <linux/cpu.h>
10#include <linux/swap.h> 11#include <linux/swap.h>
11#include <linux/migrate.h> 12#include <linux/migrate.h>
12#include <linux/compaction.h> 13#include <linux/compaction.h>
@@ -17,6 +18,8 @@
17#include <linux/balloon_compaction.h> 18#include <linux/balloon_compaction.h>
18#include <linux/page-isolation.h> 19#include <linux/page-isolation.h>
19#include <linux/kasan.h> 20#include <linux/kasan.h>
21#include <linux/kthread.h>
22#include <linux/freezer.h>
20#include "internal.h" 23#include "internal.h"
21 24
22#ifdef CONFIG_COMPACTION 25#ifdef CONFIG_COMPACTION
@@ -1736,4 +1739,223 @@ void compaction_unregister_node(struct node *node)
1736} 1739}
1737#endif /* CONFIG_SYSFS && CONFIG_NUMA */ 1740#endif /* CONFIG_SYSFS && CONFIG_NUMA */
1738 1741
1742static inline bool kcompactd_work_requested(pg_data_t *pgdat)
1743{
1744 return pgdat->kcompactd_max_order > 0;
1745}
1746
1747static bool kcompactd_node_suitable(pg_data_t *pgdat)
1748{
1749 int zoneid;
1750 struct zone *zone;
1751 enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;
1752
1753 for (zoneid = 0; zoneid < classzone_idx; zoneid++) {
1754 zone = &pgdat->node_zones[zoneid];
1755
1756 if (!populated_zone(zone))
1757 continue;
1758
1759 if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
1760 classzone_idx) == COMPACT_CONTINUE)
1761 return true;
1762 }
1763
1764 return false;
1765}
1766
1767static void kcompactd_do_work(pg_data_t *pgdat)
1768{
1769 /*
1770 * With no special task, compact all zones so that a page of requested
1771 * order is allocatable.
1772 */
1773 int zoneid;
1774 struct zone *zone;
1775 struct compact_control cc = {
1776 .order = pgdat->kcompactd_max_order,
1777 .classzone_idx = pgdat->kcompactd_classzone_idx,
1778 .mode = MIGRATE_SYNC_LIGHT,
1779 .ignore_skip_hint = true,
1780
1781 };
1782 bool success = false;
1783
1784 trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
1785 cc.classzone_idx);
1786 count_vm_event(KCOMPACTD_WAKE);
1787
1788 for (zoneid = 0; zoneid < cc.classzone_idx; zoneid++) {
1789 int status;
1790
1791 zone = &pgdat->node_zones[zoneid];
1792 if (!populated_zone(zone))
1793 continue;
1794
1795 if (compaction_deferred(zone, cc.order))
1796 continue;
1797
1798 if (compaction_suitable(zone, cc.order, 0, zoneid) !=
1799 COMPACT_CONTINUE)
1800 continue;
1801
1802 cc.nr_freepages = 0;
1803 cc.nr_migratepages = 0;
1804 cc.zone = zone;
1805 INIT_LIST_HEAD(&cc.freepages);
1806 INIT_LIST_HEAD(&cc.migratepages);
1807
1808 status = compact_zone(zone, &cc);
1809
1810 if (zone_watermark_ok(zone, cc.order, low_wmark_pages(zone),
1811 cc.classzone_idx, 0)) {
1812 success = true;
1813 compaction_defer_reset(zone, cc.order, false);
1814 } else if (status == COMPACT_COMPLETE) {
1815 /*
1816 * We use sync migration mode here, so we defer like
1817 * sync direct compaction does.
1818 */
1819 defer_compaction(zone, cc.order);
1820 }
1821
1822 VM_BUG_ON(!list_empty(&cc.freepages));
1823 VM_BUG_ON(!list_empty(&cc.migratepages));
1824 }
1825
1826 /*
1827 * Regardless of success, we are done until woken up next. But remember
1828 * the requested order/classzone_idx in case it was higher/tighter than
1829 * our current ones
1830 */
1831 if (pgdat->kcompactd_max_order <= cc.order)
1832 pgdat->kcompactd_max_order = 0;
1833 if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx)
1834 pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
1835}
1836
1837void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
1838{
1839 if (!order)
1840 return;
1841
1842 if (pgdat->kcompactd_max_order < order)
1843 pgdat->kcompactd_max_order = order;
1844
1845 if (pgdat->kcompactd_classzone_idx > classzone_idx)
1846 pgdat->kcompactd_classzone_idx = classzone_idx;
1847
1848 if (!waitqueue_active(&pgdat->kcompactd_wait))
1849 return;
1850
1851 if (!kcompactd_node_suitable(pgdat))
1852 return;
1853
1854 trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
1855 classzone_idx);
1856 wake_up_interruptible(&pgdat->kcompactd_wait);
1857}
1858
1859/*
1860 * The background compaction daemon, started as a kernel thread
1861 * from the init process.
1862 */
1863static int kcompactd(void *p)
1864{
1865 pg_data_t *pgdat = (pg_data_t*)p;
1866 struct task_struct *tsk = current;
1867
1868 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
1869
1870 if (!cpumask_empty(cpumask))
1871 set_cpus_allowed_ptr(tsk, cpumask);
1872
1873 set_freezable();
1874
1875 pgdat->kcompactd_max_order = 0;
1876 pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
1877
1878 while (!kthread_should_stop()) {
1879 trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
1880 wait_event_freezable(pgdat->kcompactd_wait,
1881 kcompactd_work_requested(pgdat));
1882
1883 kcompactd_do_work(pgdat);
1884 }
1885
1886 return 0;
1887}
1888
1889/*
1890 * This kcompactd start function will be called by init and node-hot-add.
1891 * On node-hot-add, kcompactd will moved to proper cpus if cpus are hot-added.
1892 */
1893int kcompactd_run(int nid)
1894{
1895 pg_data_t *pgdat = NODE_DATA(nid);
1896 int ret = 0;
1897
1898 if (pgdat->kcompactd)
1899 return 0;
1900
1901 pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
1902 if (IS_ERR(pgdat->kcompactd)) {
1903 pr_err("Failed to start kcompactd on node %d\n", nid);
1904 ret = PTR_ERR(pgdat->kcompactd);
1905 pgdat->kcompactd = NULL;
1906 }
1907 return ret;
1908}
1909
1910/*
1911 * Called by memory hotplug when all memory in a node is offlined. Caller must
1912 * hold mem_hotplug_begin/end().
1913 */
1914void kcompactd_stop(int nid)
1915{
1916 struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
1917
1918 if (kcompactd) {
1919 kthread_stop(kcompactd);
1920 NODE_DATA(nid)->kcompactd = NULL;
1921 }
1922}
1923
1924/*
1925 * It's optimal to keep kcompactd on the same CPUs as their memory, but
1926 * not required for correctness. So if the last cpu in a node goes
1927 * away, we get changed to run anywhere: as the first one comes back,
1928 * restore their cpu bindings.
1929 */
1930static int cpu_callback(struct notifier_block *nfb, unsigned long action,
1931 void *hcpu)
1932{
1933 int nid;
1934
1935 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
1936 for_each_node_state(nid, N_MEMORY) {
1937 pg_data_t *pgdat = NODE_DATA(nid);
1938 const struct cpumask *mask;
1939
1940 mask = cpumask_of_node(pgdat->node_id);
1941
1942 if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
1943 /* One of our CPUs online: restore mask */
1944 set_cpus_allowed_ptr(pgdat->kcompactd, mask);
1945 }
1946 }
1947 return NOTIFY_OK;
1948}
1949
1950static int __init kcompactd_init(void)
1951{
1952 int nid;
1953
1954 for_each_node_state(nid, N_MEMORY)
1955 kcompactd_run(nid);
1956 hotcpu_notifier(cpu_callback, 0);
1957 return 0;
1958}
1959subsys_initcall(kcompactd_init)
1960
1739#endif /* CONFIG_COMPACTION */ 1961#endif /* CONFIG_COMPACTION */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 24ea06393816..d9bcb26fc4df 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -33,6 +33,7 @@
33#include <linux/hugetlb.h> 33#include <linux/hugetlb.h>
34#include <linux/memblock.h> 34#include <linux/memblock.h>
35#include <linux/bootmem.h> 35#include <linux/bootmem.h>
36#include <linux/compaction.h>
36 37
37#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
38 39
@@ -1105,8 +1106,10 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
1105 1106
1106 init_per_zone_wmark_min(); 1107 init_per_zone_wmark_min();
1107 1108
1108 if (onlined_pages) 1109 if (onlined_pages) {
1109 kswapd_run(zone_to_nid(zone)); 1110 kswapd_run(zone_to_nid(zone));
1111 kcompactd_run(nid);
1112 }
1110 1113
1111 vm_total_pages = nr_free_pagecache_pages(); 1114 vm_total_pages = nr_free_pagecache_pages();
1112 1115
@@ -1880,8 +1883,10 @@ repeat:
1880 zone_pcp_update(zone); 1883 zone_pcp_update(zone);
1881 1884
1882 node_states_clear_node(node, &arg); 1885 node_states_clear_node(node, &arg);
1883 if (arg.status_change_nid >= 0) 1886 if (arg.status_change_nid >= 0) {
1884 kswapd_stop(node); 1887 kswapd_stop(node);
1888 kcompactd_stop(node);
1889 }
1885 1890
1886 vm_total_pages = nr_free_pagecache_pages(); 1891 vm_total_pages = nr_free_pagecache_pages();
1887 writeback_set_ratelimit(); 1892 writeback_set_ratelimit();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b1fc19ebb8a2..25a75da53c27 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5405,6 +5405,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
5405#endif 5405#endif
5406 init_waitqueue_head(&pgdat->kswapd_wait); 5406 init_waitqueue_head(&pgdat->kswapd_wait);
5407 init_waitqueue_head(&pgdat->pfmemalloc_wait); 5407 init_waitqueue_head(&pgdat->pfmemalloc_wait);
5408#ifdef CONFIG_COMPACTION
5409 init_waitqueue_head(&pgdat->kcompactd_wait);
5410#endif
5408 pgdat_page_ext_init(pgdat); 5411 pgdat_page_ext_init(pgdat);
5409 5412
5410 for (j = 0; j < MAX_NR_ZONES; j++) { 5413 for (j = 0; j < MAX_NR_ZONES; j++) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 69ce64f7b8d7..f80066248c94 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -826,6 +826,7 @@ const char * const vmstat_text[] = {
826 "compact_stall", 826 "compact_stall",
827 "compact_fail", 827 "compact_fail",
828 "compact_success", 828 "compact_success",
829 "compact_daemon_wake",
829#endif 830#endif
830 831
831#ifdef CONFIG_HUGETLB_PAGE 832#ifdef CONFIG_HUGETLB_PAGE