-rw-r--r--  include/linux/compaction.h        |  16
-rw-r--r--  include/linux/mmzone.h            |   6
-rw-r--r--  include/linux/vm_event_item.h     |   1
-rw-r--r--  include/trace/events/compaction.h |  55
-rw-r--r--  mm/compaction.c                   | 222
-rw-r--r--  mm/memory_hotplug.c               |   9
-rw-r--r--  mm/page_alloc.c                   |   3
-rw-r--r--  mm/vmstat.c                       |   1
8 files changed, 311 insertions(+), 2 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 4cd4ddf64cc7..d7c8de583a23 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -52,6 +52,10 @@ extern void compaction_defer_reset(struct zone *zone, int order,
 						bool alloc_success);
 extern bool compaction_restarting(struct zone *zone, int order);
 
+extern int kcompactd_run(int nid);
+extern void kcompactd_stop(int nid);
+extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx);
+
 #else
 static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
 			unsigned int order, int alloc_flags,
@@ -84,6 +88,18 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 	return true;
 }
 
+static inline int kcompactd_run(int nid)
+{
+	return 0;
+}
+static inline void kcompactd_stop(int nid)
+{
+}
+
+static inline void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
+{
+}
+
 #endif /* CONFIG_COMPACTION */
 
 #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6de02ac378a0..bdd9a270a813 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -668,6 +668,12 @@ typedef struct pglist_data {
 					   mem_hotplug_begin/end() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
+#ifdef CONFIG_COMPACTION
+	int kcompactd_max_order;
+	enum zone_type kcompactd_classzone_idx;
+	wait_queue_head_t kcompactd_wait;
+	struct task_struct *kcompactd;
+#endif
 #ifdef CONFIG_NUMA_BALANCING
 	/* Lock serializing the migrate rate limiting window */
 	spinlock_t numabalancing_migrate_lock;
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 67c1dbd19c6d..58ecc056ee45 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -53,6 +53,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
 		COMPACTISOLATED,
 		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
+		KCOMPACTD_WAKE,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
 		HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 111e5666e5eb..e215bf68f521 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -350,6 +350,61 @@ DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_reset,
 );
 #endif
 
+TRACE_EVENT(mm_compaction_kcompactd_sleep,
+
+	TP_PROTO(int nid),
+
+	TP_ARGS(nid),
+
+	TP_STRUCT__entry(
+		__field(int, nid)
+	),
+
+	TP_fast_assign(
+		__entry->nid = nid;
+	),
+
+	TP_printk("nid=%d", __entry->nid)
+);
+
+DECLARE_EVENT_CLASS(kcompactd_wake_template,
+
+	TP_PROTO(int nid, int order, enum zone_type classzone_idx),
+
+	TP_ARGS(nid, order, classzone_idx),
+
+	TP_STRUCT__entry(
+		__field(int, nid)
+		__field(int, order)
+		__field(enum zone_type, classzone_idx)
+	),
+
+	TP_fast_assign(
+		__entry->nid = nid;
+		__entry->order = order;
+		__entry->classzone_idx = classzone_idx;
+	),
+
+	TP_printk("nid=%d order=%d classzone_idx=%-8s",
+		__entry->nid,
+		__entry->order,
+		__print_symbolic(__entry->classzone_idx, ZONE_TYPE))
+);
+
+DEFINE_EVENT(kcompactd_wake_template, mm_compaction_wakeup_kcompactd,
+
+	TP_PROTO(int nid, int order, enum zone_type classzone_idx),
+
+	TP_ARGS(nid, order, classzone_idx)
+);
+
+DEFINE_EVENT(kcompactd_wake_template, mm_compaction_kcompactd_wake,
+
+	TP_PROTO(int nid, int order, enum zone_type classzone_idx),
+
+	TP_ARGS(nid, order, classzone_idx)
+);
+
 #endif /* _TRACE_COMPACTION_H */
 
 /* This part must be outside protection */
diff --git a/mm/compaction.c b/mm/compaction.c
index 93f71d968098..5b2bfbaa821a 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -7,6 +7,7 @@
  *
  * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
  */
+#include <linux/cpu.h>
 #include <linux/swap.h>
 #include <linux/migrate.h>
 #include <linux/compaction.h>
@@ -17,6 +18,8 @@
 #include <linux/balloon_compaction.h>
 #include <linux/page-isolation.h>
 #include <linux/kasan.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -1736,4 +1739,223 @@ void compaction_unregister_node(struct node *node)
 }
 #endif /* CONFIG_SYSFS && CONFIG_NUMA */
 
+static inline bool kcompactd_work_requested(pg_data_t *pgdat)
+{
+	return pgdat->kcompactd_max_order > 0;
+}
+
+static bool kcompactd_node_suitable(pg_data_t *pgdat)
+{
+	int zoneid;
+	struct zone *zone;
+	enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;
+
+	for (zoneid = 0; zoneid < classzone_idx; zoneid++) {
+		zone = &pgdat->node_zones[zoneid];
+
+		if (!populated_zone(zone))
+			continue;
+
+		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
+					classzone_idx) == COMPACT_CONTINUE)
+			return true;
+	}
+
+	return false;
+}
+
+static void kcompactd_do_work(pg_data_t *pgdat)
+{
+	/*
+	 * With no special task, compact all zones so that a page of requested
+	 * order is allocatable.
+	 */
+	int zoneid;
+	struct zone *zone;
+	struct compact_control cc = {
+		.order = pgdat->kcompactd_max_order,
+		.classzone_idx = pgdat->kcompactd_classzone_idx,
+		.mode = MIGRATE_SYNC_LIGHT,
+		.ignore_skip_hint = true,
+
+	};
+	bool success = false;
+
+	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
+							cc.classzone_idx);
+	count_vm_event(KCOMPACTD_WAKE);
+
+	for (zoneid = 0; zoneid < cc.classzone_idx; zoneid++) {
+		int status;
+
+		zone = &pgdat->node_zones[zoneid];
+		if (!populated_zone(zone))
+			continue;
+
+		if (compaction_deferred(zone, cc.order))
+			continue;
+
+		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
+						COMPACT_CONTINUE)
+			continue;
+
+		cc.nr_freepages = 0;
+		cc.nr_migratepages = 0;
+		cc.zone = zone;
+		INIT_LIST_HEAD(&cc.freepages);
+		INIT_LIST_HEAD(&cc.migratepages);
+
+		status = compact_zone(zone, &cc);
+
+		if (zone_watermark_ok(zone, cc.order, low_wmark_pages(zone),
+						cc.classzone_idx, 0)) {
+			success = true;
+			compaction_defer_reset(zone, cc.order, false);
+		} else if (status == COMPACT_COMPLETE) {
+			/*
+			 * We use sync migration mode here, so we defer like
+			 * sync direct compaction does.
+			 */
+			defer_compaction(zone, cc.order);
+		}
+
+		VM_BUG_ON(!list_empty(&cc.freepages));
+		VM_BUG_ON(!list_empty(&cc.migratepages));
+	}
+
+	/*
+	 * Regardless of success, we are done until woken up next. But remember
+	 * the requested order/classzone_idx in case it was higher/tighter than
+	 * our current ones.
+	 */
+	if (pgdat->kcompactd_max_order <= cc.order)
+		pgdat->kcompactd_max_order = 0;
+	if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx)
+		pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
+}
+
+void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
+{
+	if (!order)
+		return;
+
+	if (pgdat->kcompactd_max_order < order)
+		pgdat->kcompactd_max_order = order;
+
+	if (pgdat->kcompactd_classzone_idx > classzone_idx)
+		pgdat->kcompactd_classzone_idx = classzone_idx;
+
+	if (!waitqueue_active(&pgdat->kcompactd_wait))
+		return;
+
+	if (!kcompactd_node_suitable(pgdat))
+		return;
+
+	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
+							classzone_idx);
+	wake_up_interruptible(&pgdat->kcompactd_wait);
+}
+
+/*
+ * The background compaction daemon, started as a kernel thread
+ * from the init process.
+ */
+static int kcompactd(void *p)
+{
+	pg_data_t *pgdat = (pg_data_t*)p;
+	struct task_struct *tsk = current;
+
+	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
+
+	if (!cpumask_empty(cpumask))
+		set_cpus_allowed_ptr(tsk, cpumask);
+
+	set_freezable();
+
+	pgdat->kcompactd_max_order = 0;
+	pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
+
+	while (!kthread_should_stop()) {
+		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
+		wait_event_freezable(pgdat->kcompactd_wait,
+				kcompactd_work_requested(pgdat));
+
+		kcompactd_do_work(pgdat);
+	}
+
+	return 0;
+}
+
+/*
+ * This kcompactd start function will be called by init and node-hot-add.
+ * On node-hot-add, kcompactd will be moved to proper cpus if cpus are hot-added.
+ */
+int kcompactd_run(int nid)
+{
+	pg_data_t *pgdat = NODE_DATA(nid);
+	int ret = 0;
+
+	if (pgdat->kcompactd)
+		return 0;
+
+	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
+	if (IS_ERR(pgdat->kcompactd)) {
+		pr_err("Failed to start kcompactd on node %d\n", nid);
+		ret = PTR_ERR(pgdat->kcompactd);
+		pgdat->kcompactd = NULL;
+	}
+	return ret;
+}
+
+/*
+ * Called by memory hotplug when all memory in a node is offlined. Caller must
+ * hold mem_hotplug_begin/end().
+ */
+void kcompactd_stop(int nid)
+{
+	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
+
+	if (kcompactd) {
+		kthread_stop(kcompactd);
+		NODE_DATA(nid)->kcompactd = NULL;
+	}
+}
+
+/*
+ * It's optimal to keep kcompactd on the same CPUs as their memory, but
+ * not required for correctness. So if the last cpu in a node goes
+ * away, we get changed to run anywhere: as the first one comes back,
+ * restore their cpu bindings.
+ */
+static int cpu_callback(struct notifier_block *nfb, unsigned long action,
+			void *hcpu)
+{
+	int nid;
+
+	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
+		for_each_node_state(nid, N_MEMORY) {
+			pg_data_t *pgdat = NODE_DATA(nid);
+			const struct cpumask *mask;
+
+			mask = cpumask_of_node(pgdat->node_id);
+
+			if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
+				/* One of our CPUs online: restore mask */
+				set_cpus_allowed_ptr(pgdat->kcompactd, mask);
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static int __init kcompactd_init(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_MEMORY)
+		kcompactd_run(nid);
+	hotcpu_notifier(cpu_callback, 0);
+	return 0;
+}
+subsys_initcall(kcompactd_init)
+
 #endif /* CONFIG_COMPACTION */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 24ea06393816..d9bcb26fc4df 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -33,6 +33,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
+#include <linux/compaction.h>
 
 #include <asm/tlbflush.h>
 
@@ -1105,8 +1106,10 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 
 	init_per_zone_wmark_min();
 
-	if (onlined_pages)
+	if (onlined_pages) {
 		kswapd_run(zone_to_nid(zone));
+		kcompactd_run(nid);
+	}
 
 	vm_total_pages = nr_free_pagecache_pages();
 
@@ -1880,8 +1883,10 @@ repeat:
 	zone_pcp_update(zone);
 
 	node_states_clear_node(node, &arg);
-	if (arg.status_change_nid >= 0)
+	if (arg.status_change_nid >= 0) {
 		kswapd_stop(node);
+		kcompactd_stop(node);
+	}
 
 	vm_total_pages = nr_free_pagecache_pages();
 	writeback_set_ratelimit();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b1fc19ebb8a2..25a75da53c27 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5405,6 +5405,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 #endif
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
+#ifdef CONFIG_COMPACTION
+	init_waitqueue_head(&pgdat->kcompactd_wait);
+#endif
 	pgdat_page_ext_init(pgdat);
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 69ce64f7b8d7..f80066248c94 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -826,6 +826,7 @@ const char * const vmstat_text[] = {
 	"compact_stall",
 	"compact_fail",
 	"compact_success",
+	"compact_daemon_wake",
 #endif
 
 #ifdef CONFIG_HUGETLB_PAGE