-rw-r--r--  include/linux/cpuset.h     | 47
-rw-r--r--  include/linux/init_task.h  |  8
-rw-r--r--  include/linux/sched.h      |  2
-rw-r--r--  kernel/cpuset.c            | 43
-rw-r--r--  kernel/fork.c              |  1
-rw-r--r--  mm/filemap.c               | 11
-rw-r--r--  mm/hugetlb.c               | 15
-rw-r--r--  mm/mempolicy.c             | 28
-rw-r--r--  mm/page_alloc.c            | 33
-rw-r--r--  mm/slab.c                  | 13
-rw-r--r--  mm/slub.c                  | 40
-rw-r--r--  mm/vmscan.c                |  2
12 files changed, 133 insertions, 110 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index e9eaec522655..7a7e5fd2a277 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -89,42 +89,33 @@ extern void rebuild_sched_domains(void);
 extern void cpuset_print_task_mems_allowed(struct task_struct *p);
 
 /*
- * reading current mems_allowed and mempolicy in the fastpath must protected
- * by get_mems_allowed()
+ * get_mems_allowed is required when making decisions involving mems_allowed
+ * such as during page allocation. mems_allowed can be updated in parallel
+ * and depending on the new value an operation can fail potentially causing
+ * process failure. A retry loop with get_mems_allowed and put_mems_allowed
+ * prevents these artificial failures.
  */
-static inline void get_mems_allowed(void)
+static inline unsigned int get_mems_allowed(void)
 {
-	current->mems_allowed_change_disable++;
-
-	/*
-	 * ensure that reading mems_allowed and mempolicy happens after the
-	 * update of ->mems_allowed_change_disable.
-	 *
-	 * the write-side task finds ->mems_allowed_change_disable is not 0,
-	 * and knows the read-side task is reading mems_allowed or mempolicy,
-	 * so it will clear old bits lazily.
-	 */
-	smp_mb();
+	return read_seqcount_begin(&current->mems_allowed_seq);
 }
 
-static inline void put_mems_allowed(void)
+/*
+ * If this returns false, the operation that took place after get_mems_allowed
+ * may have failed. It is up to the caller to retry the operation if
+ * appropriate.
+ */
+static inline bool put_mems_allowed(unsigned int seq)
 {
-	/*
-	 * ensure that reading mems_allowed and mempolicy before reducing
-	 * mems_allowed_change_disable.
-	 *
-	 * the write-side task will know that the read-side task is still
-	 * reading mems_allowed or mempolicy, don't clears old bits in the
-	 * nodemask.
-	 */
-	smp_mb();
-	--ACCESS_ONCE(current->mems_allowed_change_disable);
+	return !read_seqcount_retry(&current->mems_allowed_seq, seq);
 }
 
 static inline void set_mems_allowed(nodemask_t nodemask)
 {
 	task_lock(current);
+	write_seqcount_begin(&current->mems_allowed_seq);
 	current->mems_allowed = nodemask;
+	write_seqcount_end(&current->mems_allowed_seq);
 	task_unlock(current);
 }
 
@@ -234,12 +225,14 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 {
 }
 
-static inline void get_mems_allowed(void)
+static inline unsigned int get_mems_allowed(void)
 {
+	return 0;
 }
 
-static inline void put_mems_allowed(void)
+static inline bool put_mems_allowed(unsigned int seq)
 {
+	return true;
 }
 
 #endif /* !CONFIG_CPUSETS */
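The seqcount cookie turns every reader into a cheap retry loop. A minimal sketch of the intended caller-side pattern, assuming a placeholder some_node_alloc() standing in for whatever operation consults current->mems_allowed (the real conversions follow in mm/filemap.c, mm/mempolicy.c, mm/page_alloc.c, mm/slab.c and mm/slub.c below):

/*
 * Illustrative sketch only; some_node_alloc() and alloc_with_cpuset_retry()
 * are not real kernel functions, they just mirror the pattern this patch
 * adds to callers.
 */
static struct page *alloc_with_cpuset_retry(gfp_t gfp)
{
	struct page *page;
	unsigned int cpuset_mems_cookie;

	do {
		/* sample the sequence count before reading mems_allowed */
		cpuset_mems_cookie = get_mems_allowed();
		page = some_node_alloc(gfp);
		/*
		 * put_mems_allowed() returns false if mems_allowed was
		 * rewritten while the allocation ran.  Retry only when the
		 * allocation also failed; a page that did come back is kept.
		 */
	} while (!put_mems_allowed(cpuset_mems_cookie) && !page);

	return page;
}

A successful allocation is never retried: the only failure the loop guards against is the artificial one where a node briefly disappears from the mask mid-update.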
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f994d51f70f2..e4baff5f7ff4 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -29,6 +29,13 @@ extern struct fs_struct init_fs;
 #define INIT_GROUP_RWSEM(sig)
 #endif
 
+#ifdef CONFIG_CPUSETS
+#define INIT_CPUSET_SEQ						\
+	.mems_allowed_seq = SEQCNT_ZERO,
+#else
+#define INIT_CPUSET_SEQ
+#endif
+
 #define INIT_SIGNALS(sig) {					\
 	.nr_threads	= 1,					\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
@@ -192,6 +199,7 @@ extern struct cred init_cred;
 	INIT_FTRACE_GRAPH					\
 	INIT_TRACE_RECURSION					\
 	INIT_TASK_RCU_PREEMPT(tsk)				\
+	INIT_CPUSET_SEQ						\
 }
 
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e074e1e54f85..0c147a4260a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1514,7 +1514,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_CPUSETS
 	nodemask_t mems_allowed;	/* Protected by alloc_lock */
-	int mems_allowed_change_disable;
+	seqcount_t mems_allowed_seq;	/* Seqence no to catch updates */
 	int cpuset_mem_spread_rotor;
 	int cpuset_slab_spread_rotor;
 #endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 5d575836dba6..1010cc61931f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -964,7 +964,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 {
 	bool need_loop;
 
-repeat:
 	/*
 	 * Allow tasks that have access to memory reserves because they have
 	 * been OOM killed to get memory anywhere.
@@ -983,45 +982,19 @@ repeat:
 	 */
 	need_loop = task_has_mempolicy(tsk) ||
 			!nodes_intersects(*newmems, tsk->mems_allowed);
-	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
 
-	/*
-	 * ensure checking ->mems_allowed_change_disable after setting all new
-	 * allowed nodes.
-	 *
-	 * the read-side task can see an nodemask with new allowed nodes and
-	 * old allowed nodes. and if it allocates page when cpuset clears newly
-	 * disallowed ones continuous, it can see the new allowed bits.
-	 *
-	 * And if setting all new allowed nodes is after the checking, setting
-	 * all new allowed nodes and clearing newly disallowed ones will be done
-	 * continuous, and the read-side task may find no node to alloc page.
-	 */
-	smp_mb();
+	if (need_loop)
+		write_seqcount_begin(&tsk->mems_allowed_seq);
 
-	/*
-	 * Allocation of memory is very fast, we needn't sleep when waiting
-	 * for the read-side.
-	 */
-	while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
-		task_unlock(tsk);
-		if (!task_curr(tsk))
-			yield();
-		goto repeat;
-	}
-
-	/*
-	 * ensure checking ->mems_allowed_change_disable before clearing all new
-	 * disallowed nodes.
-	 *
-	 * if clearing newly disallowed bits before the checking, the read-side
-	 * task may find no node to alloc page.
-	 */
-	smp_mb();
+	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
+	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
 
 	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
 	tsk->mems_allowed = *newmems;
+
+	if (need_loop)
+		write_seqcount_end(&tsk->mems_allowed_seq);
+
 	task_unlock(tsk);
 }
 
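The write side only bumps the seqcount when need_loop is true, i.e. when the task has a mempolicy or the old and new nodemasks do not intersect — the cases where a reader could otherwise observe an intermediate mask with no usable node. A condensed sketch of how the two sides pair up under the usual seqcount contract (illustrative only; the real writer also performs the two-step mpol_rebind_task() shown above):

	/* writer, under task_lock(tsk), as in cpuset_change_task_nodemask() */
	write_seqcount_begin(&tsk->mems_allowed_seq);
	tsk->mems_allowed = *newmems;	/* may transiently mix old and new nodes */
	write_seqcount_end(&tsk->mems_allowed_seq);

	/* reader, on current, e.g. the skip_free_areas_node() conversion below */
	int nid = numa_node_id();	/* for example, the local node */
	unsigned int cookie;
	bool allowed;

	do {
		cookie = get_mems_allowed();
		allowed = node_isset(nid, cpuset_current_mems_allowed);
	} while (!put_mems_allowed(cookie));

Any reader that overlaps the write sees a cookie mismatch and simply re-reads, which is what replaces the old smp_mb()/yield() handshake.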
diff --git a/kernel/fork.c b/kernel/fork.c
index a9e99f3c18e0..9cc227d54102 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1237,6 +1237,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_CPUSETS
 	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
 	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
+	seqcount_init(&p->mems_allowed_seq);
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	p->irq_events = 0;
diff --git a/mm/filemap.c b/mm/filemap.c
index f3230604006c..843042045dc9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -499,10 +499,13 @@ struct page *__page_cache_alloc(gfp_t gfp)
 	struct page *page;
 
 	if (cpuset_do_page_mem_spread()) {
-		get_mems_allowed();
-		n = cpuset_mem_spread_node();
-		page = alloc_pages_exact_node(n, gfp, 0);
-		put_mems_allowed();
+		unsigned int cpuset_mems_cookie;
+		do {
+			cpuset_mems_cookie = get_mems_allowed();
+			n = cpuset_mem_spread_node();
+			page = alloc_pages_exact_node(n, gfp, 0);
+		} while (!put_mems_allowed(cpuset_mems_cookie) && !page);
+
 		return page;
 	}
 	return alloc_pages(gfp, 0);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 62f9fada4d6d..b1c314877334 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -454,14 +454,16 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
 {
-	struct page *page = NULL;
+	struct page *page;
 	struct mempolicy *mpol;
 	nodemask_t *nodemask;
 	struct zonelist *zonelist;
 	struct zone *zone;
 	struct zoneref *z;
+	unsigned int cpuset_mems_cookie;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
 	zonelist = huge_zonelist(vma, address,
 					htlb_alloc_mask, &mpol, &nodemask);
 	/*
@@ -488,10 +490,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 			}
 		}
 	}
-err:
+
 	mpol_cond_put(mpol);
-	put_mems_allowed();
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
 	return page;
+
+err:
+	mpol_cond_put(mpol);
+	return NULL;
 }
 
 static void update_and_free_page(struct hstate *h, struct page *page)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 71e1a523e209..cfb6c8678754 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1850,18 +1850,24 @@ struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		unsigned long addr, int node)
 {
-	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+	struct mempolicy *pol;
 	struct zonelist *zl;
 	struct page *page;
+	unsigned int cpuset_mems_cookie;
+
+retry_cpuset:
+	pol = get_vma_policy(current, vma, addr);
+	cpuset_mems_cookie = get_mems_allowed();
 
-	get_mems_allowed();
 	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
 		unsigned nid;
 
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
 		mpol_cond_put(pol);
 		page = alloc_page_interleave(gfp, order, nid);
-		put_mems_allowed();
+		if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+			goto retry_cpuset;
+
 		return page;
 	}
 	zl = policy_zonelist(gfp, pol, node);
@@ -1872,7 +1878,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		struct page *page = __alloc_pages_nodemask(gfp, order,
 						zl, policy_nodemask(gfp, pol));
 		__mpol_put(pol);
-		put_mems_allowed();
+		if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+			goto retry_cpuset;
 		return page;
 	}
 	/*
@@ -1880,7 +1887,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 	 */
 	page = __alloc_pages_nodemask(gfp, order, zl,
 				      policy_nodemask(gfp, pol));
-	put_mems_allowed();
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
 	return page;
 }
 
@@ -1907,11 +1915,14 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 {
 	struct mempolicy *pol = current->mempolicy;
 	struct page *page;
+	unsigned int cpuset_mems_cookie;
 
 	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+
 	/*
 	 * No reference counting needed for current->mempolicy
 	 * nor system default_policy
@@ -1922,7 +1933,10 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 		page = __alloc_pages_nodemask(gfp, order,
 				policy_zonelist(gfp, pol, numa_node_id()),
 				policy_nodemask(gfp, pol));
-	put_mems_allowed();
+
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
+
 	return page;
 }
 EXPORT_SYMBOL(alloc_pages_current);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 673596ad9c80..40de6854b980 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2380,8 +2380,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	struct zone *preferred_zone;
-	struct page *page;
+	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
+	unsigned int cpuset_mems_cookie;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2400,15 +2401,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+
 	/* The preferred zone is used for statistics later */
 	first_zones_zonelist(zonelist, high_zoneidx,
 				nodemask ? : &cpuset_current_mems_allowed,
 				&preferred_zone);
-	if (!preferred_zone) {
-		put_mems_allowed();
-		return NULL;
-	}
+	if (!preferred_zone)
+		goto out;
 
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2418,9 +2419,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
-	put_mems_allowed();
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
+
+out:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2634,13 +2645,15 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 bool skip_free_areas_node(unsigned int flags, int nid)
 {
 	bool ret = false;
+	unsigned int cpuset_mems_cookie;
 
 	if (!(flags & SHOW_MEM_FILTER_NODES))
 		goto out;
 
-	get_mems_allowed();
-	ret = !node_isset(nid, cpuset_current_mems_allowed);
-	put_mems_allowed();
+	do {
+		cpuset_mems_cookie = get_mems_allowed();
+		ret = !node_isset(nid, cpuset_current_mems_allowed);
+	} while (!put_mems_allowed(cpuset_mems_cookie));
 out:
 	return ret;
 }
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3284,12 +3284,10 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 	if (in_interrupt() || (flags & __GFP_THISNODE))
 		return NULL;
 	nid_alloc = nid_here = numa_mem_id();
-	get_mems_allowed();
 	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
 		nid_alloc = cpuset_slab_spread_node();
 	else if (current->mempolicy)
 		nid_alloc = slab_node(current->mempolicy);
-	put_mems_allowed();
 	if (nid_alloc != nid_here)
 		return ____cache_alloc_node(cachep, flags, nid_alloc);
 	return NULL;
@@ -3312,14 +3310,17 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 	enum zone_type high_zoneidx = gfp_zone(flags);
 	void *obj = NULL;
 	int nid;
+	unsigned int cpuset_mems_cookie;
 
 	if (flags & __GFP_THISNODE)
 		return NULL;
 
-	get_mems_allowed();
-	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
 	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+
 retry:
 	/*
 	 * Look through allowed nodes for objects available
@@ -3372,7 +3373,9 @@ retry:
 			}
 		}
 	}
-	put_mems_allowed();
+
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj))
+		goto retry_cpuset;
 	return obj;
 }
 
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1581,6 +1581,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(flags);
 	void *object;
+	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * The defrag ratio allows a configuration of the tradeoffs between
@@ -1604,23 +1605,32 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
 			get_cycles() % 1024 > s->remote_node_defrag_ratio)
 		return NULL;
 
-	get_mems_allowed();
-	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-		struct kmem_cache_node *n;
-
-		n = get_node(s, zone_to_nid(zone));
-
-		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
-				n->nr_partial > s->min_partial) {
-			object = get_partial_node(s, n, c);
-			if (object) {
-				put_mems_allowed();
-				return object;
+	do {
+		cpuset_mems_cookie = get_mems_allowed();
+		zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+			struct kmem_cache_node *n;
+
+			n = get_node(s, zone_to_nid(zone));
+
+			if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
+					n->nr_partial > s->min_partial) {
+				object = get_partial_node(s, n, c);
+				if (object) {
+					/*
+					 * Return the object even if
+					 * put_mems_allowed indicated that
+					 * the cpuset mems_allowed was
+					 * updated in parallel. It's a
+					 * harmless race between the alloc
+					 * and the cpuset update.
+					 */
+					put_mems_allowed(cpuset_mems_cookie);
+					return object;
+				}
 			}
 		}
-	}
-	put_mems_allowed();
+	} while (!put_mems_allowed(cpuset_mems_cookie));
 #endif
 	return NULL;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 440af1d899b9..55d86c9506f3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2343,7 +2343,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	unsigned long writeback_threshold;
 	bool aborted_reclaim;
 
-	get_mems_allowed();
 	delayacct_freepages_start();
 
 	if (global_reclaim(sc))
@@ -2407,7 +2406,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 out:
 	delayacct_freepages_end();
-	put_mems_allowed();
 
 	if (sc->nr_reclaimed)
 		return sc->nr_reclaimed;