Diffstat (limited to 'mm/mempolicy.c')

 mm/mempolicy.c | 92 ++++------------------------------------------------------
 1 file changed, 10 insertions(+), 82 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ae3c8f3595d4..4755c8576942 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1556,10 +1556,10 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
 
 #ifdef CONFIG_COMPAT
 
-asmlinkage long compat_sys_get_mempolicy(int __user *policy,
-				     compat_ulong_t __user *nmask,
-				     compat_ulong_t maxnode,
-				     compat_ulong_t addr, compat_ulong_t flags)
+COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
+		       compat_ulong_t __user *, nmask,
+		       compat_ulong_t, maxnode,
+		       compat_ulong_t, addr, compat_ulong_t, flags)
 {
 	long err;
 	unsigned long __user *nm = NULL;
@@ -1586,8 +1586,8 @@ asmlinkage long compat_sys_get_mempolicy(int __user *policy,
 	return err;
 }
 
-asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
-				     compat_ulong_t maxnode)
+COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
+		       compat_ulong_t, maxnode)
 {
 	long err = 0;
 	unsigned long __user *nm = NULL;
@@ -1609,9 +1609,9 @@ asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
 	return sys_set_mempolicy(mode, nm, nr_bits+1);
 }
 
-asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
-			     compat_ulong_t mode, compat_ulong_t __user *nmask,
-			     compat_ulong_t maxnode, compat_ulong_t flags)
+COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
+		       compat_ulong_t, mode, compat_ulong_t __user *, nmask,
+		       compat_ulong_t, maxnode, compat_ulong_t, flags)
 {
 	long err = 0;
 	unsigned long __user *nm = NULL;
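
Note: converting the open-coded asmlinkage prototypes to COMPAT_SYSCALL_DEFINEn() is not just cosmetic. On 64-bit architectures the upper 32 bits of a register carrying a compat argument are not guaranteed to be clear; the macro generates an entry point that receives every argument as a long and explicitly truncates or zero-extends it before calling the typed body. A simplified sketch of what the set_mempolicy conversion expands to (the real expansion in include/linux/compat.h adds aliasing and type-checking details omitted here):

	/*
	 * Simplified sketch, not the literal macro output: the outer
	 * entry point takes raw register-width longs, "delouses" each
	 * 32-bit argument, and hands off to an inline typed body.
	 */
	asmlinkage long compat_SyS_set_mempolicy(long mode, long nmask,
						 long maxnode)
	{
		return C_SYSC_set_mempolicy((int)mode,
				(compat_ulong_t __user *)(unsigned long)nmask,
				(compat_ulong_t)maxnode);
	}

	static inline long C_SYSC_set_mempolicy(int mode,
						compat_ulong_t __user *nmask,
						compat_ulong_t maxnode)
	{
		/* the function body written after the macro goes here */
	}
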
@@ -2301,35 +2301,6 @@ static void sp_free(struct sp_node *n)
 	kmem_cache_free(sn_cache, n);
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
-{
-	/* Never defer a private fault */
-	if (cpupid_match_pid(p, last_cpupid))
-		return false;
-
-	if (p->numa_migrate_deferred) {
-		p->numa_migrate_deferred--;
-		return true;
-	}
-	return false;
-}
-
-static inline void defer_numa_migrate(struct task_struct *p)
-{
-	p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred;
-}
-#else
-static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
-{
-	return false;
-}
-
-static inline void defer_numa_migrate(struct task_struct *p)
-{
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 /**
  * mpol_misplaced - check whether current page node is valid in policy
  *
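
For reference, the deleted numa_migrate_deferred()/defer_numa_migrate() pair implemented a simple per-task countdown: once a shared fault failed the cpupid filter, the next sysctl_numa_balancing_migrate_deferred shared-fault migrations were skipped wholesale. A standalone userspace illustration of that behavior (hypothetical names, not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-in for the removed sysctl_numa_balancing_migrate_deferred. */
	#define MIGRATE_DEFER_COUNT 16

	struct task { int numa_migrate_deferred; };

	/* Arm the counter: skip the next N shared-fault migration attempts. */
	static void defer_numa_migrate(struct task *p)
	{
		p->numa_migrate_deferred = MIGRATE_DEFER_COUNT;
	}

	/* Consume one deferral; true means "skip this migration". */
	static bool numa_migrate_deferred(struct task *p, bool private_fault)
	{
		if (private_fault)	/* never defer a private fault */
			return false;
		if (p->numa_migrate_deferred) {
			p->numa_migrate_deferred--;
			return true;
		}
		return false;
	}

	int main(void)
	{
		struct task t = { 0 };
		int i;

		defer_numa_migrate(&t);
		/* The first 16 shared faults print "deferred", the rest "migrate". */
		for (i = 0; i < 18; i++)
			printf("fault %2d: %s\n", i,
			       numa_migrate_deferred(&t, false) ? "deferred"
								: "migrate");
		return 0;
	}
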
@@ -2403,52 +2374,9 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 
 	/* Migrate the page towards the node whose CPU is referencing it */
 	if (pol->flags & MPOL_F_MORON) {
-		int last_cpupid;
-		int this_cpupid;
-
 		polnid = thisnid;
-		this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid);
-
-		/*
-		 * Multi-stage node selection is used in conjunction
-		 * with a periodic migration fault to build a temporal
-		 * task<->page relation. By using a two-stage filter we
-		 * remove short/unlikely relations.
-		 *
-		 * Using P(p) ~ n_p / n_t as per frequentist
-		 * probability, we can equate a task's usage of a
-		 * particular page (n_p) per total usage of this
-		 * page (n_t) (in a given time-span) to a probability.
-		 *
-		 * Our periodic faults will sample this probability and
-		 * getting the same result twice in a row, given these
-		 * samples are fully independent, is then given by
-		 * P(n)^2, provided our sample period is sufficiently
-		 * short compared to the usage pattern.
-		 *
-		 * This quadric squishes small probabilities, making
-		 * it less likely we act on an unlikely task<->page
-		 * relation.
-		 */
-		last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
-		if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) {
 
-			/* See sysctl_numa_balancing_migrate_deferred comment */
-			if (!cpupid_match_pid(current, last_cpupid))
-				defer_numa_migrate(current);
-
-			goto out;
-		}
-
-		/*
-		 * The quadratic filter above reduces extraneous migration
-		 * of shared pages somewhat. This code reduces it even more,
-		 * reducing the overhead of page migrations of shared pages.
-		 * This makes workloads with shared pages rely more on
-		 * "move task near its memory", and less on "move memory
-		 * towards its task", which is exactly what we want.
-		 */
-		if (numa_migrate_deferred(current, last_cpupid))
+		if (!should_numa_migrate_memory(current, page, curnid, thiscpu))
 			goto out;
 	}
 
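
The filter itself does not go away: it moves out of mempolicy.c into the scheduler as should_numa_migrate_memory(), leaving mpol_misplaced() to deal with policy only. The two-stage check survives the move: if a task touches a page with probability P(p) = n_p / n_t, requiring two consecutive independent fault samples to agree means migrating with probability roughly P(p)^2, so a task responsible for 10% of a page's accesses triggers a migration only about 1% of the time. A hedged sketch of the relocated helper, simplified from what the companion scheduler change introduces (the real version also consults the task's NUMA group state before deciding shared faults):

	/* Sketch only; the full helper adds numa_group-based heuristics. */
	bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
					int src_nid, int dst_cpu)
	{
		int dst_nid = cpu_to_node(dst_cpu);
		int last_cpupid, this_cpupid;

		this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);

		/*
		 * Stage one: record who faulted now, and migrate only if
		 * the previously recorded fault already came from this
		 * node, i.e. two consecutive samples agree (the P(p)^2
		 * filter from the comment removed above).
		 */
		last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
		if (!cpupid_pid_unset(last_cpupid) &&
		    cpupid_to_nid(last_cpupid) != dst_nid)
			return false;

		/* Private faults (same task as last time) always pass. */
		if (cpupid_match_pid(p, last_cpupid))
			return true;

		/* Shared faults: group-aware checks replace blanket deferral. */
		return true;
	}
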