aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-03-31 14:21:19 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-03-31 14:21:19 -0400
commit971eae7c99212dd67b425a603f1fe3b763359907 (patch)
tree2ff002ecc759275cbecee123a230f90ea7452b18 /mm
parent8c292f11744297dfb3a69f4a0bccbe4a6417b50d (diff)
parent6037dd1a49f95092824fa8ba75c717ff7805e317 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar: "Bigger changes: - sched/idle restructuring: they are WIP preparation for deeper integration between the scheduler and idle state selection, by Nicolas Pitre. - add NUMA scheduling pseudo-interleaving, by Rik van Riel. - optimize cgroup context switches, by Peter Zijlstra. - RT scheduling enhancements, by Thomas Gleixner. The rest is smaller changes, non-urgnt fixes and cleanups" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (68 commits) sched: Clean up the task_hot() function sched: Remove double calculation in fix_small_imbalance() sched: Fix broken setscheduler() sparc64, sched: Remove unused sparc64_multi_core sched: Remove unused mc_capable() and smt_capable() sched/numa: Move task_numa_free() to __put_task_struct() sched/fair: Fix endless loop in idle_balance() sched/core: Fix endless loop in pick_next_task() sched/fair: Push down check for high priority class task into idle_balance() sched/rt: Fix picking RT and DL tasks from empty queue trace: Replace hardcoding of 19 with MAX_NICE sched: Guarantee task priority in pick_next_task() sched/idle: Remove stale old file sched: Put rq's sched_avg under CONFIG_FAIR_GROUP_SCHED cpuidle/arm64: Remove redundant cpuidle_idle_call() cpuidle/powernv: Remove redundant cpuidle_idle_call() sched, nohz: Exclude isolated cores from load balancing sched: Fix select_task_rq_fair() description comments workqueue: Replace hardcoding of -20 and 19 with MIN_NICE and MAX_NICE sys: Replace hardcoding of -20 and 19 with MIN_NICE and MAX_NICE ...
Diffstat (limited to 'mm')
-rw-r--r--mm/mempolicy.c74
1 files changed, 1 insertions, 73 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ae3c8f3595d4..f520b9da9c1f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2301,35 +2301,6 @@ static void sp_free(struct sp_node *n)
2301 kmem_cache_free(sn_cache, n); 2301 kmem_cache_free(sn_cache, n);
2302} 2302}
2303 2303
2304#ifdef CONFIG_NUMA_BALANCING
2305static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
2306{
2307 /* Never defer a private fault */
2308 if (cpupid_match_pid(p, last_cpupid))
2309 return false;
2310
2311 if (p->numa_migrate_deferred) {
2312 p->numa_migrate_deferred--;
2313 return true;
2314 }
2315 return false;
2316}
2317
2318static inline void defer_numa_migrate(struct task_struct *p)
2319{
2320 p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred;
2321}
2322#else
2323static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
2324{
2325 return false;
2326}
2327
2328static inline void defer_numa_migrate(struct task_struct *p)
2329{
2330}
2331#endif /* CONFIG_NUMA_BALANCING */
2332
2333/** 2304/**
2334 * mpol_misplaced - check whether current page node is valid in policy 2305 * mpol_misplaced - check whether current page node is valid in policy
2335 * 2306 *
@@ -2403,52 +2374,9 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
2403 2374
2404 /* Migrate the page towards the node whose CPU is referencing it */ 2375 /* Migrate the page towards the node whose CPU is referencing it */
2405 if (pol->flags & MPOL_F_MORON) { 2376 if (pol->flags & MPOL_F_MORON) {
2406 int last_cpupid;
2407 int this_cpupid;
2408
2409 polnid = thisnid; 2377 polnid = thisnid;
2410 this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid);
2411
2412 /*
2413 * Multi-stage node selection is used in conjunction
2414 * with a periodic migration fault to build a temporal
2415 * task<->page relation. By using a two-stage filter we
2416 * remove short/unlikely relations.
2417 *
2418 * Using P(p) ~ n_p / n_t as per frequentist
2419 * probability, we can equate a task's usage of a
2420 * particular page (n_p) per total usage of this
2421 * page (n_t) (in a given time-span) to a probability.
2422 *
2423 * Our periodic faults will sample this probability and
2424 * getting the same result twice in a row, given these
2425 * samples are fully independent, is then given by
2426 * P(n)^2, provided our sample period is sufficiently
2427 * short compared to the usage pattern.
2428 *
2429 * This quadric squishes small probabilities, making
2430 * it less likely we act on an unlikely task<->page
2431 * relation.
2432 */
2433 last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
2434 if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) {
2435 2378
2436 /* See sysctl_numa_balancing_migrate_deferred comment */ 2379 if (!should_numa_migrate_memory(current, page, curnid, thiscpu))
2437 if (!cpupid_match_pid(current, last_cpupid))
2438 defer_numa_migrate(current);
2439
2440 goto out;
2441 }
2442
2443 /*
2444 * The quadratic filter above reduces extraneous migration
2445 * of shared pages somewhat. This code reduces it even more,
2446 * reducing the overhead of page migrations of shared pages.
2447 * This makes workloads with shared pages rely more on
2448 * "move task near its memory", and less on "move memory
2449 * towards its task", which is exactly what we want.
2450 */
2451 if (numa_migrate_deferred(current, last_cpupid))
2452 goto out; 2380 goto out;
2453 } 2381 }
2454 2382