diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-03-31 14:21:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-03-31 14:21:19 -0400 |
commit | 971eae7c99212dd67b425a603f1fe3b763359907 (patch) | |
tree | 2ff002ecc759275cbecee123a230f90ea7452b18 /mm | |
parent | 8c292f11744297dfb3a69f4a0bccbe4a6417b50d (diff) | |
parent | 6037dd1a49f95092824fa8ba75c717ff7805e317 (diff) |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
"Bigger changes:
- sched/idle restructuring: they are WIP preparation for deeper
integration between the scheduler and idle state selection, by
Nicolas Pitre.
- add NUMA scheduling pseudo-interleaving, by Rik van Riel.
- optimize cgroup context switches, by Peter Zijlstra.
- RT scheduling enhancements, by Thomas Gleixner.
The rest is smaller changes, non-urgent fixes and cleanups"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (68 commits)
sched: Clean up the task_hot() function
sched: Remove double calculation in fix_small_imbalance()
sched: Fix broken setscheduler()
sparc64, sched: Remove unused sparc64_multi_core
sched: Remove unused mc_capable() and smt_capable()
sched/numa: Move task_numa_free() to __put_task_struct()
sched/fair: Fix endless loop in idle_balance()
sched/core: Fix endless loop in pick_next_task()
sched/fair: Push down check for high priority class task into idle_balance()
sched/rt: Fix picking RT and DL tasks from empty queue
trace: Replace hardcoding of 19 with MAX_NICE
sched: Guarantee task priority in pick_next_task()
sched/idle: Remove stale old file
sched: Put rq's sched_avg under CONFIG_FAIR_GROUP_SCHED
cpuidle/arm64: Remove redundant cpuidle_idle_call()
cpuidle/powernv: Remove redundant cpuidle_idle_call()
sched, nohz: Exclude isolated cores from load balancing
sched: Fix select_task_rq_fair() description comments
workqueue: Replace hardcoding of -20 and 19 with MIN_NICE and MAX_NICE
sys: Replace hardcoding of -20 and 19 with MIN_NICE and MAX_NICE
...
Diffstat (limited to 'mm')
-rw-r--r-- | mm/mempolicy.c | 74 |
1 files changed, 1 insertions, 73 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ae3c8f3595d4..f520b9da9c1f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -2301,35 +2301,6 @@ static void sp_free(struct sp_node *n) | |||
2301 | kmem_cache_free(sn_cache, n); | 2301 | kmem_cache_free(sn_cache, n); |
2302 | } | 2302 | } |
2303 | 2303 | ||
2304 | #ifdef CONFIG_NUMA_BALANCING | ||
2305 | static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) | ||
2306 | { | ||
2307 | /* Never defer a private fault */ | ||
2308 | if (cpupid_match_pid(p, last_cpupid)) | ||
2309 | return false; | ||
2310 | |||
2311 | if (p->numa_migrate_deferred) { | ||
2312 | p->numa_migrate_deferred--; | ||
2313 | return true; | ||
2314 | } | ||
2315 | return false; | ||
2316 | } | ||
2317 | |||
2318 | static inline void defer_numa_migrate(struct task_struct *p) | ||
2319 | { | ||
2320 | p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred; | ||
2321 | } | ||
2322 | #else | ||
2323 | static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) | ||
2324 | { | ||
2325 | return false; | ||
2326 | } | ||
2327 | |||
2328 | static inline void defer_numa_migrate(struct task_struct *p) | ||
2329 | { | ||
2330 | } | ||
2331 | #endif /* CONFIG_NUMA_BALANCING */ | ||
2332 | |||
2333 | /** | 2304 | /** |
2334 | * mpol_misplaced - check whether current page node is valid in policy | 2305 | * mpol_misplaced - check whether current page node is valid in policy |
2335 | * | 2306 | * |
@@ -2403,52 +2374,9 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long | |||
2403 | 2374 | ||
2404 | /* Migrate the page towards the node whose CPU is referencing it */ | 2375 | /* Migrate the page towards the node whose CPU is referencing it */ |
2405 | if (pol->flags & MPOL_F_MORON) { | 2376 | if (pol->flags & MPOL_F_MORON) { |
2406 | int last_cpupid; | ||
2407 | int this_cpupid; | ||
2408 | |||
2409 | polnid = thisnid; | 2377 | polnid = thisnid; |
2410 | this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid); | ||
2411 | |||
2412 | /* | ||
2413 | * Multi-stage node selection is used in conjunction | ||
2414 | * with a periodic migration fault to build a temporal | ||
2415 | * task<->page relation. By using a two-stage filter we | ||
2416 | * remove short/unlikely relations. | ||
2417 | * | ||
2418 | * Using P(p) ~ n_p / n_t as per frequentist | ||
2419 | * probability, we can equate a task's usage of a | ||
2420 | * particular page (n_p) per total usage of this | ||
2421 | * page (n_t) (in a given time-span) to a probability. | ||
2422 | * | ||
2423 | * Our periodic faults will sample this probability and | ||
2424 | * getting the same result twice in a row, given these | ||
2425 | * samples are fully independent, is then given by | ||
2426 | * P(n)^2, provided our sample period is sufficiently | ||
2427 | * short compared to the usage pattern. | ||
2428 | * | ||
2429 | * This quadric squishes small probabilities, making | ||
2430 | * it less likely we act on an unlikely task<->page | ||
2431 | * relation. | ||
2432 | */ | ||
2433 | last_cpupid = page_cpupid_xchg_last(page, this_cpupid); | ||
2434 | if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) { | ||
2435 | 2378 | ||
2436 | /* See sysctl_numa_balancing_migrate_deferred comment */ | 2379 | if (!should_numa_migrate_memory(current, page, curnid, thiscpu)) |
2437 | if (!cpupid_match_pid(current, last_cpupid)) | ||
2438 | defer_numa_migrate(current); | ||
2439 | |||
2440 | goto out; | ||
2441 | } | ||
2442 | |||
2443 | /* | ||
2444 | * The quadratic filter above reduces extraneous migration | ||
2445 | * of shared pages somewhat. This code reduces it even more, | ||
2446 | * reducing the overhead of page migrations of shared pages. | ||
2447 | * This makes workloads with shared pages rely more on | ||
2448 | * "move task near its memory", and less on "move memory | ||
2449 | * towards its task", which is exactly what we want. | ||
2450 | */ | ||
2451 | if (numa_migrate_deferred(current, last_cpupid)) | ||
2452 | goto out; | 2380 | goto out; |
2453 | } | 2381 | } |
2454 | 2382 | ||