about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  include/linux/cpuset.h |  4
-rw-r--r--  kernel/cpuset.c        | 95
-rw-r--r--  mm/mempolicy.c         | 10
3 files changed, 48 insertions, 61 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 736d73801cb6..1feebf16ab08 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -20,7 +20,7 @@ extern void cpuset_fork(struct task_struct *p);
20extern void cpuset_exit(struct task_struct *p); 20extern void cpuset_exit(struct task_struct *p);
21extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p); 21extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p);
22void cpuset_init_current_mems_allowed(void); 22void cpuset_init_current_mems_allowed(void);
23void cpuset_update_current_mems_allowed(void); 23void cpuset_update_task_memory_state(void);
24#define cpuset_nodes_subset_current_mems_allowed(nodes) \ 24#define cpuset_nodes_subset_current_mems_allowed(nodes) \
25 nodes_subset((nodes), current->mems_allowed) 25 nodes_subset((nodes), current->mems_allowed)
26int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); 26int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
@@ -51,7 +51,7 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
51} 51}
52 52
53static inline void cpuset_init_current_mems_allowed(void) {} 53static inline void cpuset_init_current_mems_allowed(void) {}
54static inline void cpuset_update_current_mems_allowed(void) {} 54static inline void cpuset_update_task_memory_state(void) {}
55#define cpuset_nodes_subset_current_mems_allowed(nodes) (1) 55#define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
56 56
57static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) 57static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d9349cc48b95..e9917d71628a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -584,13 +584,26 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
584 BUG_ON(!nodes_intersects(*pmask, node_online_map)); 584 BUG_ON(!nodes_intersects(*pmask, node_online_map));
585} 585}
586 586
587/* 587/**
588 * Refresh current tasks mems_allowed and mems_generation from current 588 * cpuset_update_task_memory_state - update task memory placement
589 * tasks cpuset.
590 * 589 *
591 * Call without callback_sem or task_lock() held. May be called with 590 * If the current tasks cpusets mems_allowed changed behind our
592 * or without manage_sem held. Will acquire task_lock() and might 591 * backs, update current->mems_allowed, mems_generation and task NUMA
593 * acquire callback_sem during call. 592 * mempolicy to the new value.
593 *
594 * Task mempolicy is updated by rebinding it relative to the
595 * current->cpuset if a task has its memory placement changed.
596 * Do not call this routine if in_interrupt().
597 *
598 * Call without callback_sem or task_lock() held. May be called
599 * with or without manage_sem held. Except in early boot or
600 * an exiting task, when tsk->cpuset is NULL, this routine will
601 * acquire task_lock(). We don't need to use task_lock to guard
602 * against another task changing a non-NULL cpuset pointer to NULL,
603 * as that is only done by a task on itself, and if the current task
604 * is here, it is not simultaneously in the exit code NULL'ing its
605 * cpuset pointer. This routine also might acquire callback_sem and
606 * current->mm->mmap_sem during call.
594 * 607 *
595 * The task_lock() is required to dereference current->cpuset safely. 608 * The task_lock() is required to dereference current->cpuset safely.
596 * Without it, we could pick up the pointer value of current->cpuset 609 * Without it, we could pick up the pointer value of current->cpuset
@@ -605,32 +618,36 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
605 * task has been modifying its cpuset. 618 * task has been modifying its cpuset.
606 */ 619 */
607 620
608static void refresh_mems(void) 621void cpuset_update_task_memory_state()
609{ 622{
610 int my_cpusets_mem_gen; 623 int my_cpusets_mem_gen;
624 struct task_struct *tsk = current;
625 struct cpuset *cs = tsk->cpuset;
611 626
612 task_lock(current); 627 if (unlikely(!cs))
613 my_cpusets_mem_gen = current->cpuset->mems_generation; 628 return;
614 task_unlock(current); 629
630 task_lock(tsk);
631 my_cpusets_mem_gen = cs->mems_generation;
632 task_unlock(tsk);
615 633
616 if (current->cpuset_mems_generation != my_cpusets_mem_gen) { 634 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
617 struct cpuset *cs; 635 nodemask_t oldmem = tsk->mems_allowed;
618 nodemask_t oldmem = current->mems_allowed;
619 int migrate; 636 int migrate;
620 637
621 down(&callback_sem); 638 down(&callback_sem);
622 task_lock(current); 639 task_lock(tsk);
623 cs = current->cpuset; 640 cs = tsk->cpuset; /* Maybe changed when task not locked */
624 migrate = is_memory_migrate(cs); 641 migrate = is_memory_migrate(cs);
625 guarantee_online_mems(cs, &current->mems_allowed); 642 guarantee_online_mems(cs, &tsk->mems_allowed);
626 current->cpuset_mems_generation = cs->mems_generation; 643 tsk->cpuset_mems_generation = cs->mems_generation;
627 task_unlock(current); 644 task_unlock(tsk);
628 up(&callback_sem); 645 up(&callback_sem);
629 if (!nodes_equal(oldmem, current->mems_allowed)) { 646 numa_policy_rebind(&oldmem, &tsk->mems_allowed);
630 numa_policy_rebind(&oldmem, &current->mems_allowed); 647 if (!nodes_equal(oldmem, tsk->mems_allowed)) {
631 if (migrate) { 648 if (migrate) {
632 do_migrate_pages(current->mm, &oldmem, 649 do_migrate_pages(tsk->mm, &oldmem,
633 &current->mems_allowed, 650 &tsk->mems_allowed,
634 MPOL_MF_MOVE_ALL); 651 MPOL_MF_MOVE_ALL);
635 } 652 }
636 } 653 }
@@ -1630,7 +1647,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
1630 return -ENOMEM; 1647 return -ENOMEM;
1631 1648
1632 down(&manage_sem); 1649 down(&manage_sem);
1633 refresh_mems(); 1650 cpuset_update_task_memory_state();
1634 cs->flags = 0; 1651 cs->flags = 0;
1635 if (notify_on_release(parent)) 1652 if (notify_on_release(parent))
1636 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); 1653 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1688,7 +1705,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1688 /* the vfs holds both inode->i_sem already */ 1705 /* the vfs holds both inode->i_sem already */
1689 1706
1690 down(&manage_sem); 1707 down(&manage_sem);
1691 refresh_mems(); 1708 cpuset_update_task_memory_state();
1692 if (atomic_read(&cs->count) > 0) { 1709 if (atomic_read(&cs->count) > 0) {
1693 up(&manage_sem); 1710 up(&manage_sem);
1694 return -EBUSY; 1711 return -EBUSY;
@@ -1873,36 +1890,6 @@ void cpuset_init_current_mems_allowed(void)
1873} 1890}
1874 1891
1875/** 1892/**
1876 * cpuset_update_current_mems_allowed - update mems parameters to new values
1877 *
1878 * If the current tasks cpusets mems_allowed changed behind our backs,
1879 * update current->mems_allowed and mems_generation to the new value.
1880 * Do not call this routine if in_interrupt().
1881 *
1882 * Call without callback_sem or task_lock() held. May be called
1883 * with or without manage_sem held. Unless exiting, it will acquire
1884 * task_lock(). Also might acquire callback_sem during call to
1885 * refresh_mems().
1886 */
1887
1888void cpuset_update_current_mems_allowed(void)
1889{
1890 struct cpuset *cs;
1891 int need_to_refresh = 0;
1892
1893 task_lock(current);
1894 cs = current->cpuset;
1895 if (!cs)
1896 goto done;
1897 if (current->cpuset_mems_generation != cs->mems_generation)
1898 need_to_refresh = 1;
1899done:
1900 task_unlock(current);
1901 if (need_to_refresh)
1902 refresh_mems();
1903}
1904
1905/**
1906 * cpuset_zonelist_valid_mems_allowed - check zonelist vs. curremt mems_allowed 1893 * cpuset_zonelist_valid_mems_allowed - check zonelist vs. curremt mems_allowed
1907 * @zl: the zonelist to be checked 1894 * @zl: the zonelist to be checked
1908 * 1895 *
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9dea2b8a7d48..515bfeee027e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -387,7 +387,7 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
387 if (!nodes) 387 if (!nodes)
388 return 0; 388 return 0;
389 389
390 cpuset_update_current_mems_allowed(); 390 cpuset_update_task_memory_state();
391 if (!cpuset_nodes_subset_current_mems_allowed(*nodes)) 391 if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
392 return -EINVAL; 392 return -EINVAL;
393 return mpol_check_policy(mode, nodes); 393 return mpol_check_policy(mode, nodes);
@@ -461,7 +461,7 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
461 struct vm_area_struct *vma = NULL; 461 struct vm_area_struct *vma = NULL;
462 struct mempolicy *pol = current->mempolicy; 462 struct mempolicy *pol = current->mempolicy;
463 463
464 cpuset_update_current_mems_allowed(); 464 cpuset_update_task_memory_state();
465 if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR)) 465 if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
466 return -EINVAL; 466 return -EINVAL;
467 if (flags & MPOL_F_ADDR) { 467 if (flags & MPOL_F_ADDR) {
@@ -1089,7 +1089,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
1089{ 1089{
1090 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1090 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1091 1091
1092 cpuset_update_current_mems_allowed(); 1092 cpuset_update_task_memory_state();
1093 1093
1094 if (unlikely(pol->policy == MPOL_INTERLEAVE)) { 1094 if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
1095 unsigned nid; 1095 unsigned nid;
@@ -1115,7 +1115,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
1115 * interrupt context and apply the current process NUMA policy. 1115 * interrupt context and apply the current process NUMA policy.
1116 * Returns NULL when no page can be allocated. 1116 * Returns NULL when no page can be allocated.
1117 * 1117 *
1118 * Don't call cpuset_update_current_mems_allowed() unless 1118 * Don't call cpuset_update_task_memory_state() unless
1119 * 1) it's ok to take cpuset_sem (can WAIT), and 1119 * 1) it's ok to take cpuset_sem (can WAIT), and
1120 * 2) allocating for current task (not interrupt). 1120 * 2) allocating for current task (not interrupt).
1121 */ 1121 */
@@ -1124,7 +1124,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
1124 struct mempolicy *pol = current->mempolicy; 1124 struct mempolicy *pol = current->mempolicy;
1125 1125
1126 if ((gfp & __GFP_WAIT) && !in_interrupt()) 1126 if ((gfp & __GFP_WAIT) && !in_interrupt())
1127 cpuset_update_current_mems_allowed(); 1127 cpuset_update_task_memory_state();
1128 if (!pol || in_interrupt()) 1128 if (!pol || in_interrupt())
1129 pol = &default_policy; 1129 pol = &default_policy;
1130 if (pol->policy == MPOL_INTERLEAVE) 1130 if (pol->policy == MPOL_INTERLEAVE)