Diffstat (limited to 'mm/mempolicy.c')
 mm/mempolicy.c | 51 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 17 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ec4a1a950df9..cf18f0942553 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
 
 /* Highest zone. An specific allocation for a zone below that is not
    policied. */
-int policy_zone = ZONE_DMA;
+enum zone_type policy_zone = ZONE_DMA;
 
 struct mempolicy default_policy = {
 	.refcnt = ATOMIC_INIT(1), /* never free it */
@@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
 static struct zonelist *bind_zonelist(nodemask_t *nodes)
 {
 	struct zonelist *zl;
-	int num, max, nd, k;
+	int num, max, nd;
+	enum zone_type k;
 
 	max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
 	zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
@@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	   lower zones etc. Avoid empty zones because the memory allocator
 	   doesn't like them. If you implement node hot removal you
 	   have to fix that. */
-	for (k = policy_zone; k >= 0; k--) {
+	k = policy_zone;
+	while (1) {
 		for_each_node_mask(nd, *nodes) {
 			struct zone *z = &NODE_DATA(nd)->node_zones[k];
 			if (z->present_pages > 0)
 				zl->zones[num++] = z;
 		}
+		if (k == 0)
+			break;
+		k--;
 	}
 	zl->zones[num] = NULL;
 	return zl;
@@ -482,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
 	switch (p->policy) {
 	case MPOL_BIND:
 		for (i = 0; p->v.zonelist->zones[i]; i++)
-			node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id,
+			node_set(zone_to_nid(p->v.zonelist->zones[i]),
 				*nodes);
 		break;
 	case MPOL_DEFAULT:
@@ -632,6 +637,10 @@ int do_migrate_pages(struct mm_struct *mm,
 
 	down_read(&mm->mmap_sem);
 
+	err = migrate_vmas(mm, from_nodes, to_nodes, flags);
+	if (err)
+		goto out;
+
 /*
  * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
  * bit in 'to' is not also set in 'tmp'. Clear the found 'source'
@@ -691,7 +700,7 @@ int do_migrate_pages(struct mm_struct *mm,
 		if (err < 0)
 			break;
 	}
-
+out:
 	up_read(&mm->mmap_sem);
 	if (err < 0)
 		return err;
@@ -1127,7 +1136,9 @@ static unsigned interleave_nodes(struct mempolicy *policy)
  */
 unsigned slab_node(struct mempolicy *policy)
 {
-	switch (policy->policy) {
+	int pol = policy ? policy->policy : MPOL_DEFAULT;
+
+	switch (pol) {
 	case MPOL_INTERLEAVE:
 		return interleave_nodes(policy);
 
@@ -1136,7 +1147,7 @@ unsigned slab_node(struct mempolicy *policy)
 		 * Follow bind policy behavior and start allocation at the
 		 * first node.
 		 */
-		return policy->v.zonelist->zones[0]->zone_pgdat->node_id;
+		return zone_to_nid(policy->v.zonelist->zones[0]);
 
 	case MPOL_PREFERRED:
 		if (policy->v.preferred_node >= 0)
@@ -1172,7 +1183,15 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 	if (vma) {
 		unsigned long off;
 
-		off = vma->vm_pgoff;
+		/*
+		 * for small pages, there is no difference between
+		 * shift and PAGE_SHIFT, so the bit-shift is safe.
+		 * for huge pages, since vm_pgoff is in units of small
+		 * pages, we need to shift off the always 0 bits to get
+		 * a useful offset.
+		 */
+		BUG_ON(shift < PAGE_SHIFT);
+		off = vma->vm_pgoff >> (shift - PAGE_SHIFT);
 		off += (addr - vma->vm_start) >> shift;
 		return offset_il_node(pol, vma, off);
 	} else
@@ -1205,10 +1224,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 
 	zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
 	page = __alloc_pages(gfp, order, zl);
-	if (page && page_zone(page) == zl->zones[0]) {
-		zone_pcp(zl->zones[0],get_cpu())->interleave_hit++;
-		put_cpu();
-	}
+	if (page && page_zone(page) == zl->zones[0])
+		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
 	return page;
 }
 
@@ -1275,7 +1292,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 
 	if ((gfp & __GFP_WAIT) && !in_interrupt())
 		cpuset_update_task_memory_state();
-	if (!pol || in_interrupt())
+	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 	if (pol->policy == MPOL_INTERLEAVE)
 		return alloc_page_interleave(gfp, order, interleave_nodes(pol));
@@ -1634,7 +1651,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
 
 		nodes_clear(nodes);
 		for (z = pol->v.zonelist->zones; *z; z++)
-			node_set((*z)->zone_pgdat->node_id, nodes);
+			node_set(zone_to_nid(*z), nodes);
 		nodes_remap(tmp, nodes, *mpolmask, *newmask);
 		nodes = tmp;
 
@@ -1817,7 +1834,7 @@ static inline void check_huge_range(struct vm_area_struct *vma,
 
 int show_numa_map(struct seq_file *m, void *v)
 {
-	struct task_struct *task = m->private;
+	struct proc_maps_private *priv = m->private;
 	struct vm_area_struct *vma = v;
 	struct numa_maps *md;
 	struct file *file = vma->vm_file;
@@ -1833,7 +1850,7 @@ int show_numa_map(struct seq_file *m, void *v)
 		return 0;
 
 	mpol_to_str(buffer, sizeof(buffer),
-			get_vma_policy(task, vma, vma->vm_start));
+			get_vma_policy(priv->task, vma, vma->vm_start));
 
 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
 
@@ -1887,7 +1904,7 @@ out:
 	kfree(md);
 
 	if (m->count < m->size)
-		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
 