Diffstat (limited to 'mm/mempolicy.c'):

 mm/mempolicy.c | 51 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 17 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ec4a1a950df9..cf18f0942553 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
 
 /* Highest zone. An specific allocation for a zone below that is not
    policied. */
-int policy_zone = ZONE_DMA;
+enum zone_type policy_zone = ZONE_DMA;
 
 struct mempolicy default_policy = {
 	.refcnt = ATOMIC_INIT(1), /* never free it */
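For context (not part of the patch): policy_zone bounds the highest zone that mempolicies manage, and typing it enum zone_type matches the zone indices used throughout mm. A config-dependent sketch of that era's zone enumeration, reconstructed from memory and so an assumption rather than a quote:

enum zone_type {
	ZONE_DMA,	/* low memory usable by legacy DMA devices */
	ZONE_DMA32,	/* below 4 GB, on some 64-bit platforms */
	ZONE_NORMAL,	/* regular, permanently mapped kernel memory */
	ZONE_HIGHMEM,	/* 32-bit only: not permanently mapped */
	MAX_NR_ZONES
};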
@@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
 static struct zonelist *bind_zonelist(nodemask_t *nodes)
 {
 	struct zonelist *zl;
-	int num, max, nd, k;
+	int num, max, nd;
+	enum zone_type k;
 
 	max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
 	zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
@@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	   lower zones etc. Avoid empty zones because the memory allocator
 	   doesn't like them. If you implement node hot removal you
 	   have to fix that. */
-	for (k = policy_zone; k >= 0; k--) {
+	k = policy_zone;
+	while (1) {
 		for_each_node_mask(nd, *nodes) {
 			struct zone *z = &NODE_DATA(nd)->node_zones[k];
 			if (z->present_pages > 0)
 				zl->zones[num++] = z;
 		}
+		if (k == 0)
+			break;
+		k--;
 	}
 	zl->zones[num] = NULL;
 	return zl;
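The loop is restructured because k is now an enum zone_type, and an enum with no negative members is typically unsigned: "k >= 0" is then always true, and k-- wraps from 0 to the maximum value instead of going negative, so the old for-loop would never terminate. A minimal stand-alone illustration of the pitfall (hypothetical user-space code, not from the patch):

#include <stdio.h>

int main(void)
{
	/* Broken: with an unsigned counter, "k >= 0" is always true and
	 * k-- wraps from 0 back to UINT_MAX, so this never terminates:
	 *
	 *	for (unsigned int k = 3; k >= 0; k--) { ... }
	 *
	 * Safe descending walk down to zero, mirroring the patch: */
	unsigned int k = 3;

	while (1) {
		printf("zone %u\n", k);
		if (k == 0)
			break;
		k--;
	}
	return 0;
}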
@@ -482,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
 	switch (p->policy) {
 	case MPOL_BIND:
 		for (i = 0; p->v.zonelist->zones[i]; i++)
-			node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id,
+			node_set(zone_to_nid(p->v.zonelist->zones[i]),
 				*nodes);
 		break;
 	case MPOL_DEFAULT:
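Here and in the later hunks, the open-coded chase through zone_pgdat is replaced by the zone_to_nid() accessor. Judging only from the lines it replaces, the helper is equivalent to the removed expression; a sketch of the presumed shape (the real definition lives in the mm headers):

/* Presumed equivalent of the expression this patch replaces; the
 * accessor hides how a zone maps to its node id, so the mapping can
 * later be cached in struct zone instead of chasing zone_pgdat. */
static inline int zone_to_nid(struct zone *zone)
{
	return zone->zone_pgdat->node_id;
}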
@@ -632,6 +637,10 @@ int do_migrate_pages(struct mm_struct *mm,
 
 	down_read(&mm->mmap_sem);
 
+	err = migrate_vmas(mm, from_nodes, to_nodes, flags);
+	if (err)
+		goto out;
+
 	/*
 	 * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
 	 * bit in 'to' is not also set in 'tmp'. Clear the found 'source'
@@ -691,7 +700,7 @@ int do_migrate_pages(struct mm_struct *mm,
 		if (err < 0)
 			break;
 	}
-
+out:
 	up_read(&mm->mmap_sem);
 	if (err < 0)
 		return err;
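The two do_migrate_pages() hunks belong together: the new migrate_vmas() call gives mappings with their own migration hook a chance to move their pages before the generic node-by-node loop runs, and the new out: label funnels its error path through the single unlock site. A condensed sketch of the resulting control flow (helper internals are not part of this patch and are elided):

	down_read(&mm->mmap_sem);

	/* Per-mapping migration hooks go first; on failure, jump to
	 * the one place that drops the semaphore. */
	err = migrate_vmas(mm, from_nodes, to_nodes, flags);
	if (err)
		goto out;

	/* ... generic source-to-destination node migration loop,
	 *     which may also set err ... */
out:
	up_read(&mm->mmap_sem);
	if (err < 0)
		return err;
	/* ... */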
@@ -1127,7 +1136,9 @@ static unsigned interleave_nodes(struct mempolicy *policy)
  */
 unsigned slab_node(struct mempolicy *policy)
 {
-	switch (policy->policy) {
+	int pol = policy ? policy->policy : MPOL_DEFAULT;
+
+	switch (pol) {
 	case MPOL_INTERLEAVE:
 		return interleave_nodes(policy);
 
@@ -1136,7 +1147,7 @@ unsigned slab_node(struct mempolicy *policy)
 		 * Follow bind policy behavior and start allocation at the
 		 * first node.
 		 */
-		return policy->v.zonelist->zones[0]->zone_pgdat->node_id;
+		return zone_to_nid(policy->v.zonelist->zones[0]);
 
 	case MPOL_PREFERRED:
 		if (policy->v.preferred_node >= 0)
@@ -1172,7 +1183,15 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 	if (vma) {
 		unsigned long off;
 
-		off = vma->vm_pgoff;
+		/*
+		 * for small pages, there is no difference between
+		 * shift and PAGE_SHIFT, so the bit-shift is safe.
+		 * for huge pages, since vm_pgoff is in units of small
+		 * pages, we need to shift off the always 0 bits to get
+		 * a useful offset.
+		 */
+		BUG_ON(shift < PAGE_SHIFT);
+		off = vma->vm_pgoff >> (shift - PAGE_SHIFT);
 		off += (addr - vma->vm_start) >> shift;
 		return offset_il_node(pol, vma, off);
 	} else
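To make the shift concrete, a worked example under assumed x86-64 values (PAGE_SHIFT = 12 for 4 KB base pages, shift = 21 for 2 MB huge pages; the numbers are illustrative only):

/* Hypothetical values for illustration. */
unsigned long vm_pgoff = 1024;	/* file offset: 1024 small pages = 4 MB */
unsigned int shift = 21, page_shift = 12;

/* vm_pgoff counts 4 KB pages, but interleaving steps per huge page.
 * 21 - 12 = 9, and 1 << 9 = 512 small pages per 2 MB huge page, so: */
unsigned long off = vm_pgoff >> (shift - page_shift);	/* 1024 >> 9 = 2 */

/* i.e. the mapping starts at huge-page index 2 of the file; without
 * the shift, interleaving would advance 512x too fast across nodes. */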
@@ -1205,10 +1224,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 
 	zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
 	page = __alloc_pages(gfp, order, zl);
-	if (page && page_zone(page) == zl->zones[0]) {
-		zone_pcp(zl->zones[0],get_cpu())->interleave_hit++;
-		put_cpu();
-	}
+	if (page && page_zone(page) == zl->zones[0])
+		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
 	return page;
 }
 
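The old code bumped a counter in the zone's per-CPU pageset, which is only safe with preemption disabled; hence the get_cpu()/put_cpu() bracket. inc_zone_page_state() records the event through the generic per-zone vmstat machinery instead, which handles that concern internally. The per-CPU pattern the old code followed, as a generic sketch (per_cpu_counter is a hypothetical array, not the removed code):

int cpu = get_cpu();	/* disables preemption, returns this CPU's id */

per_cpu_counter[cpu]++;	/* safe: the task cannot migrate CPUs here */
put_cpu();		/* re-enables preemption */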
@@ -1275,7 +1292,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 
 	if ((gfp & __GFP_WAIT) && !in_interrupt())
 		cpuset_update_task_memory_state();
-	if (!pol || in_interrupt())
+	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 	if (pol->policy == MPOL_INTERLEAVE)
 		return alloc_page_interleave(gfp, order, interleave_nodes(pol));
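__GFP_THISNODE asks the page allocator not to fall back to other nodes; honoring an interleave or bind policy would contradict that, so such requests now drop to the default policy. A sketch of a caller that relies on this behavior (nid is a hypothetical node id):

/* Keep this allocation on node `nid` regardless of the current
 * task's mempolicy; no cross-node fallback is attempted. */
struct page *page = alloc_pages_node(nid, GFP_KERNEL | __GFP_THISNODE, 0);
if (!page)
	goto fail;	/* node `nid` had no free pages of this order */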
@@ -1634,7 +1651,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
 
 		nodes_clear(nodes);
 		for (z = pol->v.zonelist->zones; *z; z++)
-			node_set((*z)->zone_pgdat->node_id, nodes);
+			node_set(zone_to_nid(*z), nodes);
 		nodes_remap(tmp, nodes, *mpolmask, *newmask);
 		nodes = tmp;
 
@@ -1817,7 +1834,7 @@ static inline void check_huge_range(struct vm_area_struct *vma,
 
 int show_numa_map(struct seq_file *m, void *v)
 {
-	struct task_struct *task = m->private;
+	struct proc_maps_private *priv = m->private;
 	struct vm_area_struct *vma = v;
 	struct numa_maps *md;
 	struct file *file = vma->vm_file;
@@ -1833,7 +1850,7 @@ int show_numa_map(struct seq_file *m, void *v)
 		return 0;
 
 	mpol_to_str(buffer, sizeof(buffer),
-			get_vma_policy(task, vma, vma->vm_start));
+			get_vma_policy(priv->task, vma, vma->vm_start));
 
 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
 
@@ -1887,7 +1904,7 @@ out:
 	kfree(md);
 
 	if (m->count < m->size)
-		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
 
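The last three hunks switch show_numa_map() from stashing a bare task_struct pointer in the seq_file to the shared proc_maps_private used by the other /proc/<pid> map files; the end-of-walk sentinel then comes from priv->tail_vma instead of being re-derived via get_gate_vma(). Judging only from the fields these hunks touch, the private structure must carry at least the following (a sketch; the real definition lives in fs/proc and may have more members):

struct proc_maps_private {
	struct task_struct *task;	 /* task whose mappings are shown */
	struct vm_area_struct *tail_vma; /* sentinel: last vma in the walk */
	/* ... possibly more members ... */
};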