Diffstat (limited to 'mm/mempolicy.c'):

 mm/mempolicy.c | 51 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 17 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ec4a1a950df9..cf18f0942553 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
 
 /* Highest zone. An specific allocation for a zone below that is not
    policied. */
-int policy_zone = ZONE_DMA;
+enum zone_type policy_zone = ZONE_DMA;
 
 struct mempolicy default_policy = {
 	.refcnt = ATOMIC_INIT(1), /* never free it */
@@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
 static struct zonelist *bind_zonelist(nodemask_t *nodes)
 {
 	struct zonelist *zl;
-	int num, max, nd, k;
+	int num, max, nd;
+	enum zone_type k;
 
 	max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
 	zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
@@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	   lower zones etc. Avoid empty zones because the memory allocator
 	   doesn't like them. If you implement node hot removal you
 	   have to fix that. */
-	for (k = policy_zone; k >= 0; k--) {
+	k = policy_zone;
+	while (1) {
 		for_each_node_mask(nd, *nodes) {
 			struct zone *z = &NODE_DATA(nd)->node_zones[k];
 			if (z->present_pages > 0)
 				zl->zones[num++] = z;
 		}
+		if (k == 0)
+			break;
+		k--;
 	}
 	zl->zones[num] = NULL;
 	return zl;
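
Note on this hunk: once k is an enum zone_type (unsigned on typical compilers), the old termination test "k >= 0" is vacuously true and the final decrement would wrap past zero instead of going negative, so the countdown is restructured around an explicit bottom check. A standalone sketch of the pitfall, using hypothetical names, not kernel code:

	#include <stdio.h>

	int main(void)
	{
		unsigned int k = 2;	/* stands in for enum zone_type */

		/* "k >= 0" is always true for an unsigned k; without the
		 * explicit check below, k would wrap to UINT_MAX and the
		 * loop would never terminate. */
		while (1) {
			printf("visiting zone %u\n", k);
			if (k == 0)
				break;
			k--;
		}
		return 0;
	}
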
@@ -482,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
 	switch (p->policy) {
 	case MPOL_BIND:
 		for (i = 0; p->v.zonelist->zones[i]; i++)
-			node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id,
+			node_set(zone_to_nid(p->v.zonelist->zones[i]),
 				*nodes);
 		break;
 	case MPOL_DEFAULT:
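
This and the two later hunks replace the open-coded zone->zone_pgdat->node_id chain with the zone_to_nid() helper. Judging only from the code it replaces, the helper must be behavior-equivalent to the sketch below; the real definition lives in the mm headers and may resolve the id differently (e.g. from a cached field):

	/* sketch only; behavior inferred from the lines this hunk replaces */
	static inline int zone_to_nid(struct zone *zone)
	{
		return zone->zone_pgdat->node_id;
	}
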
@@ -632,6 +637,10 @@ int do_migrate_pages(struct mm_struct *mm,
 
 	down_read(&mm->mmap_sem);
 
+	err = migrate_vmas(mm, from_nodes, to_nodes, flags);
+	if (err)
+		goto out;
+
 	/*
 	 * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
 	 * bit in 'to' is not also set in 'tmp'. Clear the found 'source'
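
do_migrate_pages() now calls migrate_vmas() up front, giving each mapping a chance to migrate its own pages before the generic nodemask walk runs. The sketch below is an assumed shape for such a dispatcher, inferred from the call site in this hunk; see mm/migrate.c for the real implementation:

	/* assumed shape: walk the VMAs and invoke a per-mapping migrate
	 * method where the mapping provides one */
	int migrate_vmas(struct mm_struct *mm, const nodemask_t *from,
			const nodemask_t *to, unsigned long flags)
	{
		struct vm_area_struct *vma;
		int err = 0;

		for (vma = mm->mmap; vma && !err; vma = vma->vm_next)
			if (vma->vm_ops && vma->vm_ops->migrate)
				err = vma->vm_ops->migrate(vma, from, to, flags);
		return err;
	}
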
@@ -691,7 +700,7 @@ int do_migrate_pages(struct mm_struct *mm,
 		if (err < 0)
 			break;
 	}
-
+out:
 	up_read(&mm->mmap_sem);
 	if (err < 0)
 		return err;
@@ -1127,7 +1136,9 @@ static unsigned interleave_nodes(struct mempolicy *policy)
  */
 unsigned slab_node(struct mempolicy *policy)
 {
-	switch (policy->policy) {
+	int pol = policy ? policy->policy : MPOL_DEFAULT;
+
+	switch (pol) {
 	case MPOL_INTERLEAVE:
 		return interleave_nodes(policy);
 
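
slab_node() previously dereferenced its argument unconditionally; after this hunk a NULL policy is treated as MPOL_DEFAULT. A hypothetical call-site sketch of why that matters:

	/* hypothetical caller: tasks that never set a mempolicy have
	 * current->mempolicy == NULL, which slab_node() now tolerates */
	static unsigned pick_slab_node(void)
	{
		return slab_node(current->mempolicy);
	}
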
@@ -1136,7 +1147,7 @@ unsigned slab_node(struct mempolicy *policy)
 		 * Follow bind policy behavior and start allocation at the
 		 * first node.
 		 */
-		return policy->v.zonelist->zones[0]->zone_pgdat->node_id;
+		return zone_to_nid(policy->v.zonelist->zones[0]);
 
 	case MPOL_PREFERRED:
 		if (policy->v.preferred_node >= 0)
@@ -1172,7 +1183,15 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 	if (vma) {
 		unsigned long off;
 
-		off = vma->vm_pgoff;
+		/*
+		 * for small pages, there is no difference between
+		 * shift and PAGE_SHIFT, so the bit-shift is safe.
+		 * for huge pages, since vm_pgoff is in units of small
+		 * pages, we need to shift off the always 0 bits to get
+		 * a useful offset.
+		 */
+		BUG_ON(shift < PAGE_SHIFT);
+		off = vma->vm_pgoff >> (shift - PAGE_SHIFT);
 		off += (addr - vma->vm_start) >> shift;
 		return offset_il_node(pol, vma, off);
 	} else
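
The new offset computation keeps interleaving meaningful for huge pages: vm_pgoff is always in small-page units, so for a mapping with page shift "shift" the low (shift - PAGE_SHIFT) bits of vm_pgoff are constant and carry no information until shifted off. A standalone arithmetic check, assuming 4 KiB base pages (PAGE_SHIFT = 12) and 2 MiB huge pages (shift = 21):

	#include <assert.h>

	int main(void)
	{
		unsigned long page_shift = 12, shift = 21;
		/* a mapping starting at the third huge page of a file:
		 * each huge page spans 512 small pages, so vm_pgoff = 2 * 512 */
		unsigned long vm_pgoff = 2 * 512;
		unsigned long off = vm_pgoff >> (shift - page_shift);

		/* consecutive huge pages now map to consecutive offsets */
		assert(off == 2);
		return 0;
	}
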
@@ -1205,10 +1224,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 
 	zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
 	page = __alloc_pages(gfp, order, zl);
-	if (page && page_zone(page) == zl->zones[0]) {
-		zone_pcp(zl->zones[0],get_cpu())->interleave_hit++;
-		put_cpu();
-	}
+	if (page && page_zone(page) == zl->zones[0])
+		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
 	return page;
 }
 
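
Design note on this hunk: interleave-hit accounting moves from a hand-rolled per-cpu field on the pageset, which needed the explicit get_cpu()/put_cpu() pair, to the generic NUMA vmstat counter, which handles preemption itself and is keyed off the page that was actually allocated. On kernels of this vintage the counter surfaces per node as the interleave_hit field of /sys/devices/system/node/node*/numastat.
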
@@ -1275,7 +1292,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 
 	if ((gfp & __GFP_WAIT) && !in_interrupt())
 		cpuset_update_task_memory_state();
-	if (!pol || in_interrupt())
+	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 	if (pol->policy == MPOL_INTERLEAVE)
 		return alloc_page_interleave(gfp, order, interleave_nodes(pol));
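
__GFP_THISNODE asks for memory on one specific node, so letting an interleave or bind task policy reroute the request would defeat the flag; such allocations now fall back to default_policy. A hypothetical caller sketch:

	/* with __GFP_THISNODE, this now stays on the local node even if
	 * the calling task runs under MPOL_INTERLEAVE */
	struct page *page = alloc_pages(GFP_KERNEL | __GFP_THISNODE, 0);
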
@@ -1634,7 +1651,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
 
 		nodes_clear(nodes);
 		for (z = pol->v.zonelist->zones; *z; z++)
-			node_set((*z)->zone_pgdat->node_id, nodes);
+			node_set(zone_to_nid(*z), nodes);
 		nodes_remap(tmp, nodes, *mpolmask, *newmask);
 		nodes = tmp;
 
@@ -1817,7 +1834,7 @@ static inline void check_huge_range(struct vm_area_struct *vma,
 
 int show_numa_map(struct seq_file *m, void *v)
 {
-	struct task_struct *task = m->private;
+	struct proc_maps_private *priv = m->private;
 	struct vm_area_struct *vma = v;
 	struct numa_maps *md;
 	struct file *file = vma->vm_file;
@@ -1833,7 +1850,7 @@ int show_numa_map(struct seq_file *m, void *v)
 		return 0;
 
 	mpol_to_str(buffer, sizeof(buffer),
-			get_vma_policy(task, vma, vma->vm_start));
+			get_vma_policy(priv->task, vma, vma->vm_start));
 
 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
 
@@ -1887,7 +1904,7 @@ out:
 	kfree(md);
 
 	if (m->count < m->size)
-		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
 
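
These last three hunks stop passing a bare task_struct through m->private: the seq_file now carries the proc_maps_private wrapper that the other /proc/<pid> map files use, and the end-of-walk check compares against its tail_vma sentinel instead of calling get_gate_vma(). Only the two fields this file touches are certain from the diff; the sketch below is otherwise an assumption (the real definition lives under fs/proc/):

	/* assumed shape; only the fields used by show_numa_map() shown */
	struct proc_maps_private {
		struct task_struct *task;	 /* task whose mm is shown */
		struct vm_area_struct *tail_vma; /* sentinel closing the walk */
		/* ... */
	};
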
