 include/linux/mempolicy.h |  4
 mm/hugetlb.c              |  4
 mm/mempolicy.c            | 79
 3 files changed, 75 insertions(+), 12 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5bdd656e88cf..a020eb2d4e2a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
 
 extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
-		unsigned long addr, gfp_t gfp_flags);
+		unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol);
 extern unsigned slab_node(struct mempolicy *policy);
 
 extern enum zone_type policy_zone;
@@ -256,7 +256,7 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
 #define set_cpuset_being_rebound(x) do {} while (0)
 
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
-		unsigned long addr, gfp_t gfp_flags)
+		unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
 {
 	return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags);
 }
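The !NUMA stub above never touches *mpol, which looks unsafe given that its only caller, dequeue_huge_page() (changed below), passes an uninitialized pointer and frees it unconditionally. It works because, with !CONFIG_NUMA, struct mempolicy is an empty dummy and the freeing helper is a no-op; roughly (a context sketch of the era's header, not part of this patch):

    /* !CONFIG_NUMA stubs in include/linux/mempolicy.h (sketch) */
    struct mempolicy {};

    static inline void mpol_free(struct mempolicy *p)
    {
    }

so the stub may leave *mpol uninitialized without breaking the caller.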
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index de4cf458d6e1..84c795ee2d65 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -71,8 +71,9 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 {
 	int nid;
 	struct page *page = NULL;
+	struct mempolicy *mpol;
 	struct zonelist *zonelist = huge_zonelist(vma, address,
-					htlb_alloc_mask);
+					htlb_alloc_mask, &mpol);
 	struct zone **z;
 
 	for (z = zonelist->zones; *z; z++) {
@@ -87,6 +88,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 			break;
 		}
 	}
+	mpol_free(mpol);	/* unref if mpol !NULL */
 	return page;
 }
 
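In the NUMA case, the unconditional mpol_free() above is safe because huge_zonelist() stores either NULL or a policy that needs an unref into *mpol, and mpol_free() tolerates NULL. For context, the reference helpers this patch leans on look roughly like this in the era's include/linux/mempolicy.h (a sketch, not part of the patch):

    /* NULL-tolerant reference helpers (sketch of the era's header) */
    static inline void mpol_get(struct mempolicy *pol)
    {
    	if (pol)
    		atomic_inc(&pol->refcnt);	/* take a reference */
    }

    static inline void mpol_free(struct mempolicy *pol)
    {
    	if (pol)
    		__mpol_free(pol);	/* drop ref; frees at zero */
    }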
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bb54b88c3d5a..3d6ac9505d07 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1077,21 +1077,37 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
 
 #endif
 
-/* Return effective policy for a VMA */
+/*
+ * get_vma_policy(@task, @vma, @addr)
+ * @task - task for fallback if vma policy == default
+ * @vma - virtual memory area whose policy is sought
+ * @addr - address in @vma for shared policy lookup
+ *
+ * Returns effective policy for a VMA at specified address.
+ * Falls back to @task or system default policy, as necessary.
+ * Returned policy has extra reference count if shared, vma,
+ * or some other task's policy [show_numa_maps() can pass
+ * @task != current].  It is the caller's responsibility to
+ * free the reference in these cases.
+ */
 static struct mempolicy * get_vma_policy(struct task_struct *task,
 		struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = task->mempolicy;
+	int shared_pol = 0;
 
 	if (vma) {
-		if (vma->vm_ops && vma->vm_ops->get_policy)
+		if (vma->vm_ops && vma->vm_ops->get_policy) {
 			pol = vma->vm_ops->get_policy(vma, addr);
-		else if (vma->vm_policy &&
+			shared_pol = 1;	/* if pol non-NULL, add ref below */
+		} else if (vma->vm_policy &&
 				vma->vm_policy->policy != MPOL_DEFAULT)
 			pol = vma->vm_policy;
 	}
 	if (!pol)
 		pol = &default_policy;
+	else if (!shared_pol && pol != current->mempolicy)
+		mpol_get(pol);	/* vma or other task's policy */
 	return pol;
 }
 
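The new comment block spells out the contract: whenever the returned policy is a shared policy, a vma policy, or another task's policy, the caller owns an extra reference. A minimal caller sketch of that contract (illustrative only; the patch's real callers follow below):

    struct mempolicy *pol = get_vma_policy(task, vma, addr);
    /* ... use pol: pick a zonelist, format the policy, ... */
    if (pol != &default_policy && pol != current->mempolicy)
    	__mpol_free(pol);	/* drop the extra reference taken above */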
@@ -1207,19 +1223,45 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 }
 
 #ifdef CONFIG_HUGETLBFS
-/* Return a zonelist suitable for a huge page allocation. */
+/*
+ * huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
+ * @vma = virtual memory area whose policy is sought
+ * @addr = address in @vma for shared policy lookup and interleave policy
+ * @gfp_flags = for requested zone
+ * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy
+ *
+ * Returns a zonelist suitable for a huge page allocation.
+ * If the effective policy is 'BIND, returns pointer to policy's zonelist.
+ * If it is also a policy for which get_vma_policy() returns an extra
+ * reference, we must hold that reference until after allocation.
+ * In that case, return policy via @mpol so hugetlb allocation can drop
+ * the reference.  For non-'BIND referenced policies, we can/do drop the
+ * reference here, so the caller doesn't need to know about the special case
+ * for default and current task policy.
+ */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
-				gfp_t gfp_flags)
+				gfp_t gfp_flags, struct mempolicy **mpol)
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+	struct zonelist *zl;
 
+	*mpol = NULL;		/* probably no unref needed */
 	if (pol->policy == MPOL_INTERLEAVE) {
 		unsigned nid;
 
 		nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+		__mpol_free(pol);		/* finished with pol */
 		return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
 	}
-	return zonelist_policy(GFP_HIGHUSER, pol);
+
+	zl = zonelist_policy(GFP_HIGHUSER, pol);
+	if (unlikely(pol != &default_policy && pol != current->mempolicy)) {
+		if (pol->policy != MPOL_BIND)
+			__mpol_free(pol);	/* finished with pol */
+		else
+			*mpol = pol;	/* unref needed after allocation */
+	}
+	return zl;
 }
 #endif
 
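MPOL_BIND is singled out because, unlike every other policy, its zonelist is storage owned by the policy itself, so the reference must outlive the allocation. A condensed sketch of zonelist_policy() (unchanged by this patch; the real function also handles MPOL_PREFERRED and a cpuset validity check):

    static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
    {
    	if (policy->policy == MPOL_BIND)
    		return policy->v.zonelist;	/* kmalloc'd with the policy,
    						 * freed by __mpol_free()! */
    	/* all other policies resolve to a static per-node zonelist */
    	return NODE_DATA(numa_node_id())->node_zonelists + gfp_zone(gfp);
    }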
@@ -1264,6 +1306,7 @@ struct page *
 alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+	struct zonelist *zl;
 
 	cpuset_update_task_memory_state();
 
@@ -1273,7 +1316,19 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 		return alloc_page_interleave(gfp, 0, nid);
 	}
-	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
+	zl = zonelist_policy(gfp, pol);
+	if (pol != &default_policy && pol != current->mempolicy) {
+		/*
+		 * slow path: ref counted policy -- shared or vma
+		 */
+		struct page *page = __alloc_pages(gfp, 0, zl);
+		__mpol_free(pol);
+		return page;
+	}
+	/*
+	 * fast path: default or task policy
+	 */
+	return __alloc_pages(gfp, 0, zl);
 }
 
 /**
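The pol != &default_policy && pol != current->mempolicy test now appears at three call sites; a hypothetical helper (not in the patch) makes the invariant explicit and could replace the open-coded checks:

    /* Hypothetical helper: drop the extra reference get_vma_policy()
     * may have taken; a no-op for default and current task policy. */
    static inline void vma_policy_unref(struct mempolicy *pol)
    {
    	if (pol != &default_policy && pol != current->mempolicy)
    		__mpol_free(pol);
    }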
@@ -1872,6 +1927,7 @@ int show_numa_map(struct seq_file *m, void *v)
 	struct numa_maps *md;
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
+	struct mempolicy *pol;
 	int n;
 	char buffer[50];
 
@@ -1882,8 +1938,13 @@ int show_numa_map(struct seq_file *m, void *v)
 	if (!md)
 		return 0;
 
-	mpol_to_str(buffer, sizeof(buffer),
-			get_vma_policy(priv->task, vma, vma->vm_start));
+	pol = get_vma_policy(priv->task, vma, vma->vm_start);
+	mpol_to_str(buffer, sizeof(buffer), pol);
+	/*
+	 * unref shared or other task's mempolicy
+	 */
+	if (pol != &default_policy && pol != current->mempolicy)
+		__mpol_free(pol);
 
 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
 
