diff options
-rw-r--r-- | include/linux/mempolicy.h | 4 | ||||
-rw-r--r-- | mm/hugetlb.c | 4 | ||||
-rw-r--r-- | mm/mempolicy.c | 79 |
3 files changed, 75 insertions, 12 deletions
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5bdd656e88cf..a020eb2d4e2a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
@@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p); | |||
159 | 159 | ||
160 | extern struct mempolicy default_policy; | 160 | extern struct mempolicy default_policy; |
161 | extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, | 161 | extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, |
162 | unsigned long addr, gfp_t gfp_flags); | 162 | unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol); |
163 | extern unsigned slab_node(struct mempolicy *policy); | 163 | extern unsigned slab_node(struct mempolicy *policy); |
164 | 164 | ||
165 | extern enum zone_type policy_zone; | 165 | extern enum zone_type policy_zone; |
@@ -256,7 +256,7 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p) | |||
256 | #define set_cpuset_being_rebound(x) do {} while (0) | 256 | #define set_cpuset_being_rebound(x) do {} while (0) |
257 | 257 | ||
258 | static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, | 258 | static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, |
259 | unsigned long addr, gfp_t gfp_flags) | 259 | unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol) |
260 | { | 260 | { |
261 | return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); | 261 | return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); |
262 | } | 262 | } |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index de4cf458d6e1..84c795ee2d65 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -71,8 +71,9 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, | |||
71 | { | 71 | { |
72 | int nid; | 72 | int nid; |
73 | struct page *page = NULL; | 73 | struct page *page = NULL; |
74 | struct mempolicy *mpol; | ||
74 | struct zonelist *zonelist = huge_zonelist(vma, address, | 75 | struct zonelist *zonelist = huge_zonelist(vma, address, |
75 | htlb_alloc_mask); | 76 | htlb_alloc_mask, &mpol); |
76 | struct zone **z; | 77 | struct zone **z; |
77 | 78 | ||
78 | for (z = zonelist->zones; *z; z++) { | 79 | for (z = zonelist->zones; *z; z++) { |
@@ -87,6 +88,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, | |||
87 | break; | 88 | break; |
88 | } | 89 | } |
89 | } | 90 | } |
91 | mpol_free(mpol); /* unref if mpol !NULL */ | ||
90 | return page; | 92 | return page; |
91 | } | 93 | } |
92 | 94 | ||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index bb54b88c3d5a..3d6ac9505d07 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1077,21 +1077,37 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, | |||
1077 | 1077 | ||
1078 | #endif | 1078 | #endif |
1079 | 1079 | ||
1080 | /* Return effective policy for a VMA */ | 1080 | /* |
1081 | * get_vma_policy(@task, @vma, @addr) | ||
1082 | * @task - task for fallback if vma policy == default | ||
1083 | * @vma - virtual memory area whose policy is sought | ||
1084 | * @addr - address in @vma for shared policy lookup | ||
1085 | * | ||
1086 | * Returns effective policy for a VMA at specified address. | ||
1087 | * Falls back to @task or system default policy, as necessary. | ||
1088 | * Returned policy has extra reference count if shared, vma, | ||
1089 | * or some other task's policy [show_numa_maps() can pass | ||
1090 | * @task != current]. It is the caller's responsibility to | ||
1091 | * free the reference in these cases. | ||
1092 | */ | ||
1081 | static struct mempolicy * get_vma_policy(struct task_struct *task, | 1093 | static struct mempolicy * get_vma_policy(struct task_struct *task, |
1082 | struct vm_area_struct *vma, unsigned long addr) | 1094 | struct vm_area_struct *vma, unsigned long addr) |
1083 | { | 1095 | { |
1084 | struct mempolicy *pol = task->mempolicy; | 1096 | struct mempolicy *pol = task->mempolicy; |
1097 | int shared_pol = 0; | ||
1085 | 1098 | ||
1086 | if (vma) { | 1099 | if (vma) { |
1087 | if (vma->vm_ops && vma->vm_ops->get_policy) | 1100 | if (vma->vm_ops && vma->vm_ops->get_policy) { |
1088 | pol = vma->vm_ops->get_policy(vma, addr); | 1101 | pol = vma->vm_ops->get_policy(vma, addr); |
1089 | else if (vma->vm_policy && | 1102 | shared_pol = 1; /* if pol non-NULL, add ref below */ |
1103 | } else if (vma->vm_policy && | ||
1090 | vma->vm_policy->policy != MPOL_DEFAULT) | 1104 | vma->vm_policy->policy != MPOL_DEFAULT) |
1091 | pol = vma->vm_policy; | 1105 | pol = vma->vm_policy; |
1092 | } | 1106 | } |
1093 | if (!pol) | 1107 | if (!pol) |
1094 | pol = &default_policy; | 1108 | pol = &default_policy; |
1109 | else if (!shared_pol && pol != current->mempolicy) | ||
1110 | mpol_get(pol); /* vma or other task's policy */ | ||
1095 | return pol; | 1111 | return pol; |
1096 | } | 1112 | } |
1097 | 1113 | ||
@@ -1207,19 +1223,45 @@ static inline unsigned interleave_nid(struct mempolicy *pol, | |||
1207 | } | 1223 | } |
1208 | 1224 | ||
1209 | #ifdef CONFIG_HUGETLBFS | 1225 | #ifdef CONFIG_HUGETLBFS |
1210 | /* Return a zonelist suitable for a huge page allocation. */ | 1226 | /* |
1227 | * huge_zonelist(@vma, @addr, @gfp_flags, @mpol) | ||
1228 | * @vma = virtual memory area whose policy is sought | ||
1229 | * @addr = address in @vma for shared policy lookup and interleave policy | ||
1230 | * @gfp_flags = for requested zone | ||
1231 | * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy | ||
1232 | * | ||
1233 | * Returns a zonelist suitable for a huge page allocation. | ||
1234 | * If the effective policy is 'BIND, returns pointer to policy's zonelist. | ||
1235 | * If it is also a policy for which get_vma_policy() returns an extra | ||
1236 | * reference, we must hold that reference until after allocation. | ||
1237 | * In that case, return policy via @mpol so hugetlb allocation can drop | ||
1238 | * the reference. For non-'BIND referenced policies, we can/do drop the | ||
1239 | * reference here, so the caller doesn't need to know about the special case | ||
1240 | * for default and current task policy. | ||
1241 | */ | ||
1211 | struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, | 1242 | struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, |
1212 | gfp_t gfp_flags) | 1243 | gfp_t gfp_flags, struct mempolicy **mpol) |
1213 | { | 1244 | { |
1214 | struct mempolicy *pol = get_vma_policy(current, vma, addr); | 1245 | struct mempolicy *pol = get_vma_policy(current, vma, addr); |
1246 | struct zonelist *zl; | ||
1215 | 1247 | ||
1248 | *mpol = NULL; /* probably no unref needed */ | ||
1216 | if (pol->policy == MPOL_INTERLEAVE) { | 1249 | if (pol->policy == MPOL_INTERLEAVE) { |
1217 | unsigned nid; | 1250 | unsigned nid; |
1218 | 1251 | ||
1219 | nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); | 1252 | nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); |
1253 | __mpol_free(pol); /* finished with pol */ | ||
1220 | return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags); | 1254 | return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags); |
1221 | } | 1255 | } |
1222 | return zonelist_policy(GFP_HIGHUSER, pol); | 1256 | |
1257 | zl = zonelist_policy(GFP_HIGHUSER, pol); | ||
1258 | if (unlikely(pol != &default_policy && pol != current->mempolicy)) { | ||
1259 | if (pol->policy != MPOL_BIND) | ||
1260 | __mpol_free(pol); /* finished with pol */ | ||
1261 | else | ||
1262 | *mpol = pol; /* unref needed after allocation */ | ||
1263 | } | ||
1264 | return zl; | ||
1223 | } | 1265 | } |
1224 | #endif | 1266 | #endif |
1225 | 1267 | ||
@@ -1264,6 +1306,7 @@ struct page * | |||
1264 | alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) | 1306 | alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) |
1265 | { | 1307 | { |
1266 | struct mempolicy *pol = get_vma_policy(current, vma, addr); | 1308 | struct mempolicy *pol = get_vma_policy(current, vma, addr); |
1309 | struct zonelist *zl; | ||
1267 | 1310 | ||
1268 | cpuset_update_task_memory_state(); | 1311 | cpuset_update_task_memory_state(); |
1269 | 1312 | ||
@@ -1273,7 +1316,19 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) | |||
1273 | nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); | 1316 | nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); |
1274 | return alloc_page_interleave(gfp, 0, nid); | 1317 | return alloc_page_interleave(gfp, 0, nid); |
1275 | } | 1318 | } |
1276 | return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); | 1319 | zl = zonelist_policy(gfp, pol); |
1320 | if (pol != &default_policy && pol != current->mempolicy) { | ||
1321 | /* | ||
1322 | * slow path: ref counted policy -- shared or vma | ||
1323 | */ | ||
1324 | struct page *page = __alloc_pages(gfp, 0, zl); | ||
1325 | __mpol_free(pol); | ||
1326 | return page; | ||
1327 | } | ||
1328 | /* | ||
1329 | * fast path: default or task policy | ||
1330 | */ | ||
1331 | return __alloc_pages(gfp, 0, zl); | ||
1277 | } | 1332 | } |
1278 | 1333 | ||
1279 | /** | 1334 | /** |
@@ -1872,6 +1927,7 @@ int show_numa_map(struct seq_file *m, void *v) | |||
1872 | struct numa_maps *md; | 1927 | struct numa_maps *md; |
1873 | struct file *file = vma->vm_file; | 1928 | struct file *file = vma->vm_file; |
1874 | struct mm_struct *mm = vma->vm_mm; | 1929 | struct mm_struct *mm = vma->vm_mm; |
1930 | struct mempolicy *pol; | ||
1875 | int n; | 1931 | int n; |
1876 | char buffer[50]; | 1932 | char buffer[50]; |
1877 | 1933 | ||
@@ -1882,8 +1938,13 @@ int show_numa_map(struct seq_file *m, void *v) | |||
1882 | if (!md) | 1938 | if (!md) |
1883 | return 0; | 1939 | return 0; |
1884 | 1940 | ||
1885 | mpol_to_str(buffer, sizeof(buffer), | 1941 | pol = get_vma_policy(priv->task, vma, vma->vm_start); |
1886 | get_vma_policy(priv->task, vma, vma->vm_start)); | 1942 | mpol_to_str(buffer, sizeof(buffer), pol); |
1943 | /* | ||
1944 | * unref shared or other task's mempolicy | ||
1945 | */ | ||
1946 | if (pol != &default_policy && pol != current->mempolicy) | ||
1947 | __mpol_free(pol); | ||
1887 | 1948 | ||
1888 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | 1949 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); |
1889 | 1950 | ||