aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/mempolicy.h4
-rw-r--r--mm/hugetlb.c4
-rw-r--r--mm/mempolicy.c79
3 files changed, 75 insertions, 12 deletions
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5bdd656e88cf..a020eb2d4e2a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
159 159
160extern struct mempolicy default_policy; 160extern struct mempolicy default_policy;
161extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, 161extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
162 unsigned long addr, gfp_t gfp_flags); 162 unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol);
163extern unsigned slab_node(struct mempolicy *policy); 163extern unsigned slab_node(struct mempolicy *policy);
164 164
165extern enum zone_type policy_zone; 165extern enum zone_type policy_zone;
@@ -256,7 +256,7 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
256#define set_cpuset_being_rebound(x) do {} while (0) 256#define set_cpuset_being_rebound(x) do {} while (0)
257 257
258static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, 258static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
259 unsigned long addr, gfp_t gfp_flags) 259 unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
260{ 260{
261 return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); 261 return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags);
262} 262}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index de4cf458d6e1..84c795ee2d65 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -71,8 +71,9 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
71{ 71{
72 int nid; 72 int nid;
73 struct page *page = NULL; 73 struct page *page = NULL;
74 struct mempolicy *mpol;
74 struct zonelist *zonelist = huge_zonelist(vma, address, 75 struct zonelist *zonelist = huge_zonelist(vma, address,
75 htlb_alloc_mask); 76 htlb_alloc_mask, &mpol);
76 struct zone **z; 77 struct zone **z;
77 78
78 for (z = zonelist->zones; *z; z++) { 79 for (z = zonelist->zones; *z; z++) {
@@ -87,6 +88,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
87 break; 88 break;
88 } 89 }
89 } 90 }
91 mpol_free(mpol); /* unref if mpol !NULL */
90 return page; 92 return page;
91} 93}
92 94
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bb54b88c3d5a..3d6ac9505d07 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1077,21 +1077,37 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
1077 1077
1078#endif 1078#endif
1079 1079
1080/* Return effective policy for a VMA */ 1080/*
1081 * get_vma_policy(@task, @vma, @addr)
1082 * @task - task for fallback if vma policy == default
1083 * @vma - virtual memory area whose policy is sought
1084 * @addr - address in @vma for shared policy lookup
1085 *
1086 * Returns effective policy for a VMA at specified address.
1087 * Falls back to @task or system default policy, as necessary.
1088 * Returned policy has extra reference count if shared, vma,
1089 * or some other task's policy [show_numa_maps() can pass
1090 * @task != current]. It is the caller's responsibility to
1091 * free the reference in these cases.
1092 */
1081static struct mempolicy * get_vma_policy(struct task_struct *task, 1093static struct mempolicy * get_vma_policy(struct task_struct *task,
1082 struct vm_area_struct *vma, unsigned long addr) 1094 struct vm_area_struct *vma, unsigned long addr)
1083{ 1095{
1084 struct mempolicy *pol = task->mempolicy; 1096 struct mempolicy *pol = task->mempolicy;
1097 int shared_pol = 0;
1085 1098
1086 if (vma) { 1099 if (vma) {
1087 if (vma->vm_ops && vma->vm_ops->get_policy) 1100 if (vma->vm_ops && vma->vm_ops->get_policy) {
1088 pol = vma->vm_ops->get_policy(vma, addr); 1101 pol = vma->vm_ops->get_policy(vma, addr);
1089 else if (vma->vm_policy && 1102 shared_pol = 1; /* if pol non-NULL, add ref below */
1103 } else if (vma->vm_policy &&
1090 vma->vm_policy->policy != MPOL_DEFAULT) 1104 vma->vm_policy->policy != MPOL_DEFAULT)
1091 pol = vma->vm_policy; 1105 pol = vma->vm_policy;
1092 } 1106 }
1093 if (!pol) 1107 if (!pol)
1094 pol = &default_policy; 1108 pol = &default_policy;
1109 else if (!shared_pol && pol != current->mempolicy)
1110 mpol_get(pol); /* vma or other task's policy */
1095 return pol; 1111 return pol;
1096} 1112}
1097 1113
@@ -1207,19 +1223,45 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
1207} 1223}
1208 1224
1209#ifdef CONFIG_HUGETLBFS 1225#ifdef CONFIG_HUGETLBFS
1210/* Return a zonelist suitable for a huge page allocation. */ 1226/*
1227 * huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
1228 * @vma = virtual memory area whose policy is sought
1229 * @addr = address in @vma for shared policy lookup and interleave policy
1230 * @gfp_flags = for requested zone
1231 * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy
1232 *
1233 * Returns a zonelist suitable for a huge page allocation.
1234 * If the effective policy is 'BIND, returns pointer to policy's zonelist.
1235 * If it is also a policy for which get_vma_policy() returns an extra
1236 * reference, we must hold that reference until after allocation.
1237 * In that case, return policy via @mpol so hugetlb allocation can drop
1238 * the reference. For non-'BIND referenced policies, we can/do drop the
1239 * reference here, so the caller doesn't need to know about the special case
1240 * for default and current task policy.
1241 */
1211struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, 1242struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
1212 gfp_t gfp_flags) 1243 gfp_t gfp_flags, struct mempolicy **mpol)
1213{ 1244{
1214 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1245 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1246 struct zonelist *zl;
1215 1247
1248 *mpol = NULL; /* probably no unref needed */
1216 if (pol->policy == MPOL_INTERLEAVE) { 1249 if (pol->policy == MPOL_INTERLEAVE) {
1217 unsigned nid; 1250 unsigned nid;
1218 1251
1219 nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); 1252 nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
1253 __mpol_free(pol); /* finished with pol */
1220 return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags); 1254 return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
1221 } 1255 }
1222 return zonelist_policy(GFP_HIGHUSER, pol); 1256
1257 zl = zonelist_policy(GFP_HIGHUSER, pol);
1258 if (unlikely(pol != &default_policy && pol != current->mempolicy)) {
1259 if (pol->policy != MPOL_BIND)
1260 __mpol_free(pol); /* finished with pol */
1261 else
1262 *mpol = pol; /* unref needed after allocation */
1263 }
1264 return zl;
1223} 1265}
1224#endif 1266#endif
1225 1267
@@ -1264,6 +1306,7 @@ struct page *
1264alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) 1306alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
1265{ 1307{
1266 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1308 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1309 struct zonelist *zl;
1267 1310
1268 cpuset_update_task_memory_state(); 1311 cpuset_update_task_memory_state();
1269 1312
@@ -1273,7 +1316,19 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
1273 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); 1316 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
1274 return alloc_page_interleave(gfp, 0, nid); 1317 return alloc_page_interleave(gfp, 0, nid);
1275 } 1318 }
1276 return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); 1319 zl = zonelist_policy(gfp, pol);
1320 if (pol != &default_policy && pol != current->mempolicy) {
1321 /*
1322 * slow path: ref counted policy -- shared or vma
1323 */
1324 struct page *page = __alloc_pages(gfp, 0, zl);
1325 __mpol_free(pol);
1326 return page;
1327 }
1328 /*
1329 * fast path: default or task policy
1330 */
1331 return __alloc_pages(gfp, 0, zl);
1277} 1332}
1278 1333
1279/** 1334/**
@@ -1872,6 +1927,7 @@ int show_numa_map(struct seq_file *m, void *v)
1872 struct numa_maps *md; 1927 struct numa_maps *md;
1873 struct file *file = vma->vm_file; 1928 struct file *file = vma->vm_file;
1874 struct mm_struct *mm = vma->vm_mm; 1929 struct mm_struct *mm = vma->vm_mm;
1930 struct mempolicy *pol;
1875 int n; 1931 int n;
1876 char buffer[50]; 1932 char buffer[50];
1877 1933
@@ -1882,8 +1938,13 @@ int show_numa_map(struct seq_file *m, void *v)
1882 if (!md) 1938 if (!md)
1883 return 0; 1939 return 0;
1884 1940
1885 mpol_to_str(buffer, sizeof(buffer), 1941 pol = get_vma_policy(priv->task, vma, vma->vm_start);
1886 get_vma_policy(priv->task, vma, vma->vm_start)); 1942 mpol_to_str(buffer, sizeof(buffer), pol);
1943 /*
1944 * unref shared or other task's mempolicy
1945 */
1946 if (pol != &default_policy && pol != current->mempolicy)
1947 __mpol_free(pol);
1887 1948
1888 seq_printf(m, "%08lx %s", vma->vm_start, buffer); 1949 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1889 1950