author     Mel Gorman <mel@csn.ul.ie>                      2008-04-28 05:12:18 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2008-04-28 11:58:19 -0400
commit     19770b32609b6bf97a3dece2529089494cbfc549 (patch)
tree       3b5922d1b20aabdf929bde9309f323841717747a /include/linux
parent     dd1a239f6f2d4d3eedd318583ec319aa145b324c (diff)
mm: filter based on a nodemask as well as a gfp_mask
The MPOL_BIND policy creates a zonelist that is used for allocations
controlled by that mempolicy.  As the per-node zonelist is already being
filtered based on a zone id, this patch adds a version of __alloc_pages()
that takes a nodemask for further filtering.  This eliminates the need
for MPOL_BIND to create a custom zonelist.

A positive benefit of this is that allocations using MPOL_BIND now use the
local node's distance-ordered zonelist instead of a custom node-id-ordered
zonelist.  I.e., pages will be allocated from the closest allowed node with
available memory.

[Lee.Schermerhorn@hp.com: Mempolicy: update stale documentation and comments]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask rework]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
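As an illustration of the filtering this patch introduces, here is a minimal sketch of what the nodemask-aware zone walk can look like, based on the next_zones_zonelist() prototype added to mmzone.h below. The body shown is illustrative only; the real implementation lives in mm/mmzone.c and may differ in detail.

/*
 * Sketch only: skip zones above the requested index, and, when a
 * nodemask is supplied, zones on nodes outside that mask.
 */
struct zoneref *next_zones_zonelist(struct zoneref *z,
					enum zone_type highest_zoneidx,
					nodemask_t *nodes,
					struct zone **zone)
{
	if (likely(nodes == NULL))
		while (zonelist_zone_idx(z) > highest_zoneidx)
			z++;
	else
		while (zonelist_zone_idx(z) > highest_zoneidx ||
				(z->zone &&
				 !node_isset(zonelist_node_idx(z), *nodes)))
			z++;

	/* Return the matching zone and advance the cursor for the next call */
	*zone = zonelist_zone(z++);
	return z;
}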
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/cpuset.h      4
-rw-r--r--  include/linux/gfp.h         4
-rw-r--r--  include/linux/mempolicy.h  19
-rw-r--r--  include/linux/mmzone.h     80
4 files changed, 68 insertions(+), 39 deletions(-)
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 726761e24003..038578362b47 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -26,7 +26,7 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
 void cpuset_update_task_memory_state(void);
-int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
+int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
 
 extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask);
 extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask);
@@ -103,7 +103,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 static inline void cpuset_init_current_mems_allowed(void) {}
 static inline void cpuset_update_task_memory_state(void) {}
 
-static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
+static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 {
 	return 1;
 }
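The rename reflects a semantic change: the cpuset check now asks whether a policy's nodemask intersects the cpuset's allowed nodes instead of walking a zonelist. A plausible CONFIG_CPUSETS body, sketched here for orientation (the real definition is in kernel/cpuset.c and may differ):

/* Sketch: the mask is valid if any node in it is allowed by the cpuset. */
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
{
	return nodes_intersects(*nodemask, current->mems_allowed);
}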
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e1c6064cb6c7..898aa9d5b6c2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -182,6 +182,10 @@ static inline void arch_alloc_page(struct page *page, int order) { }
 
 extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *);
 
+extern struct page *
+__alloc_pages_nodemask(gfp_t, unsigned int,
+			struct zonelist *, nodemask_t *nodemask);
+
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
 {
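With the nodemask-taking variant in place, the original entry point reduces to the NULL-nodemask case, where NULL means "no filtering beyond the zonelist itself". A sketch of the expected relationship (illustrative; the real definitions live in mm/page_alloc.c):

/* Sketch: __alloc_pages() as the unfiltered special case. */
struct page *__alloc_pages(gfp_t gfp_mask, unsigned int order,
				struct zonelist *zonelist)
{
	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
}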
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 69160dc32d48..b8b3da7a3315 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -54,19 +54,20 @@ struct mm_struct;
  * mmap_sem.
  *
  * Freeing policy:
- * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd.
- * All other policies don't have any external state. mpol_free() handles this.
+ * Mempolicy objects are reference counted.  A mempolicy will be freed when
+ * mpol_free() decrements the reference count to zero.
  *
  * Copying policy objects:
- * For MPOL_BIND the zonelist must be always duplicated. mpol_clone() does this.
+ * mpol_copy() allocates a new mempolicy and copies the specified mempolicy
+ * to the new storage.  The reference count of the new object is initialized
+ * to 1, representing the caller of mpol_copy().
  */
 struct mempolicy {
 	atomic_t refcnt;
 	short policy;	/* See MPOL_* above */
 	union {
-		struct zonelist  *zonelist;	/* bind */
 		short		 preferred_node; /* preferred */
-		nodemask_t	 nodes;		/* interleave */
+		nodemask_t	 nodes;		/* interleave/bind */
 		/* undefined for default */
 	} v;
 	nodemask_t cpuset_mems_allowed;	/* mempolicy relative to these nodes */
@@ -151,7 +152,8 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
 
 extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
-		unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol);
+		unsigned long addr, gfp_t gfp_flags,
+		struct mempolicy **mpol, nodemask_t **nodemask);
 extern unsigned slab_node(struct mempolicy *policy);
 
 extern enum zone_type policy_zone;
@@ -239,8 +241,11 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
 }
 
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
-		unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
+		unsigned long addr, gfp_t gfp_flags,
+		struct mempolicy **mpol, nodemask_t **nodemask)
 {
+	*mpol = NULL;
+	*nodemask = NULL;
 	return node_zonelist(0, gfp_flags);
 }
 
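The commit message notes that dequeue_huge_page_vma() was reworked to obey the MPOL_BIND nodemask. A simplified sketch of such a caller: the nodemask returned through the new out-parameter (NULL unless the policy is MPOL_BIND) feeds directly into the filtered iterator from mmzone.h. Here htlb_alloc_mask and the loop body stand in for the real hugetlb details in mm/hugetlb.c.

/* Sketch of a huge_zonelist() caller in the style of dequeue_huge_page_vma() */
struct mempolicy *mpol;
nodemask_t *nodemask;
struct zoneref *z;
struct zone *zone;
struct zonelist *zonelist = huge_zonelist(vma, address,
				htlb_alloc_mask, &mpol, &nodemask);

for_each_zone_zonelist_nodemask(zone, z, zonelist,
				MAX_NR_ZONES - 1, nodemask) {
	/* try to dequeue a huge page from this zone's free list */
}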
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d34b4c290017..498d6ceff2f4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -749,36 +749,60 @@ static inline int zonelist_node_idx(struct zoneref *zoneref)
 #endif /* CONFIG_NUMA */
 }
 
-static inline void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
-{
-	zoneref->zone = zone;
-	zoneref->zone_idx = zone_idx(zone);
-}
+/**
+ * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
+ * @z - The cursor used as a starting point for the search
+ * @highest_zoneidx - The zone index of the highest zone to return
+ * @nodes - An optional nodemask to filter the zonelist with
+ * @zone - The first suitable zone found is returned via this parameter
+ *
+ * This function returns the next zone at or below a given zone index that is
+ * within the allowed nodemask using a cursor as the starting point for the
+ * search. The zoneref returned is a cursor that is used as the next starting
+ * point for future calls to next_zones_zonelist().
+ */
+struct zoneref *next_zones_zonelist(struct zoneref *z,
+					enum zone_type highest_zoneidx,
+					nodemask_t *nodes,
+					struct zone **zone);
 
-/* Returns the first zone at or below highest_zoneidx in a zonelist */
+/**
+ * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
+ * @zonelist - The zonelist to search for a suitable zone
+ * @highest_zoneidx - The zone index of the highest zone to return
+ * @nodes - An optional nodemask to filter the zonelist with
+ * @zone - The first suitable zone found is returned via this parameter
+ *
+ * This function returns the first zone at or below a given zone index that is
+ * within the allowed nodemask. The zoneref returned is a cursor that can be
+ * used to iterate the zonelist with next_zones_zonelist. The cursor should
+ * not be used by the caller as it does not match the value of the zone
+ * returned.
+ */
 static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
-					enum zone_type highest_zoneidx)
+					enum zone_type highest_zoneidx,
+					nodemask_t *nodes,
+					struct zone **zone)
 {
-	struct zoneref *z;
-
-	/* Find the first suitable zone to use for the allocation */
-	z = zonelist->_zonerefs;
-	while (zonelist_zone_idx(z) > highest_zoneidx)
-		z++;
-
-	return z;
+	return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes,
+								zone);
 }
 
-/* Returns the next zone at or below highest_zoneidx in a zonelist */
-static inline struct zoneref *next_zones_zonelist(struct zoneref *z,
-					enum zone_type highest_zoneidx)
-{
-	/* Find the next suitable zone to use for the allocation */
-	while (zonelist_zone_idx(z) > highest_zoneidx)
-		z++;
-
-	return z;
-}
+/**
+ * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
+ * @zone - The current zone in the iterator
+ * @z - The current pointer within zonelist->zones being iterated
+ * @zlist - The zonelist being iterated
+ * @highidx - The zone index of the highest zone to return
+ * @nodemask - Nodemask allowed by the allocator
+ *
+ * This iterator iterates through all zones at or below a given zone index and
+ * within a given nodemask
+ */
+#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
+	for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone);	\
+		zone;							\
+		z = next_zones_zonelist(z, highidx, nodemask, &zone))
 
 /**
  * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
@@ -790,11 +814,7 @@ static inline struct zoneref *next_zones_zonelist(struct zoneref *z,
  * This iterator iterates through all zones at or below a given zone index.
  */
 #define for_each_zone_zonelist(zone, z, zlist, highidx) \
-	for (z = first_zones_zonelist(zlist, highidx), \
-		zone = zonelist_zone(z++);	\
-		zone;				\
-		z = next_zones_zonelist(z, highidx), \
-		zone = zonelist_zone(z++))
+	for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
 
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
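For completeness, a sketch of how an allocator path such as get_page_from_freelist() can consume the new iterator; high_zoneidx and nodemask are assumed to come from the gfp mask and the mempolicy respectively, and the loop body is elided:

struct zoneref *z;
struct zone *zone;

/* Walk the distance-ordered zonelist, honouring the optional nodemask. */
for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, nodemask) {
	if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
		continue;
	/* ... attempt the allocation from this zone ... */
}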