diff options
| author | Mel Gorman <mel@csn.ul.ie> | 2008-04-28 05:12:18 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-28 11:58:19 -0400 |
| commit | 19770b32609b6bf97a3dece2529089494cbfc549 (patch) | |
| tree | 3b5922d1b20aabdf929bde9309f323841717747a /include | |
| parent | dd1a239f6f2d4d3eedd318583ec319aa145b324c (diff) | |
mm: filter based on a nodemask as well as a gfp_mask
The MPOL_BIND policy creates a zonelist that is used for allocations
controlled by that mempolicy. As the per-node zonelist is already being
filtered based on a zone id, this patch adds a version of __alloc_pages() that
takes a nodemask for further filtering. This eliminates the need for
MPOL_BIND to create a custom zonelist.
A positive benefit of this is that allocations using MPOL_BIND now use the
local node's distance-ordered zonelist instead of a custom node-id-ordered
zonelist. I.e., pages will be allocated from the closest allowed node with
available memory.
[Lee.Schermerhorn@hp.com: Mempolicy: update stale documentation and comments]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask rework]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/cpuset.h | 4 | ||||
| -rw-r--r-- | include/linux/gfp.h | 4 | ||||
| -rw-r--r-- | include/linux/mempolicy.h | 19 | ||||
| -rw-r--r-- | include/linux/mmzone.h | 80 |
4 files changed, 68 insertions, 39 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 726761e24003..038578362b47 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h | |||
| @@ -26,7 +26,7 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p); | |||
| 26 | #define cpuset_current_mems_allowed (current->mems_allowed) | 26 | #define cpuset_current_mems_allowed (current->mems_allowed) |
| 27 | void cpuset_init_current_mems_allowed(void); | 27 | void cpuset_init_current_mems_allowed(void); |
| 28 | void cpuset_update_task_memory_state(void); | 28 | void cpuset_update_task_memory_state(void); |
| 29 | int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); | 29 | int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); |
| 30 | 30 | ||
| 31 | extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask); | 31 | extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask); |
| 32 | extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask); | 32 | extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask); |
| @@ -103,7 +103,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) | |||
| 103 | static inline void cpuset_init_current_mems_allowed(void) {} | 103 | static inline void cpuset_init_current_mems_allowed(void) {} |
| 104 | static inline void cpuset_update_task_memory_state(void) {} | 104 | static inline void cpuset_update_task_memory_state(void) {} |
| 105 | 105 | ||
| 106 | static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) | 106 | static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) |
| 107 | { | 107 | { |
| 108 | return 1; | 108 | return 1; |
| 109 | } | 109 | } |
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e1c6064cb6c7..898aa9d5b6c2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
| @@ -182,6 +182,10 @@ static inline void arch_alloc_page(struct page *page, int order) { } | |||
| 182 | 182 | ||
| 183 | extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *); | 183 | extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *); |
| 184 | 184 | ||
| 185 | extern struct page * | ||
| 186 | __alloc_pages_nodemask(gfp_t, unsigned int, | ||
| 187 | struct zonelist *, nodemask_t *nodemask); | ||
| 188 | |||
| 185 | static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, | 189 | static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, |
| 186 | unsigned int order) | 190 | unsigned int order) |
| 187 | { | 191 | { |
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 69160dc32d48..b8b3da7a3315 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
| @@ -54,19 +54,20 @@ struct mm_struct; | |||
| 54 | * mmap_sem. | 54 | * mmap_sem. |
| 55 | * | 55 | * |
| 56 | * Freeing policy: | 56 | * Freeing policy: |
| 57 | * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd. | 57 | * Mempolicy objects are reference counted. A mempolicy will be freed when |
| 58 | * All other policies don't have any external state. mpol_free() handles this. | 58 | * mpol_free() decrements the reference count to zero. |
| 59 | * | 59 | * |
| 60 | * Copying policy objects: | 60 | * Copying policy objects: |
| 61 | * For MPOL_BIND the zonelist must be always duplicated. mpol_clone() does this. | 61 | * mpol_copy() allocates a new mempolicy and copies the specified mempolicy |
| 62 | * to the new storage. The reference count of the new object is initialized | ||
| 63 | * to 1, representing the caller of mpol_copy(). | ||
| 62 | */ | 64 | */ |
| 63 | struct mempolicy { | 65 | struct mempolicy { |
| 64 | atomic_t refcnt; | 66 | atomic_t refcnt; |
| 65 | short policy; /* See MPOL_* above */ | 67 | short policy; /* See MPOL_* above */ |
| 66 | union { | 68 | union { |
| 67 | struct zonelist *zonelist; /* bind */ | ||
| 68 | short preferred_node; /* preferred */ | 69 | short preferred_node; /* preferred */ |
| 69 | nodemask_t nodes; /* interleave */ | 70 | nodemask_t nodes; /* interleave/bind */ |
| 70 | /* undefined for default */ | 71 | /* undefined for default */ |
| 71 | } v; | 72 | } v; |
| 72 | nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */ | 73 | nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */ |
| @@ -151,7 +152,8 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p); | |||
| 151 | 152 | ||
| 152 | extern struct mempolicy default_policy; | 153 | extern struct mempolicy default_policy; |
| 153 | extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, | 154 | extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, |
| 154 | unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol); | 155 | unsigned long addr, gfp_t gfp_flags, |
| 156 | struct mempolicy **mpol, nodemask_t **nodemask); | ||
| 155 | extern unsigned slab_node(struct mempolicy *policy); | 157 | extern unsigned slab_node(struct mempolicy *policy); |
| 156 | 158 | ||
| 157 | extern enum zone_type policy_zone; | 159 | extern enum zone_type policy_zone; |
| @@ -239,8 +241,11 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p) | |||
| 239 | } | 241 | } |
| 240 | 242 | ||
| 241 | static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, | 243 | static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, |
| 242 | unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol) | 244 | unsigned long addr, gfp_t gfp_flags, |
| 245 | struct mempolicy **mpol, nodemask_t **nodemask) | ||
| 243 | { | 246 | { |
| 247 | *mpol = NULL; | ||
| 248 | *nodemask = NULL; | ||
| 244 | return node_zonelist(0, gfp_flags); | 249 | return node_zonelist(0, gfp_flags); |
| 245 | } | 250 | } |
| 246 | 251 | ||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d34b4c290017..498d6ceff2f4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
| @@ -749,36 +749,60 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) | |||
| 749 | #endif /* CONFIG_NUMA */ | 749 | #endif /* CONFIG_NUMA */ |
| 750 | } | 750 | } |
| 751 | 751 | ||
| 752 | static inline void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref) | 752 | /** |
| 753 | { | 753 | * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point |
| 754 | zoneref->zone = zone; | 754 | * @z - The cursor used as a starting point for the search |
| 755 | zoneref->zone_idx = zone_idx(zone); | 755 | * @highest_zoneidx - The zone index of the highest zone to return |
| 756 | } | 756 | * @nodes - An optional nodemask to filter the zonelist with |
| 757 | * @zone - The first suitable zone found is returned via this parameter | ||
| 758 | * | ||
| 759 | * This function returns the next zone at or below a given zone index that is | ||
| 760 | * within the allowed nodemask using a cursor as the starting point for the | ||
| 761 | * search. The zoneref returned is a cursor that is used as the next starting | ||
| 762 | * point for future calls to next_zones_zonelist(). | ||
| 763 | */ | ||
| 764 | struct zoneref *next_zones_zonelist(struct zoneref *z, | ||
| 765 | enum zone_type highest_zoneidx, | ||
| 766 | nodemask_t *nodes, | ||
| 767 | struct zone **zone); | ||
| 757 | 768 | ||
| 758 | /* Returns the first zone at or below highest_zoneidx in a zonelist */ | 769 | /** |
| 770 | * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist | ||
| 771 | * @zonelist - The zonelist to search for a suitable zone | ||
| 772 | * @highest_zoneidx - The zone index of the highest zone to return | ||
| 773 | * @nodes - An optional nodemask to filter the zonelist with | ||
| 774 | * @zone - The first suitable zone found is returned via this parameter | ||
| 775 | * | ||
| 776 | * This function returns the first zone at or below a given zone index that is | ||
| 777 | * within the allowed nodemask. The zoneref returned is a cursor that can be | ||
| 778 | * used to iterate the zonelist with next_zones_zonelist. The cursor should | ||
| 779 | * not be used by the caller as it does not match the value of the zone | ||
| 780 | * returned. | ||
| 781 | */ | ||
| 759 | static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, | 782 | static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, |
| 760 | enum zone_type highest_zoneidx) | 783 | enum zone_type highest_zoneidx, |
| 784 | nodemask_t *nodes, | ||
| 785 | struct zone **zone) | ||
| 761 | { | 786 | { |
| 762 | struct zoneref *z; | 787 | return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes, |
| 763 | 788 | zone); | |
| 764 | /* Find the first suitable zone to use for the allocation */ | ||
| 765 | z = zonelist->_zonerefs; | ||
| 766 | while (zonelist_zone_idx(z) > highest_zoneidx) | ||
| 767 | z++; | ||
| 768 | |||
| 769 | return z; | ||
| 770 | } | 789 | } |
| 771 | 790 | ||
| 772 | /* Returns the next zone at or below highest_zoneidx in a zonelist */ | 791 | /** |
| 773 | static inline struct zoneref *next_zones_zonelist(struct zoneref *z, | 792 | * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask |
| 774 | enum zone_type highest_zoneidx) | 793 | * @zone - The current zone in the iterator |
| 775 | { | 794 | * @z - The current pointer within zonelist->zones being iterated |
| 776 | /* Find the next suitable zone to use for the allocation */ | 795 | * @zlist - The zonelist being iterated |
| 777 | while (zonelist_zone_idx(z) > highest_zoneidx) | 796 | * @highidx - The zone index of the highest zone to return |
| 778 | z++; | 797 | * @nodemask - Nodemask allowed by the allocator |
| 779 | 798 | * | |
| 780 | return z; | 799 | * This iterator iterates through all zones at or below a given zone index and |
| 781 | } | 800 | * within a given nodemask |
| 801 | */ | ||
| 802 | #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ | ||
| 803 | for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \ | ||
| 804 | zone; \ | ||
| 805 | z = next_zones_zonelist(z, highidx, nodemask, &zone)) \ | ||
| 782 | 806 | ||
| 783 | /** | 807 | /** |
| 784 | * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index | 808 | * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index |
| @@ -790,11 +814,7 @@ static inline struct zoneref *next_zones_zonelist(struct zoneref *z, | |||
| 790 | * This iterator iterates through all zones at or below a given zone index. | 814 | * This iterator iterates through all zones at or below a given zone index. |
| 791 | */ | 815 | */ |
| 792 | #define for_each_zone_zonelist(zone, z, zlist, highidx) \ | 816 | #define for_each_zone_zonelist(zone, z, zlist, highidx) \ |
| 793 | for (z = first_zones_zonelist(zlist, highidx), \ | 817 | for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL) |
| 794 | zone = zonelist_zone(z++); \ | ||
| 795 | zone; \ | ||
| 796 | z = next_zones_zonelist(z, highidx), \ | ||
| 797 | zone = zonelist_zone(z++)) | ||
| 798 | 818 | ||
| 799 | #ifdef CONFIG_SPARSEMEM | 819 | #ifdef CONFIG_SPARSEMEM |
| 800 | #include <asm/sparsemem.h> | 820 | #include <asm/sparsemem.h> |
