aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Christoph Lameter <clameter@sgi.com> 2006-12-06 23:33:29 -0500
committer: Linus Torvalds <torvalds@woody.osdl.org> 2006-12-07 11:39:25 -0500
commit: 3c517a6132098ca37e122a2980fc64a9e798b0d7 (patch)
tree: e8fa49ef3f873624c0f5d29b34fdc8684988b426
parent: 952f3b51beb592f3f1de15adcdef802fc086ea91 (diff)
[PATCH] slab: better fallback allocation behavior
Currently we simply attempt to allocate from all allowed nodes using GFP_THISNODE. However, GFP_THISNODE does not do reclaim (it won't do any at all if the recent GFP_THISNODE patch is accepted). If we truly run out of memory in the whole system then fallback_alloc() may return NULL although memory may still be available if we performed more thorough reclaim. This patch changes fallback_alloc() so that we first only inspect all the per node queues for available slabs. If we find any then we allocate from those. This avoids slab fragmentation by first getting rid of all partially allocated slabs on every node before allocating new memory. If we cannot satisfy the allocation from any per node queue then we extend a slab. We now call into the page allocator without specifying GFP_THISNODE. The page allocator will then implement its own fallback (in the given cpuset context), perform necessary reclaim (again considering not a single node but the whole set of allowed nodes) and then return pages for a new slab. We identify from which node the pages were allocated and then insert the pages into the corresponding per node structure. In order to do so we need to modify cache_grow() to take a parameter that specifies the new slab. kmem_getpages() can no longer set the GFP_THISNODE flag since we need to be able to use kmem_getpages() to allocate from an arbitrary node. GFP_THISNODE needs to be specified when calling cache_grow(). One key advantage is that the decision from which node to allocate new memory is removed from slab fallback processing. The patch allows us to go back to using the page allocator's fallback/reclaim logic. Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- mm/slab.c 79
1 files changed, 57 insertions, 22 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 6da554fd3f6a..7b8e5d668586 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1605,12 +1605,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1605 flags |= __GFP_COMP; 1605 flags |= __GFP_COMP;
1606#endif 1606#endif
1607 1607
1608 /* 1608 flags |= cachep->gfpflags;
1609 * Under NUMA we want memory on the indicated node. We will handle
1610 * the needed fallback ourselves since we want to serve from our
1611 * per node object lists first for other nodes.
1612 */
1613 flags |= cachep->gfpflags | GFP_THISNODE;
1614 1609
1615 page = alloc_pages_node(nodeid, flags, cachep->gfporder); 1610 page = alloc_pages_node(nodeid, flags, cachep->gfporder);
1616 if (!page) 1611 if (!page)
@@ -2567,7 +2562,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2567 if (OFF_SLAB(cachep)) { 2562 if (OFF_SLAB(cachep)) {
2568 /* Slab management obj is off-slab. */ 2563 /* Slab management obj is off-slab. */
2569 slabp = kmem_cache_alloc_node(cachep->slabp_cache, 2564 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2570 local_flags, nodeid); 2565 local_flags & ~GFP_THISNODE, nodeid);
2571 if (!slabp) 2566 if (!slabp)
2572 return NULL; 2567 return NULL;
2573 } else { 2568 } else {
@@ -2708,10 +2703,10 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2708 * Grow (by 1) the number of slabs within a cache. This is called by 2703 * Grow (by 1) the number of slabs within a cache. This is called by
2709 * kmem_cache_alloc() when there are no active objs left in a cache. 2704 * kmem_cache_alloc() when there are no active objs left in a cache.
2710 */ 2705 */
2711static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) 2706static int cache_grow(struct kmem_cache *cachep,
2707 gfp_t flags, int nodeid, void *objp)
2712{ 2708{
2713 struct slab *slabp; 2709 struct slab *slabp;
2714 void *objp;
2715 size_t offset; 2710 size_t offset;
2716 gfp_t local_flags; 2711 gfp_t local_flags;
2717 unsigned long ctor_flags; 2712 unsigned long ctor_flags;
@@ -2763,12 +2758,14 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2763 * Get mem for the objs. Attempt to allocate a physical page from 2758 * Get mem for the objs. Attempt to allocate a physical page from
2764 * 'nodeid'. 2759 * 'nodeid'.
2765 */ 2760 */
2766 objp = kmem_getpages(cachep, flags, nodeid); 2761 if (!objp)
2762 objp = kmem_getpages(cachep, flags, nodeid);
2767 if (!objp) 2763 if (!objp)
2768 goto failed; 2764 goto failed;
2769 2765
2770 /* Get slab management. */ 2766 /* Get slab management. */
2771 slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid); 2767 slabp = alloc_slabmgmt(cachep, objp, offset,
2768 local_flags & ~GFP_THISNODE, nodeid);
2772 if (!slabp) 2769 if (!slabp)
2773 goto opps1; 2770 goto opps1;
2774 2771
@@ -3006,7 +3003,7 @@ alloc_done:
3006 3003
3007 if (unlikely(!ac->avail)) { 3004 if (unlikely(!ac->avail)) {
3008 int x; 3005 int x;
3009 x = cache_grow(cachep, flags, node); 3006 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
3010 3007
3011 /* cache_grow can reenable interrupts, then ac could change. */ 3008 /* cache_grow can reenable interrupts, then ac could change. */
3012 ac = cpu_cache_get(cachep); 3009 ac = cpu_cache_get(cachep);
@@ -3166,9 +3163,11 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3166 3163
3167/* 3164/*
3168 * Fallback function if there was no memory available and no objects on a 3165 * Fallback function if there was no memory available and no objects on a
3169 * certain node and we are allowed to fall back. We mimick the behavior of 3166 * certain node and fall back is permitted. First we scan all the
3170 * the page allocator. We fall back according to a zonelist determined by 3167 * available nodelists for available objects. If that fails then we
3171 * the policy layer while obeying cpuset constraints. 3168 * perform an allocation without specifying a node. This allows the page
3169 * allocator to do its reclaim / fallback magic. We then insert the
3170 * slab into the proper nodelist and then allocate from it.
3172 */ 3171 */
3173void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) 3172void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3174{ 3173{
@@ -3176,15 +3175,51 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3176 ->node_zonelists[gfp_zone(flags)]; 3175 ->node_zonelists[gfp_zone(flags)];
3177 struct zone **z; 3176 struct zone **z;
3178 void *obj = NULL; 3177 void *obj = NULL;
3178 int nid;
3179 3179
3180retry:
3181 /*
3182 * Look through allowed nodes for objects available
3183 * from existing per node queues.
3184 */
3180 for (z = zonelist->zones; *z && !obj; z++) { 3185 for (z = zonelist->zones; *z && !obj; z++) {
3181 int nid = zone_to_nid(*z); 3186 nid = zone_to_nid(*z);
3187
3188 if (cpuset_zone_allowed(*z, flags) &&
3189 cache->nodelists[nid] &&
3190 cache->nodelists[nid]->free_objects)
3191 obj = ____cache_alloc_node(cache,
3192 flags | GFP_THISNODE, nid);
3193 }
3182 3194
3183 if (zone_idx(*z) <= ZONE_NORMAL && 3195 if (!obj) {
3184 cpuset_zone_allowed(*z, flags) && 3196 /*
3185 cache->nodelists[nid]) 3197 * This allocation will be performed within the constraints
3186 obj = ____cache_alloc_node(cache, 3198 * of the current cpuset / memory policy requirements.
3187 flags | __GFP_THISNODE, nid); 3199 * We may trigger various forms of reclaim on the allowed
3200 * set and go into memory reserves if necessary.
3201 */
3202 obj = kmem_getpages(cache, flags, -1);
3203 if (obj) {
3204 /*
3205 * Insert into the appropriate per node queues
3206 */
3207 nid = page_to_nid(virt_to_page(obj));
3208 if (cache_grow(cache, flags, nid, obj)) {
3209 obj = ____cache_alloc_node(cache,
3210 flags | GFP_THISNODE, nid);
3211 if (!obj)
3212 /*
3213 * Another processor may allocate the
3214 * objects in the slab since we are
3215 * not holding any locks.
3216 */
3217 goto retry;
3218 } else {
3219 kmem_freepages(cache, obj);
3220 obj = NULL;
3221 }
3222 }
3188 } 3223 }
3189 return obj; 3224 return obj;
3190} 3225}
@@ -3241,7 +3276,7 @@ retry:
3241 3276
3242must_grow: 3277must_grow:
3243 spin_unlock(&l3->list_lock); 3278 spin_unlock(&l3->list_lock);
3244 x = cache_grow(cachep, flags, nodeid); 3279 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3245 if (x) 3280 if (x)
3246 goto retry; 3281 goto retry;
3247 3282