diff options
author | Christoph Lameter <clameter@engr.sgi.com> | 2005-09-23 00:44:02 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-23 01:17:35 -0400 |
commit | ff69416e6323fe9d38c42a06ebdefeb58bbe9336 (patch) | |
tree | c97b81660f38ff7a06196dc47b92bd5f44c26305 | |
parent | 57487f4376e16558ccbe45a5b41d8cb5192309a4 (diff) |
[PATCH] slab: fix handling of pages from foreign NUMA nodes
The NUMA slab allocator may place pages allocated from foreign nodes onto
the lists of a particular node if that node runs out of memory. Inspecting
the slab->nodeid field will then not reflect that the page is now in use
for the slabs of another node.
This patch fixes that issue by adding a node parameter to free_block so that
the caller can indicate which node currently uses a slab.
Also removes the check for the current node from kmem_cache_alloc_node, since
the process may later shift to another node, which may lead to an allocation
on a node other than the one intended.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | mm/slab.c | 38 |
1 files changed, 19 insertions, 19 deletions
@@ -640,7 +640,7 @@ static enum { | |||
640 | 640 | ||
641 | static DEFINE_PER_CPU(struct work_struct, reap_work); | 641 | static DEFINE_PER_CPU(struct work_struct, reap_work); |
642 | 642 | ||
643 | static void free_block(kmem_cache_t* cachep, void** objpp, int len); | 643 | static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node); |
644 | static void enable_cpucache (kmem_cache_t *cachep); | 644 | static void enable_cpucache (kmem_cache_t *cachep); |
645 | static void cache_reap (void *unused); | 645 | static void cache_reap (void *unused); |
646 | static int __node_shrink(kmem_cache_t *cachep, int node); | 646 | static int __node_shrink(kmem_cache_t *cachep, int node); |
@@ -805,7 +805,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache | |||
805 | 805 | ||
806 | if (ac->avail) { | 806 | if (ac->avail) { |
807 | spin_lock(&rl3->list_lock); | 807 | spin_lock(&rl3->list_lock); |
808 | free_block(cachep, ac->entry, ac->avail); | 808 | free_block(cachep, ac->entry, ac->avail, node); |
809 | ac->avail = 0; | 809 | ac->avail = 0; |
810 | spin_unlock(&rl3->list_lock); | 810 | spin_unlock(&rl3->list_lock); |
811 | } | 811 | } |
@@ -926,7 +926,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
926 | /* Free limit for this kmem_list3 */ | 926 | /* Free limit for this kmem_list3 */ |
927 | l3->free_limit -= cachep->batchcount; | 927 | l3->free_limit -= cachep->batchcount; |
928 | if (nc) | 928 | if (nc) |
929 | free_block(cachep, nc->entry, nc->avail); | 929 | free_block(cachep, nc->entry, nc->avail, node); |
930 | 930 | ||
931 | if (!cpus_empty(mask)) { | 931 | if (!cpus_empty(mask)) { |
932 | spin_unlock(&l3->list_lock); | 932 | spin_unlock(&l3->list_lock); |
@@ -935,7 +935,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
935 | 935 | ||
936 | if (l3->shared) { | 936 | if (l3->shared) { |
937 | free_block(cachep, l3->shared->entry, | 937 | free_block(cachep, l3->shared->entry, |
938 | l3->shared->avail); | 938 | l3->shared->avail, node); |
939 | kfree(l3->shared); | 939 | kfree(l3->shared); |
940 | l3->shared = NULL; | 940 | l3->shared = NULL; |
941 | } | 941 | } |
@@ -1883,12 +1883,13 @@ static void do_drain(void *arg) | |||
1883 | { | 1883 | { |
1884 | kmem_cache_t *cachep = (kmem_cache_t*)arg; | 1884 | kmem_cache_t *cachep = (kmem_cache_t*)arg; |
1885 | struct array_cache *ac; | 1885 | struct array_cache *ac; |
1886 | int node = numa_node_id(); | ||
1886 | 1887 | ||
1887 | check_irq_off(); | 1888 | check_irq_off(); |
1888 | ac = ac_data(cachep); | 1889 | ac = ac_data(cachep); |
1889 | spin_lock(&cachep->nodelists[numa_node_id()]->list_lock); | 1890 | spin_lock(&cachep->nodelists[node]->list_lock); |
1890 | free_block(cachep, ac->entry, ac->avail); | 1891 | free_block(cachep, ac->entry, ac->avail, node); |
1891 | spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock); | 1892 | spin_unlock(&cachep->nodelists[node]->list_lock); |
1892 | ac->avail = 0; | 1893 | ac->avail = 0; |
1893 | } | 1894 | } |
1894 | 1895 | ||
@@ -2609,7 +2610,7 @@ done: | |||
2609 | /* | 2610 | /* |
2610 | * Caller needs to acquire correct kmem_list's list_lock | 2611 | * Caller needs to acquire correct kmem_list's list_lock |
2611 | */ | 2612 | */ |
2612 | static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects) | 2613 | static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node) |
2613 | { | 2614 | { |
2614 | int i; | 2615 | int i; |
2615 | struct kmem_list3 *l3; | 2616 | struct kmem_list3 *l3; |
@@ -2618,14 +2619,12 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects) | |||
2618 | void *objp = objpp[i]; | 2619 | void *objp = objpp[i]; |
2619 | struct slab *slabp; | 2620 | struct slab *slabp; |
2620 | unsigned int objnr; | 2621 | unsigned int objnr; |
2621 | int nodeid = 0; | ||
2622 | 2622 | ||
2623 | slabp = GET_PAGE_SLAB(virt_to_page(objp)); | 2623 | slabp = GET_PAGE_SLAB(virt_to_page(objp)); |
2624 | nodeid = slabp->nodeid; | 2624 | l3 = cachep->nodelists[node]; |
2625 | l3 = cachep->nodelists[nodeid]; | ||
2626 | list_del(&slabp->list); | 2625 | list_del(&slabp->list); |
2627 | objnr = (objp - slabp->s_mem) / cachep->objsize; | 2626 | objnr = (objp - slabp->s_mem) / cachep->objsize; |
2628 | check_spinlock_acquired_node(cachep, nodeid); | 2627 | check_spinlock_acquired_node(cachep, node); |
2629 | check_slabp(cachep, slabp); | 2628 | check_slabp(cachep, slabp); |
2630 | 2629 | ||
2631 | 2630 | ||
@@ -2665,13 +2664,14 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) | |||
2665 | { | 2664 | { |
2666 | int batchcount; | 2665 | int batchcount; |
2667 | struct kmem_list3 *l3; | 2666 | struct kmem_list3 *l3; |
2667 | int node = numa_node_id(); | ||
2668 | 2668 | ||
2669 | batchcount = ac->batchcount; | 2669 | batchcount = ac->batchcount; |
2670 | #if DEBUG | 2670 | #if DEBUG |
2671 | BUG_ON(!batchcount || batchcount > ac->avail); | 2671 | BUG_ON(!batchcount || batchcount > ac->avail); |
2672 | #endif | 2672 | #endif |
2673 | check_irq_off(); | 2673 | check_irq_off(); |
2674 | l3 = cachep->nodelists[numa_node_id()]; | 2674 | l3 = cachep->nodelists[node]; |
2675 | spin_lock(&l3->list_lock); | 2675 | spin_lock(&l3->list_lock); |
2676 | if (l3->shared) { | 2676 | if (l3->shared) { |
2677 | struct array_cache *shared_array = l3->shared; | 2677 | struct array_cache *shared_array = l3->shared; |
@@ -2687,7 +2687,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) | |||
2687 | } | 2687 | } |
2688 | } | 2688 | } |
2689 | 2689 | ||
2690 | free_block(cachep, ac->entry, batchcount); | 2690 | free_block(cachep, ac->entry, batchcount, node); |
2691 | free_done: | 2691 | free_done: |
2692 | #if STATS | 2692 | #if STATS |
2693 | { | 2693 | { |
@@ -2752,7 +2752,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) | |||
2752 | } else { | 2752 | } else { |
2753 | spin_lock(&(cachep->nodelists[nodeid])-> | 2753 | spin_lock(&(cachep->nodelists[nodeid])-> |
2754 | list_lock); | 2754 | list_lock); |
2755 | free_block(cachep, &objp, 1); | 2755 | free_block(cachep, &objp, 1, nodeid); |
2756 | spin_unlock(&(cachep->nodelists[nodeid])-> | 2756 | spin_unlock(&(cachep->nodelists[nodeid])-> |
2757 | list_lock); | 2757 | list_lock); |
2758 | } | 2758 | } |
@@ -2845,7 +2845,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i | |||
2845 | unsigned long save_flags; | 2845 | unsigned long save_flags; |
2846 | void *ptr; | 2846 | void *ptr; |
2847 | 2847 | ||
2848 | if (nodeid == numa_node_id() || nodeid == -1) | 2848 | if (nodeid == -1) |
2849 | return __cache_alloc(cachep, flags); | 2849 | return __cache_alloc(cachep, flags); |
2850 | 2850 | ||
2851 | if (unlikely(!cachep->nodelists[nodeid])) { | 2851 | if (unlikely(!cachep->nodelists[nodeid])) { |
@@ -3080,7 +3080,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep) | |||
3080 | 3080 | ||
3081 | if ((nc = cachep->nodelists[node]->shared)) | 3081 | if ((nc = cachep->nodelists[node]->shared)) |
3082 | free_block(cachep, nc->entry, | 3082 | free_block(cachep, nc->entry, |
3083 | nc->avail); | 3083 | nc->avail, node); |
3084 | 3084 | ||
3085 | l3->shared = new; | 3085 | l3->shared = new; |
3086 | if (!cachep->nodelists[node]->alien) { | 3086 | if (!cachep->nodelists[node]->alien) { |
@@ -3161,7 +3161,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, | |||
3161 | if (!ccold) | 3161 | if (!ccold) |
3162 | continue; | 3162 | continue; |
3163 | spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); | 3163 | spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); |
3164 | free_block(cachep, ccold->entry, ccold->avail); | 3164 | free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); |
3165 | spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); | 3165 | spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); |
3166 | kfree(ccold); | 3166 | kfree(ccold); |
3167 | } | 3167 | } |
@@ -3241,7 +3241,7 @@ static void drain_array_locked(kmem_cache_t *cachep, | |||
3241 | if (tofree > ac->avail) { | 3241 | if (tofree > ac->avail) { |
3242 | tofree = (ac->avail+1)/2; | 3242 | tofree = (ac->avail+1)/2; |
3243 | } | 3243 | } |
3244 | free_block(cachep, ac->entry, tofree); | 3244 | free_block(cachep, ac->entry, tofree, node); |
3245 | ac->avail -= tofree; | 3245 | ac->avail -= tofree; |
3246 | memmove(ac->entry, &(ac->entry[tofree]), | 3246 | memmove(ac->entry, &(ac->entry[tofree]), |
3247 | sizeof(void*)*ac->avail); | 3247 | sizeof(void*)*ac->avail); |