author     Christoph Lameter <clameter@sgi.com>                        2007-10-16 04:26:08 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>        2007-10-16 12:43:01 -0400
commit     4c93c355d5d563f300df7e61ef753d7a064411e9 (patch)
tree       24bcdbed58a51c69640da9c8e220dd5ce0c054a7
parent     ee3c72a14bfecdf783738032ff3c73ef6412f5b3 (diff)
SLUB: Place kmem_cache_cpu structures in a NUMA aware way
The recently introduced kmem_cache_cpu structures are currently placed in an array inside the kmem_cache struct. This means the kmem_cache_cpu structures are overwhelmingly on the wrong node on systems with a larger number of nodes. These are performance-critical structures, since the per-node information has to be touched for every alloc and free in a slab.

In order to place the kmem_cache_cpu structures optimally, we put an array of pointers to kmem_cache_cpu structs in kmem_cache (similar to SLAB). The kmem_cache_cpu structures can then be allocated in a more intelligent way.

We would like to pack per-cpu structures for the same cpu but different slab caches into shared cachelines, to save space and decrease the cache footprint. However, the slab allocator itself controls only allocations per node. We therefore set up, for every processor, a simple per-cpu array of 100 per-cpu structures, which is usually enough to get them all set up right. If we run out, we fall back to kmalloc_node. This also solves the bootstrap problem, since we do not have to use slab allocator functions early in boot to get memory for the small per-cpu structures.

Pro:
- NUMA-aware placement improves memory performance.
- All global structures in struct kmem_cache become read-only.
- Dense packing of per-cpu structures reduces cacheline footprint in SMP and NUMA.
- Potential avoidance of exclusive cacheline fetches on the free and alloc hotpath, since multiple kmem_cache_cpu structures share one cacheline. This is particularly important for the kmalloc array.

Cons:
- Additional reference to one read-only cacheline (the per-cpu array of pointers to kmem_cache_cpu) in both slab_alloc() and slab_free().

[akinobu.mita@gmail.com: fix cpu hotplug offline/online path]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: "Pekka Enberg" <penberg@cs.helsinki.fi>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/slub_def.h    9
-rw-r--r--  mm/slub.c                 168
2 files changed, 160 insertions, 17 deletions
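To make the allocation scheme easier to follow in isolation, here is a minimal userspace C sketch of the same idea: each "cpu" owns a small statically reserved pool of structures, the pool is threaded onto a free list once at init time, allocation pops from that list, and only overflow falls back to the general-purpose heap. This is an illustration, not the patch's code: the names (pcpu_struct, pool_alloc, pool_free, NR_POOL) are invented for the sketch, and plain malloc() stands in for the kernel's kmalloc_node() fallback.

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4
#define NR_POOL 100                    /* plays the role of NR_KMEM_CACHE_CPU */

struct pcpu_struct {
        void *freelist;                /* reused to chain free pool entries */
        int data;
};

/* Statically reserved pool per "cpu", analogous to the per-cpu array above. */
static struct pcpu_struct pool[NR_CPUS][NR_POOL];
static struct pcpu_struct *pool_free_head[NR_CPUS];

static void pool_free(struct pcpu_struct *c, int cpu)
{
        /* Entries outside the static pool came from malloc(): release them. */
        if (c < &pool[cpu][0] || c >= &pool[cpu][NR_POOL]) {
                free(c);
                return;
        }
        /* Otherwise push the entry back onto this cpu's free list. */
        c->freelist = pool_free_head[cpu];
        pool_free_head[cpu] = c;
}

/* Thread every pool entry onto the free list once, before first use. */
static void pool_init(int cpu)
{
        for (int i = NR_POOL - 1; i >= 0; i--)
                pool_free(&pool[cpu][i], cpu);
}

static struct pcpu_struct *pool_alloc(int cpu)
{
        struct pcpu_struct *c = pool_free_head[cpu];

        if (c)
                pool_free_head[cpu] = c->freelist;      /* pop from the pool */
        else
                c = malloc(sizeof(*c));                 /* pool exhausted: fall back */
        return c;
}

int main(void)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                pool_init(cpu);

        struct pcpu_struct *a = pool_alloc(0);
        struct pcpu_struct *b = pool_alloc(0);

        printf("a from static pool: %d\n",
               a >= &pool[0][0] && a < &pool[0][NR_POOL]);
        pool_free(b, 0);
        pool_free(a, 0);
        return 0;
}

The actual patch follows the same pattern, except that the overflow path uses the NUMA-aware kmalloc_node() and the chaining reuses the kmem_cache_cpu freelist field, exactly as sketched with the freelist pointer here.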
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 92e10cf6d0e8..f74716b59ce2 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -16,8 +16,7 @@ struct kmem_cache_cpu {
         struct page *page;
         int node;
         unsigned int offset;
-        /* Lots of wasted space */
-} ____cacheline_aligned_in_smp;
+};
 
 struct kmem_cache_node {
         spinlock_t list_lock;        /* Protect partial list and nr_partial */
@@ -62,7 +61,11 @@ struct kmem_cache {
         int defrag_ratio;
         struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
-        struct kmem_cache_cpu cpu_slab[NR_CPUS];
+#ifdef CONFIG_SMP
+        struct kmem_cache_cpu *cpu_slab[NR_CPUS];
+#else
+        struct kmem_cache_cpu cpu_slab;
+#endif
 };
 
 /*
diff --git a/mm/slub.c b/mm/slub.c
index ea9fd72093d8..6d4346ba0c29 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -269,7 +269,11 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 
 static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
 {
-        return &s->cpu_slab[cpu];
+#ifdef CONFIG_SMP
+        return s->cpu_slab[cpu];
+#else
+        return &s->cpu_slab;
+#endif
 }
 
 static inline int check_valid_pointer(struct kmem_cache *s,
@@ -1858,16 +1862,6 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
         c->node = 0;
 }
 
-static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
-{
-        int cpu;
-
-        for_each_possible_cpu(cpu)
-                init_kmem_cache_cpu(s, get_cpu_slab(s, cpu));
-
-        return 1;
-}
-
 static void init_kmem_cache_node(struct kmem_cache_node *n)
 {
         n->nr_partial = 0;
@@ -1879,6 +1873,131 @@ static void init_kmem_cache_node(struct kmem_cache_node *n)
 #endif
 }
 
+#ifdef CONFIG_SMP
+/*
+ * Per cpu array for per cpu structures.
+ *
+ * The per cpu array places all kmem_cache_cpu structures from one processor
+ * close together meaning that it becomes possible that multiple per cpu
+ * structures are contained in one cacheline. This may be particularly
+ * beneficial for the kmalloc caches.
+ *
+ * A desktop system typically has around 60-80 slabs. With 100 here we are
+ * likely able to get per cpu structures for all caches from the array defined
+ * here. We must be able to cover all kmalloc caches during bootstrap.
+ *
+ * If the per cpu array is exhausted then fall back to kmalloc
+ * of individual cachelines. No sharing is possible then.
+ */
+#define NR_KMEM_CACHE_CPU 100
+
+static DEFINE_PER_CPU(struct kmem_cache_cpu,
+                                kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
+
+static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
+static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE;
+
+static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
+                                                        int cpu, gfp_t flags)
+{
+        struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
+
+        if (c)
+                per_cpu(kmem_cache_cpu_free, cpu) =
+                                (void *)c->freelist;
+        else {
+                /* Table overflow: So allocate ourselves */
+                c = kmalloc_node(
+                        ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
+                        flags, cpu_to_node(cpu));
+                if (!c)
+                        return NULL;
+        }
+
+        init_kmem_cache_cpu(s, c);
+        return c;
+}
+
+static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
+{
+        if (c < per_cpu(kmem_cache_cpu, cpu) ||
+                        c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
+                kfree(c);
+                return;
+        }
+        c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
+        per_cpu(kmem_cache_cpu_free, cpu) = c;
+}
+
+static void free_kmem_cache_cpus(struct kmem_cache *s)
+{
+        int cpu;
+
+        for_each_online_cpu(cpu) {
+                struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+
+                if (c) {
+                        s->cpu_slab[cpu] = NULL;
+                        free_kmem_cache_cpu(c, cpu);
+                }
+        }
+}
+
+static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
+{
+        int cpu;
+
+        for_each_online_cpu(cpu) {
+                struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+
+                if (c)
+                        continue;
+
+                c = alloc_kmem_cache_cpu(s, cpu, flags);
+                if (!c) {
+                        free_kmem_cache_cpus(s);
+                        return 0;
+                }
+                s->cpu_slab[cpu] = c;
+        }
+        return 1;
+}
+
+/*
+ * Initialize the per cpu array.
+ */
+static void init_alloc_cpu_cpu(int cpu)
+{
+        int i;
+
+        if (cpu_isset(cpu, kmem_cach_cpu_free_init_once))
+                return;
+
+        for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
+                free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
+
+        cpu_set(cpu, kmem_cach_cpu_free_init_once);
+}
+
+static void __init init_alloc_cpu(void)
+{
+        int cpu;
+
+        for_each_online_cpu(cpu)
+                init_alloc_cpu_cpu(cpu);
+}
+
+#else
+static inline void free_kmem_cache_cpus(struct kmem_cache *s) {}
+static inline void init_alloc_cpu(void) {}
+
+static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
+{
+        init_kmem_cache_cpu(s, &s->cpu_slab);
+        return 1;
+}
+#endif
+
 #ifdef CONFIG_NUMA
 /*
  * No kmalloc_node yet so do it by hand. We know that this is the first
@@ -1886,7 +2005,8 @@ static void init_kmem_cache_node(struct kmem_cache_node *n)
  * possible.
  *
  * Note that this function only works on the kmalloc_node_cache
- * when allocating for the kmalloc_node_cache.
+ * when allocating for the kmalloc_node_cache. This is used for bootstrapping
+ * memory on a fresh node that has no slab structures yet.
  */
 static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
                                                            int node)
@@ -2115,6 +2235,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 
         if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
                 return 1;
+        free_kmem_cache_nodes(s);
 error:
         if (flags & SLAB_PANIC)
                 panic("Cannot create slab %s size=%lu realsize=%u "
@@ -2197,6 +2318,7 @@ static inline int kmem_cache_close(struct kmem_cache *s)
         flush_all(s);
 
         /* Attempt to free all objects */
+        free_kmem_cache_cpus(s);
         for_each_node_state(node, N_NORMAL_MEMORY) {
                 struct kmem_cache_node *n = get_node(s, node);
 
@@ -2584,6 +2706,8 @@ void __init kmem_cache_init(void)
         int i;
         int caches = 0;
 
+        init_alloc_cpu();
+
 #ifdef CONFIG_NUMA
         /*
          * Must first have the slab cache available for the allocations of the
@@ -2644,10 +2768,12 @@ void __init kmem_cache_init(void)
 
 #ifdef CONFIG_SMP
         register_cpu_notifier(&slab_notifier);
+        kmem_size = offsetof(struct kmem_cache, cpu_slab) +
+                                nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
+#else
+        kmem_size = sizeof(struct kmem_cache);
 #endif
 
-        kmem_size = offsetof(struct kmem_cache, cpu_slab) +
-                                nr_cpu_ids * sizeof(struct kmem_cache_cpu);
 
         printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
                 " CPUs=%d, Nodes=%d\n",
@@ -2774,15 +2900,29 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
         unsigned long flags;
 
         switch (action) {
+        case CPU_UP_PREPARE:
+        case CPU_UP_PREPARE_FROZEN:
+                init_alloc_cpu_cpu(cpu);
+                down_read(&slub_lock);
+                list_for_each_entry(s, &slab_caches, list)
+                        s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
+                                                        GFP_KERNEL);
+                up_read(&slub_lock);
+                break;
+
         case CPU_UP_CANCELED:
         case CPU_UP_CANCELED_FROZEN:
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
                 down_read(&slub_lock);
                 list_for_each_entry(s, &slab_caches, list) {
+                        struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+
                         local_irq_save(flags);
                         __flush_cpu_slab(s, cpu);
                         local_irq_restore(flags);
+                        free_kmem_cache_cpu(c, cpu);
+                        s->cpu_slab[cpu] = NULL;
                 }
                 up_read(&slub_lock);
                 break;