aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2009-02-23 21:57:21 -0500
committerTejun Heo <tj@kernel.org>2009-02-23 21:57:21 -0500
commitd9b55eeb1d55ef2dc5a4fdbff9604c2c68cb5649 (patch)
tree1f81884066512810be81731a76fb5e041ac62c22
parent458a3e644c3327be529393982e24277eda8f1ac7 (diff)
percpu: remove unit_size power-of-2 restriction
Impact: allow unit_size to be an arbitrary multiple of PAGE_SIZE In the dynamic percpu allocator, there is no reason the unit size should be a power of two. Remove the restriction. A non-power-of-two unit size means that empty chunks fall into the same slot index as lightly occupied chunks, which is bad for reclaiming. Reserve an extra slot for empty chunks. Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r--mm/percpu.c33
1 files changed, 19 insertions, 14 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 41e7a5f5ab1b..d9e6e5d1dbd4 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -67,7 +67,7 @@
67#include <asm/cacheflush.h> 67#include <asm/cacheflush.h>
68#include <asm/tlbflush.h> 68#include <asm/tlbflush.h>
69 69
70#define PCPU_MIN_UNIT_PAGES_SHIFT 4 /* also max alloc size */ 70#define PCPU_MIN_UNIT_PAGES 16 /* max alloc size in pages */
71#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ 71#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */
72#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ 72#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */
73 73
@@ -83,9 +83,7 @@ struct pcpu_chunk {
83 struct page *page[]; /* #cpus * UNIT_PAGES */ 83 struct page *page[]; /* #cpus * UNIT_PAGES */
84}; 84};
85 85
86static int pcpu_unit_pages_shift;
87static int pcpu_unit_pages; 86static int pcpu_unit_pages;
88static int pcpu_unit_shift;
89static int pcpu_unit_size; 87static int pcpu_unit_size;
90static int pcpu_chunk_size; 88static int pcpu_chunk_size;
91static int pcpu_nr_slots; 89static int pcpu_nr_slots;
@@ -117,12 +115,19 @@ static DEFINE_MUTEX(pcpu_mutex);
117static struct list_head *pcpu_slot; /* chunk list slots */ 115static struct list_head *pcpu_slot; /* chunk list slots */
118static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ 116static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */
119 117
120static int pcpu_size_to_slot(int size) 118static int __pcpu_size_to_slot(int size)
121{ 119{
122 int highbit = fls(size); /* size is in bytes */ 120 int highbit = fls(size); /* size is in bytes */
123 return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); 121 return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
124} 122}
125 123
124static int pcpu_size_to_slot(int size)
125{
126 if (size == pcpu_unit_size)
127 return pcpu_nr_slots - 1;
128 return __pcpu_size_to_slot(size);
129}
130
126static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) 131static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
127{ 132{
128 if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int)) 133 if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int))
@@ -133,7 +138,7 @@ static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
133 138
134static int pcpu_page_idx(unsigned int cpu, int page_idx) 139static int pcpu_page_idx(unsigned int cpu, int page_idx)
135{ 140{
136 return (cpu << pcpu_unit_pages_shift) + page_idx; 141 return cpu * pcpu_unit_pages + page_idx;
137} 142}
138 143
139static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, 144static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk,
@@ -659,7 +664,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
659 goto err; 664 goto err;
660 665
661 for_each_possible_cpu(cpu) 666 for_each_possible_cpu(cpu)
662 memset(chunk->vm->addr + (cpu << pcpu_unit_shift) + off, 0, 667 memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0,
663 size); 668 size);
664 669
665 return 0; 670 return 0;
@@ -722,7 +727,7 @@ void *__alloc_percpu(size_t size, size_t align)
722 struct pcpu_chunk *chunk; 727 struct pcpu_chunk *chunk;
723 int slot, off; 728 int slot, off;
724 729
725 if (unlikely(!size || size > PAGE_SIZE << PCPU_MIN_UNIT_PAGES_SHIFT || 730 if (unlikely(!size || size > PCPU_MIN_UNIT_PAGES * PAGE_SIZE ||
726 align > PAGE_SIZE)) { 731 align > PAGE_SIZE)) {
727 WARN(true, "illegal size (%zu) or align (%zu) for " 732 WARN(true, "illegal size (%zu) or align (%zu) for "
728 "percpu allocation\n", size, align); 733 "percpu allocation\n", size, align);
@@ -840,19 +845,19 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn,
840 unsigned int cpu; 845 unsigned int cpu;
841 int err, i; 846 int err, i;
842 847
843 pcpu_unit_pages_shift = max_t(int, PCPU_MIN_UNIT_PAGES_SHIFT, 848 pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_PAGES, PFN_UP(cpu_size));
844 order_base_2(cpu_size) - PAGE_SHIFT);
845 849
846 pcpu_static_size = cpu_size; 850 pcpu_static_size = cpu_size;
847 pcpu_unit_pages = 1 << pcpu_unit_pages_shift; 851 pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
848 pcpu_unit_shift = PAGE_SHIFT + pcpu_unit_pages_shift;
849 pcpu_unit_size = 1 << pcpu_unit_shift;
850 pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; 852 pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
851 pcpu_nr_slots = pcpu_size_to_slot(pcpu_unit_size) + 1;
852 pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) 853 pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
853 + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); 854 + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *);
854 855
855 /* allocate chunk slots */ 856 /*
857 * Allocate chunk slots. The additional last slot is for
858 * empty chunks.
859 */
860 pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
856 pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0])); 861 pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0]));
857 for (i = 0; i < pcpu_nr_slots; i++) 862 for (i = 0; i < pcpu_nr_slots; i++)
858 INIT_LIST_HEAD(&pcpu_slot[i]); 863 INIT_LIST_HEAD(&pcpu_slot[i]);