diff options
author | Tejun Heo <tj@kernel.org> | 2009-02-23 21:57:21 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2009-02-23 21:57:21 -0500 |
commit | d9b55eeb1d55ef2dc5a4fdbff9604c2c68cb5649 (patch) | |
tree | 1f81884066512810be81731a76fb5e041ac62c22 | |
parent | 458a3e644c3327be529393982e24277eda8f1ac7 (diff) |
percpu: remove unit_size power-of-2 restriction
Impact: allow unit_size to be an arbitrary multiple of PAGE_SIZE
In the dynamic percpu allocator, there is no reason the unit size should
be a power of two. Remove the restriction.
A non-power-of-two unit size means that empty chunks fall into the
same slot index as lightly occupied chunks, which is bad for reclaiming.
Reserve an extra slot for empty chunks.
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- | mm/percpu.c | 33 |
1 files changed, 19 insertions, 14 deletions
diff --git a/mm/percpu.c b/mm/percpu.c index 41e7a5f5ab1b..d9e6e5d1dbd4 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -67,7 +67,7 @@ | |||
67 | #include <asm/cacheflush.h> | 67 | #include <asm/cacheflush.h> |
68 | #include <asm/tlbflush.h> | 68 | #include <asm/tlbflush.h> |
69 | 69 | ||
70 | #define PCPU_MIN_UNIT_PAGES_SHIFT 4 /* also max alloc size */ | 70 | #define PCPU_MIN_UNIT_PAGES 16 /* max alloc size in pages */ |
71 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ | 71 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ |
72 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ | 72 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ |
73 | 73 | ||
@@ -83,9 +83,7 @@ struct pcpu_chunk { | |||
83 | struct page *page[]; /* #cpus * UNIT_PAGES */ | 83 | struct page *page[]; /* #cpus * UNIT_PAGES */ |
84 | }; | 84 | }; |
85 | 85 | ||
86 | static int pcpu_unit_pages_shift; | ||
87 | static int pcpu_unit_pages; | 86 | static int pcpu_unit_pages; |
88 | static int pcpu_unit_shift; | ||
89 | static int pcpu_unit_size; | 87 | static int pcpu_unit_size; |
90 | static int pcpu_chunk_size; | 88 | static int pcpu_chunk_size; |
91 | static int pcpu_nr_slots; | 89 | static int pcpu_nr_slots; |
@@ -117,12 +115,19 @@ static DEFINE_MUTEX(pcpu_mutex); | |||
117 | static struct list_head *pcpu_slot; /* chunk list slots */ | 115 | static struct list_head *pcpu_slot; /* chunk list slots */ |
118 | static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ | 116 | static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ |
119 | 117 | ||
120 | static int pcpu_size_to_slot(int size) | 118 | static int __pcpu_size_to_slot(int size) |
121 | { | 119 | { |
122 | int highbit = fls(size); /* size is in bytes */ | 120 | int highbit = fls(size); /* size is in bytes */ |
123 | return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); | 121 | return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); |
124 | } | 122 | } |
125 | 123 | ||
124 | static int pcpu_size_to_slot(int size) | ||
125 | { | ||
126 | if (size == pcpu_unit_size) | ||
127 | return pcpu_nr_slots - 1; | ||
128 | return __pcpu_size_to_slot(size); | ||
129 | } | ||
130 | |||
126 | static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) | 131 | static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) |
127 | { | 132 | { |
128 | if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int)) | 133 | if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int)) |
@@ -133,7 +138,7 @@ static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) | |||
133 | 138 | ||
134 | static int pcpu_page_idx(unsigned int cpu, int page_idx) | 139 | static int pcpu_page_idx(unsigned int cpu, int page_idx) |
135 | { | 140 | { |
136 | return (cpu << pcpu_unit_pages_shift) + page_idx; | 141 | return cpu * pcpu_unit_pages + page_idx; |
137 | } | 142 | } |
138 | 143 | ||
139 | static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, | 144 | static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, |
@@ -659,7 +664,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) | |||
659 | goto err; | 664 | goto err; |
660 | 665 | ||
661 | for_each_possible_cpu(cpu) | 666 | for_each_possible_cpu(cpu) |
662 | memset(chunk->vm->addr + (cpu << pcpu_unit_shift) + off, 0, | 667 | memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, |
663 | size); | 668 | size); |
664 | 669 | ||
665 | return 0; | 670 | return 0; |
@@ -722,7 +727,7 @@ void *__alloc_percpu(size_t size, size_t align) | |||
722 | struct pcpu_chunk *chunk; | 727 | struct pcpu_chunk *chunk; |
723 | int slot, off; | 728 | int slot, off; |
724 | 729 | ||
725 | if (unlikely(!size || size > PAGE_SIZE << PCPU_MIN_UNIT_PAGES_SHIFT || | 730 | if (unlikely(!size || size > PCPU_MIN_UNIT_PAGES * PAGE_SIZE || |
726 | align > PAGE_SIZE)) { | 731 | align > PAGE_SIZE)) { |
727 | WARN(true, "illegal size (%zu) or align (%zu) for " | 732 | WARN(true, "illegal size (%zu) or align (%zu) for " |
728 | "percpu allocation\n", size, align); | 733 | "percpu allocation\n", size, align); |
@@ -840,19 +845,19 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, | |||
840 | unsigned int cpu; | 845 | unsigned int cpu; |
841 | int err, i; | 846 | int err, i; |
842 | 847 | ||
843 | pcpu_unit_pages_shift = max_t(int, PCPU_MIN_UNIT_PAGES_SHIFT, | 848 | pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_PAGES, PFN_UP(cpu_size)); |
844 | order_base_2(cpu_size) - PAGE_SHIFT); | ||
845 | 849 | ||
846 | pcpu_static_size = cpu_size; | 850 | pcpu_static_size = cpu_size; |
847 | pcpu_unit_pages = 1 << pcpu_unit_pages_shift; | 851 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; |
848 | pcpu_unit_shift = PAGE_SHIFT + pcpu_unit_pages_shift; | ||
849 | pcpu_unit_size = 1 << pcpu_unit_shift; | ||
850 | pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; | 852 | pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; |
851 | pcpu_nr_slots = pcpu_size_to_slot(pcpu_unit_size) + 1; | ||
852 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) | 853 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) |
853 | + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); | 854 | + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); |
854 | 855 | ||
855 | /* allocate chunk slots */ | 856 | /* |
857 | * Allocate chunk slots. The additional last slot is for | ||
858 | * empty chunks. | ||
859 | */ | ||
860 | pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; | ||
856 | pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0])); | 861 | pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0])); |
857 | for (i = 0; i < pcpu_nr_slots; i++) | 862 | for (i = 0; i < pcpu_nr_slots; i++) |
858 | INIT_LIST_HEAD(&pcpu_slot[i]); | 863 | INIT_LIST_HEAD(&pcpu_slot[i]); |