author    Tejun Heo <tj@kernel.org>  2009-02-23 21:57:21 -0500
committer Tejun Heo <tj@kernel.org>  2009-02-23 21:57:21 -0500
commit    8d408b4be37bc49c9086531f2ebe411cf5731746 (patch)
tree      559a532a04b24dd164ec2c72ab545b30a5a604ef /mm
parent    d9b55eeb1d55ef2dc5a4fdbff9604c2c68cb5649 (diff)
percpu: give more latitude to arch specific first chunk initialization
Impact: more latitude for first percpu chunk allocation

The first percpu chunk serves the kernel static percpu area and may or
may not contain extra room for further dynamic allocation.
Initialization of the first chunk needs to be done before the normal
memory allocation service is up, so it has its own init path -
pcpu_setup_static().

It seems archs need more latitude while initializing the first chunk,
for example to take advantage of large page mappings.  This patch makes
the following changes to allow this.

* Define PERCPU_DYNAMIC_RESERVE to give archs a hint about how much
  space to reserve in the first chunk for further dynamic allocation.

* Rename pcpu_setup_static() to pcpu_setup_first_chunk().

* Make pcpu_setup_first_chunk() much more flexible by fetching page
  pointers via a callback and adding optional @unit_size, @free_size
  and @base_addr arguments, which allow archs to take over selected
  parts of chunk initialization as they see fit.

Signed-off-by: Tejun Heo <tj@kernel.org>
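As an illustration of the reworked entry point, an arch that is happy with
all the defaults can leave every optional argument at 0/NULL and let percpu
size and map the first chunk itself.  The sketch below is not code from
this patch; my_get_page(), my_populate_pte() and pcpu_static_pages[] are
hypothetical arch-side names invented for the example:

    #include <linux/percpu.h>
    #include <linux/pfn.h>
    #include <asm/sections.h>

    /* pages backing each cpu's static copy, filled in by the arch
     * beforehand; the 64-page bound is arbitrary for the sketch */
    static struct page *pcpu_static_pages[NR_CPUS][64] __initdata;

    static struct page * __init my_get_page(unsigned int cpu, int pageno)
    {
            /* returning NULL marks the end of this cpu's pages */
            if (pageno >= PFN_UP(__per_cpu_end - __per_cpu_start))
                    return NULL;
            return pcpu_static_pages[cpu][pageno];
    }

    static void __init my_populate_pte(unsigned long addr)
    {
            /* allocate any missing page tables covering addr */
    }

    void __init setup_per_cpu_areas(void)
    {
            size_t static_size = __per_cpu_end - __per_cpu_start;
            size_t unit_size;

            /* unit_size, free_size and base_addr all left "auto" */
            unit_size = pcpu_setup_first_chunk(my_get_page, static_size,
                                               0, 0, NULL,
                                               my_populate_pte);
            /* unit_size then feeds the per-cpu offset computation,
             * sketched at the end of this page */
    }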
Diffstat (limited to 'mm')
-rw-r--r--  mm/percpu.c | 149
1 file changed, 116 insertions(+), 33 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index d9e6e5d1dbd4..9ac01980cce0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -48,8 +48,8 @@
  * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
  *   regular address to percpu pointer and back
  *
- * - use pcpu_setup_static() during percpu area initialization to
- *   setup kernel static percpu area
+ * - use pcpu_setup_first_chunk() during percpu area initialization to
+ *   setup the first chunk containing the kernel static percpu area
  */
 
 #include <linux/bitmap.h>
@@ -67,7 +67,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#define PCPU_MIN_UNIT_PAGES    16      /* max alloc size in pages */
 #define PCPU_SLOT_BASE_SHIFT   5       /* 1-31 shares the same slot */
 #define PCPU_DFL_MAP_ALLOC     16      /* start a map with 16 ents */
 
@@ -80,6 +79,7 @@ struct pcpu_chunk {
         int             map_used;       /* # of map entries used */
         int             map_alloc;      /* # of map entries allocated */
         int             *map;           /* allocation map */
+        bool            immutable;      /* no [de]population allowed */
         struct page     *page[];        /* #cpus * UNIT_PAGES */
 };
 
@@ -521,6 +521,9 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
         unsigned int last = num_possible_cpus() - 1;
         unsigned int cpu;
 
+        /* unmap must not be done on immutable chunk */
+        WARN_ON(chunk->immutable);
+
         /*
          * Each flushing trial can be very expensive, issue flush on
          * the whole region at once rather than doing it for each cpu.
@@ -602,6 +605,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
         unsigned int cpu;
         int err;
 
+        /* map must not be done on immutable chunk */
+        WARN_ON(chunk->immutable);
+
         for_each_possible_cpu(cpu) {
                 err = map_kernel_range_noflush(
                         pcpu_chunk_addr(chunk, cpu, page_start),
@@ -727,8 +733,7 @@ void *__alloc_percpu(size_t size, size_t align)
         struct pcpu_chunk *chunk;
         int slot, off;
 
-        if (unlikely(!size || size > PCPU_MIN_UNIT_PAGES * PAGE_SIZE ||
-                     align > PAGE_SIZE)) {
+        if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
                 WARN(true, "illegal size (%zu) or align (%zu) for "
                      "percpu allocation\n", size, align);
                 return NULL;
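The cap on dynamic allocations is now spelled PCPU_MIN_UNIT_SIZE instead of
PCPU_MIN_UNIT_PAGES * PAGE_SIZE; callers are unaffected.  For reference, a
minimal usage sketch (struct my_counter is a made-up payload):

    struct my_counter {                 /* made up for the sketch */
            u64 packets;
            u64 bytes;
    };

    /* WARNs and returns NULL if size exceeds PCPU_MIN_UNIT_SIZE or
     * align exceeds PAGE_SIZE */
    struct my_counter *ctr = __alloc_percpu(sizeof(struct my_counter),
                                            __alignof__(struct my_counter));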
@@ -776,6 +781,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
 
 static void pcpu_kill_chunk(struct pcpu_chunk *chunk)
 {
+        WARN_ON(chunk->immutable);
         pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false);
         list_del(&chunk->list);
         rb_erase(&chunk->rb_node, &pcpu_addr_root);
@@ -821,33 +827,73 @@ void free_percpu(void *ptr)
 EXPORT_SYMBOL_GPL(free_percpu);
 
 /**
- * pcpu_setup_static - initialize kernel static percpu area
- * @populate_pte_fn: callback to allocate pagetable
- * @pages: num_possible_cpus() * PFN_UP(cpu_size) pages
- * @cpu_size: the size of static percpu area in bytes
+ * pcpu_setup_first_chunk - initialize the first percpu chunk
+ * @get_page_fn: callback to fetch page pointer
+ * @static_size: the size of static percpu area in bytes
+ * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto
+ * @free_size: free size in bytes, 0 for auto
+ * @base_addr: mapped address, NULL for auto
+ * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary
  *
- * Initialize kernel static percpu area.  The caller should allocate
- * all the necessary pages and pass them in @pages.
- * @populate_pte_fn() is called on each page to be used for percpu
- * mapping and is responsible for making sure all the necessary page
- * tables for the page is allocated.
+ * Initialize the first percpu chunk which contains the kernel static
+ * percpu area.  This function is to be called from the arch percpu
+ * area setup path.  The first two parameters are mandatory.  The rest
+ * are optional.
+ *
+ * @get_page_fn() should return the pointer to the percpu page given a
+ * cpu number and page number.  It should return at least enough pages
+ * to cover the static area.  The returned pages for the static area
+ * should have been initialized with valid data.  If @unit_size is
+ * specified, it can also return pages after the static area.  A NULL
+ * return indicates the end of pages for the cpu.  Note that
+ * @get_page_fn() must return the same number of pages for all cpus.
+ *
+ * @unit_size, if non-zero, determines the unit size and must be
+ * aligned to PAGE_SIZE and equal to or larger than @static_size +
+ * @free_size.
+ *
+ * @free_size determines the number of free bytes after the static
+ * area in the first chunk.  If zero, whatever is left over is
+ * available.  Specifying a non-zero value makes percpu leave the area
+ * after @static_size + @free_size alone.
+ *
+ * Non-NULL @base_addr means that the caller has already allocated the
+ * virtual region for the first chunk and mapped it.  percpu must not
+ * mess with the chunk.  Note that @base_addr with 0 @unit_size or
+ * non-NULL @populate_pte_fn doesn't make any sense.
+ *
+ * @populate_pte_fn is used to populate the pagetable.  NULL means the
+ * caller already populated the pagetable.
  *
  * RETURNS:
  * The determined pcpu_unit_size which can be used to initialize
  * percpu access.
  */
-size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn,
-                                struct page **pages, size_t cpu_size)
+size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
+                                     size_t static_size, size_t unit_size,
+                                     size_t free_size, void *base_addr,
+                                     pcpu_populate_pte_fn_t populate_pte_fn)
 {
         static struct vm_struct static_vm;
         struct pcpu_chunk *static_chunk;
-        int nr_cpu_pages = DIV_ROUND_UP(cpu_size, PAGE_SIZE);
         unsigned int cpu;
+        int nr_pages;
         int err, i;
 
-        pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_PAGES, PFN_UP(cpu_size));
+        /* sanity checks */
+        BUG_ON(!static_size);
+        BUG_ON(!unit_size && free_size);
+        BUG_ON(unit_size && unit_size < static_size + free_size);
+        BUG_ON(unit_size & ~PAGE_MASK);
+        BUG_ON(base_addr && !unit_size);
+        BUG_ON(base_addr && populate_pte_fn);
 
-        pcpu_static_size = cpu_size;
+        if (unit_size)
+                pcpu_unit_pages = unit_size >> PAGE_SHIFT;
+        else
+                pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
+                                        PFN_UP(static_size));
+
+        pcpu_static_size = static_size;
         pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
         pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
         pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
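Together these options cover the large-page case that motivated the patch:
an arch that has already allocated and mapped one large-page unit per cpu
passes explicit @unit_size, @free_size and @base_addr plus a NULL
@populate_pte_fn, and the chunk comes up immutable.  A sketch of that call
shape, where pcpur_get_page() and pcpur_base are assumed arch-side names:

    static size_t __init pcpur_setup_first_chunk(void)
    {
            size_t static_size = __per_cpu_end - __per_cpu_start;

            /* the whole unit was allocated and large-page mapped at
             * pcpur_base by the arch, so percpu must never remap it */
            return pcpu_setup_first_chunk(pcpur_get_page, static_size,
                                          PMD_SIZE,
                                          PMD_SIZE - static_size,
                                          pcpur_base, NULL);
    }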
@@ -862,29 +908,66 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn,
         for (i = 0; i < pcpu_nr_slots; i++)
                 INIT_LIST_HEAD(&pcpu_slot[i]);
 
-        /* init and register vm area */
-        static_vm.flags = VM_ALLOC;
-        static_vm.size = pcpu_chunk_size;
-        vm_area_register_early(&static_vm, PAGE_SIZE);
-
         /* init static_chunk */
         static_chunk = alloc_bootmem(pcpu_chunk_struct_size);
         INIT_LIST_HEAD(&static_chunk->list);
         static_chunk->vm = &static_vm;
-        static_chunk->free_size = pcpu_unit_size - pcpu_static_size;
+
+        if (free_size)
+                static_chunk->free_size = free_size;
+        else
+                static_chunk->free_size = pcpu_unit_size - pcpu_static_size;
+
         static_chunk->contig_hint = static_chunk->free_size;
 
-        /* assign pages and map them */
+        /* allocate vm address */
+        static_vm.flags = VM_ALLOC;
+        static_vm.size = pcpu_chunk_size;
+
+        if (!base_addr)
+                vm_area_register_early(&static_vm, PAGE_SIZE);
+        else {
+                /*
+                 * Pages already mapped.  No need to remap into
+                 * vmalloc area.  In this case the static chunk can't
+                 * be mapped or unmapped by percpu and is marked
+                 * immutable.
+                 */
+                static_vm.addr = base_addr;
+                static_chunk->immutable = true;
+        }
+
+        /* assign pages */
+        nr_pages = -1;
         for_each_possible_cpu(cpu) {
-                for (i = 0; i < nr_cpu_pages; i++) {
-                        *pcpu_chunk_pagep(static_chunk, cpu, i) = *pages++;
-                        populate_pte_fn(pcpu_chunk_addr(static_chunk, cpu, i));
+                for (i = 0; i < pcpu_unit_pages; i++) {
+                        struct page *page = get_page_fn(cpu, i);
+
+                        if (!page)
+                                break;
+                        *pcpu_chunk_pagep(static_chunk, cpu, i) = page;
                 }
+
+                BUG_ON(i < PFN_UP(pcpu_static_size));
+
+                if (nr_pages < 0)
+                        nr_pages = i;
+                else
+                        BUG_ON(nr_pages != i);
         }
 
-        err = pcpu_map(static_chunk, 0, nr_cpu_pages);
-        if (err)
-                panic("failed to setup static percpu area, err=%d\n", err);
+        /* map them */
+        if (populate_pte_fn) {
+                for_each_possible_cpu(cpu)
+                        for (i = 0; i < nr_pages; i++)
+                                populate_pte_fn(pcpu_chunk_addr(static_chunk,
+                                                                cpu, i));
+
+                err = pcpu_map(static_chunk, 0, nr_pages);
+                if (err)
+                        panic("failed to setup static percpu area, err=%d\n",
+                              err);
+        }
 
         /* link static_chunk in */
         pcpu_chunk_relocate(static_chunk, -1);
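Beyond the end of this hunk the function computes and returns the unit
size; the arch typically finishes by turning that into the per-cpu
offsets.  Continuing the setup_per_cpu_areas() sketch from the top of this
page (pcpu_base_addr is the first-chunk base pointer that mm/percpu.c
publishes once setup is done, and __per_cpu_offset[] stands in for the
arch's offset table):

    /* inside the sketched setup_per_cpu_areas(), after the call */
    unsigned long delta;
    unsigned int cpu;

    /* each cpu's copy sits one unit further into the chunk */
    delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
    for_each_possible_cpu(cpu)
            __per_cpu_offset[cpu] = delta + cpu * unit_size;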