Diffstat (limited to 'mm/percpu.c')
-rw-r--r--  mm/percpu.c  149
1 file changed, 116 insertions(+), 33 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index d9e6e5d1dbd4..9ac01980cce0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -48,8 +48,8 @@
  * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
  *   regular address to percpu pointer and back
  *
- * - use pcpu_setup_static() during percpu area initialization to
- *   setup kernel static percpu area
+ * - use pcpu_setup_first_chunk() during percpu area initialization to
+ *   setup the first chunk containing the kernel static percpu area
  */

 #include <linux/bitmap.h>
@@ -67,7 +67,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>

-#define PCPU_MIN_UNIT_PAGES	16	/* max alloc size in pages */
 #define PCPU_SLOT_BASE_SHIFT	5	/* 1-31 shares the same slot */
 #define PCPU_DFL_MAP_ALLOC	16	/* start a map with 16 ents */

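PCPU_MIN_UNIT_PAGES goes away here, while the code below starts testing against PCPU_MIN_UNIT_SIZE, which mm/percpu.c itself never defines. Since this view is limited to mm/percpu.c, the byte-based replacement presumably lands in include/linux/percpu.h in the same commit, along these lines (a presumed sketch matching the old 16-page cap, not shown in this diff):

	/* presumed definition in include/linux/percpu.h */
	#define PCPU_MIN_UNIT_SIZE	(16UL << PAGE_SHIFT)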
@@ -80,6 +79,7 @@ struct pcpu_chunk {
 	int			map_used;	/* # of map entries used */
 	int			map_alloc;	/* # of map entries allocated */
 	int			*map;		/* allocation map */
+	bool			immutable;	/* no [de]population allowed */
 	struct page		*page[];	/* #cpus * UNIT_PAGES */
 };

@@ -521,6 +521,9 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
 	unsigned int last = num_possible_cpus() - 1;
 	unsigned int cpu;

+	/* unmap must not be done on immutable chunk */
+	WARN_ON(chunk->immutable);
+
 	/*
 	 * Each flushing trial can be very expensive, issue flush on
 	 * the whole region at once rather than doing it for each cpu.
@@ -602,6 +605,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
 	unsigned int cpu;
 	int err;

+	/* map must not be done on immutable chunk */
+	WARN_ON(chunk->immutable);
+
 	for_each_possible_cpu(cpu) {
 		err = map_kernel_range_noflush(
 			pcpu_chunk_addr(chunk, cpu, page_start),
@@ -727,8 +733,7 @@ void *__alloc_percpu(size_t size, size_t align)
 	struct pcpu_chunk *chunk;
 	int slot, off;

-	if (unlikely(!size || size > PCPU_MIN_UNIT_PAGES * PAGE_SIZE ||
-		     align > PAGE_SIZE)) {
+	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
 		WARN(true, "illegal size (%zu) or align (%zu) for "
 		     "percpu allocation\n", size, align);
 		return NULL;
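The allocation cap is now expressed in bytes rather than pages, but the limits are unchanged in effect: at most PCPU_MIN_UNIT_SIZE per allocation and at most page alignment. A minimal caller sketch (the foo_stats struct is hypothetical; the alloc_percpu(type) wrapper in percpu.h pairs size and alignment the same way):

	struct foo_stats {
		unsigned long hits;
		unsigned long misses;
	};

	/* well under PCPU_MIN_UNIT_SIZE; natural alignment <= PAGE_SIZE */
	struct foo_stats *stats = __alloc_percpu(sizeof(*stats),
						 __alignof__(*stats));
	if (!stats)
		return -ENOMEM;
	/* ... use per_cpu_ptr(stats, cpu) ... */
	free_percpu(stats);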
@@ -776,6 +781,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);

 static void pcpu_kill_chunk(struct pcpu_chunk *chunk)
 {
+	WARN_ON(chunk->immutable);
 	pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false);
 	list_del(&chunk->list);
 	rb_erase(&chunk->rb_node, &pcpu_addr_root);
@@ -821,33 +827,73 @@ void free_percpu(void *ptr)
 EXPORT_SYMBOL_GPL(free_percpu);

 /**
- * pcpu_setup_static - initialize kernel static percpu area
- * @populate_pte_fn: callback to allocate pagetable
- * @pages: num_possible_cpus() * PFN_UP(cpu_size) pages
- * @cpu_size: the size of static percpu area in bytes
+ * pcpu_setup_first_chunk - initialize the first percpu chunk
+ * @get_page_fn: callback to fetch page pointer
+ * @static_size: the size of static percpu area in bytes
+ * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto
+ * @free_size: free size in bytes, 0 for auto
+ * @base_addr: mapped address, NULL for auto
+ * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary
  *
- * Initialize kernel static percpu area. The caller should allocate
- * all the necessary pages and pass them in @pages.
- * @populate_pte_fn() is called on each page to be used for percpu
- * mapping and is responsible for making sure all the necessary page
- * tables for the page is allocated.
+ * Initialize the first percpu chunk which contains the kernel static
+ * percpu area. This function is to be called from the arch percpu area
+ * setup path. The first two parameters are mandatory. The rest are
+ * optional.
+ *
+ * @get_page_fn() should return a pointer to the percpu page given a cpu
+ * number and page number. It should return at least enough pages to
+ * cover the static area. The returned pages for the static area should
+ * have been initialized with valid data. If @unit_size is specified,
+ * it can also return pages after the static area. A NULL return
+ * indicates the end of pages for the cpu. Note that @get_page_fn()
+ * must return the same number of pages for all cpus.
+ *
+ * @unit_size, if non-zero, determines unit size and must be aligned
+ * to PAGE_SIZE and equal to or larger than @static_size + @free_size.
+ *
+ * @free_size determines the number of free bytes after the static
+ * area in the first chunk. If zero, whatever is left is available.
+ * Specifying a non-zero value makes percpu leave the area after
+ * @static_size + @free_size alone.
+ *
+ * A non-NULL @base_addr means that the caller has already allocated
+ * the virtual region for the first chunk and mapped it. percpu must
+ * not mess with the chunk. Note that @base_addr with 0 @unit_size or
+ * a non-NULL @populate_pte_fn doesn't make any sense.
+ *
+ * @populate_pte_fn is used to populate the pagetable. NULL means the
+ * caller has already populated the pagetable.
  *
  * RETURNS:
  * The determined pcpu_unit_size which can be used to initialize
  * percpu access.
  */
-size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn,
-				struct page **pages, size_t cpu_size)
+size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
+				     size_t static_size, size_t unit_size,
+				     size_t free_size, void *base_addr,
+				     pcpu_populate_pte_fn_t populate_pte_fn)
 {
 	static struct vm_struct static_vm;
 	struct pcpu_chunk *static_chunk;
-	int nr_cpu_pages = DIV_ROUND_UP(cpu_size, PAGE_SIZE);
 	unsigned int cpu;
+	int nr_pages;
 	int err, i;

-	pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_PAGES, PFN_UP(cpu_size));
+	/* sanity checks */
+	BUG_ON(!static_size);
+	BUG_ON(!unit_size && free_size);
+	BUG_ON(unit_size && unit_size < static_size + free_size);
+	BUG_ON(unit_size & ~PAGE_MASK);
+	BUG_ON(base_addr && !unit_size);
+	BUG_ON(base_addr && populate_pte_fn);

-	pcpu_static_size = cpu_size;
+	if (unit_size)
+		pcpu_unit_pages = unit_size >> PAGE_SHIFT;
+	else
+		pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
+					PFN_UP(static_size));
+
+	pcpu_static_size = static_size;
 	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
 	pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
 	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
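To make the auto-sizing concrete, a worked example (assuming 4 KiB pages and the presumed 16-page PCPU_MIN_UNIT_SIZE noted above):

	static_size = 100 KiB, unit_size = 0:
		PFN_UP(100 KiB) = 25 pages
		pcpu_unit_pages = max(16, 25) = 25  ->  pcpu_unit_size = 100 KiB
	static_size = 8 KiB, unit_size = 0:
		PFN_UP(8 KiB) = 2 pages
		pcpu_unit_pages = max(16, 2) = 16   ->  pcpu_unit_size = 64 KiB

In the second case, with @free_size also 0, the first chunk ends up with 64 KiB - 8 KiB = 56 KiB of dynamically allocatable space.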
@@ -862,29 +908,66 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn,
 	for (i = 0; i < pcpu_nr_slots; i++)
 		INIT_LIST_HEAD(&pcpu_slot[i]);

-	/* init and register vm area */
-	static_vm.flags = VM_ALLOC;
-	static_vm.size = pcpu_chunk_size;
-	vm_area_register_early(&static_vm, PAGE_SIZE);
-
 	/* init static_chunk */
 	static_chunk = alloc_bootmem(pcpu_chunk_struct_size);
 	INIT_LIST_HEAD(&static_chunk->list);
 	static_chunk->vm = &static_vm;
-	static_chunk->free_size = pcpu_unit_size - pcpu_static_size;
+
+	if (free_size)
+		static_chunk->free_size = free_size;
+	else
+		static_chunk->free_size = pcpu_unit_size - pcpu_static_size;
+
 	static_chunk->contig_hint = static_chunk->free_size;

-	/* assign pages and map them */
+	/* allocate vm address */
+	static_vm.flags = VM_ALLOC;
+	static_vm.size = pcpu_chunk_size;
+
+	if (!base_addr)
+		vm_area_register_early(&static_vm, PAGE_SIZE);
+	else {
+		/*
+		 * Pages already mapped. No need to remap into
+		 * vmalloc area. In this case the static chunk can't
+		 * be mapped or unmapped by percpu and is marked
+		 * immutable.
+		 */
+		static_vm.addr = base_addr;
+		static_chunk->immutable = true;
+	}
+
+	/* assign pages */
+	nr_pages = -1;
 	for_each_possible_cpu(cpu) {
-		for (i = 0; i < nr_cpu_pages; i++) {
-			*pcpu_chunk_pagep(static_chunk, cpu, i) = *pages++;
-			populate_pte_fn(pcpu_chunk_addr(static_chunk, cpu, i));
+		for (i = 0; i < pcpu_unit_pages; i++) {
+			struct page *page = get_page_fn(cpu, i);
+
+			if (!page)
+				break;
+			*pcpu_chunk_pagep(static_chunk, cpu, i) = page;
 		}
+
+		BUG_ON(i < PFN_UP(pcpu_static_size));
+
+		if (nr_pages < 0)
+			nr_pages = i;
+		else
+			BUG_ON(nr_pages != i);
 	}

-	err = pcpu_map(static_chunk, 0, nr_cpu_pages);
-	if (err)
-		panic("failed to setup static percpu area, err=%d\n", err);
+	/* map them */
+	if (populate_pte_fn) {
+		for_each_possible_cpu(cpu)
+			for (i = 0; i < nr_pages; i++)
+				populate_pte_fn(pcpu_chunk_addr(static_chunk,
+								cpu, i));
+
+		err = pcpu_map(static_chunk, 0, nr_pages);
+		if (err)
+			panic("failed to setup static percpu area, err=%d\n",
+			      err);
+	}

 	/* link static_chunk in */
 	pcpu_chunk_relocate(static_chunk, -1);
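Taken together, a minimal sketch of an arch setup path using the new interface. This is illustrative only: the pcpu4k_* names, the bootmem page array, and arch_populate_percpu_pte() are assumptions for the example, not part of this patch.

	static struct page **pcpu4k_pages;	/* one slot per (cpu, page) */
	static int pcpu4k_nr_static_pages;	/* pages covering static area */

	/* hand percpu one static-area page per (cpu, pageno);
	 * NULL return ends the page list for that cpu */
	static struct page * __init pcpu4k_get_page(unsigned int cpu,
						    int pageno)
	{
		if (pageno < pcpu4k_nr_static_pages)
			return pcpu4k_pages[cpu * pcpu4k_nr_static_pages +
					    pageno];
		return NULL;
	}

	/* make sure pagetables exist for @addr (assumed arch helper) */
	static void __init pcpu4k_populate_pte(unsigned long addr)
	{
		arch_populate_percpu_pte(addr);		/* hypothetical */
	}

	void __init setup_per_cpu_areas(void)
	{
		size_t static_size = __per_cpu_end - __per_cpu_start;
		size_t unit_size;
		unsigned int cpu;
		int i;

		pcpu4k_nr_static_pages = PFN_UP(static_size);
		pcpu4k_pages = alloc_bootmem(num_possible_cpus() *
					     pcpu4k_nr_static_pages *
					     sizeof(pcpu4k_pages[0]));

		/* allocate pages and copy in each cpu's static percpu data */
		for_each_possible_cpu(cpu)
			for (i = 0; i < pcpu4k_nr_static_pages; i++) {
				void *ptr = alloc_bootmem_pages(PAGE_SIZE);
				size_t off = (size_t)i * PAGE_SIZE;

				memcpy(ptr, __per_cpu_start + off,
				       min_t(size_t, PAGE_SIZE,
					     static_size - off));
				pcpu4k_pages[cpu * pcpu4k_nr_static_pages + i] =
					virt_to_page(ptr);
			}

		/* unit_size/free_size/base_addr all auto: percpu registers
		 * the vm area and maps the pages via pcpu4k_populate_pte() */
		unit_size = pcpu_setup_first_chunk(pcpu4k_get_page,
						   static_size, 0, 0, NULL,
						   pcpu4k_populate_pte);

		/* the returned unit size would then seed the arch's
		 * per-cpu offsets */
	}

Passing a non-NULL base_addr instead (with an explicit unit_size and a NULL populate_pte_fn) would take the other branch above: percpu leaves the existing mapping alone and marks the first chunk immutable, which is exactly what the new WARN_ON(chunk->immutable) checks in pcpu_map()/pcpu_unmap() enforce.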