about summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/percpu.c 104
1 files changed, 30 insertions, 74 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 17db527ee2e..21d938a1066 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -983,24 +983,22 @@ EXPORT_SYMBOL_GPL(free_percpu);
983 * pcpu_setup_first_chunk - initialize the first percpu chunk 983 * pcpu_setup_first_chunk - initialize the first percpu chunk
984 * @get_page_fn: callback to fetch page pointer 984 * @get_page_fn: callback to fetch page pointer
985 * @static_size: the size of static percpu area in bytes 985 * @static_size: the size of static percpu area in bytes
986 * @reserved_size: the size of reserved percpu area in bytes 986 * @reserved_size: the size of reserved percpu area in bytes, 0 for none
987 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto 987 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
988 * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto 988 * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE
989 * @base_addr: mapped address, NULL for auto 989 * @base_addr: mapped address
990 * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary
991 * 990 *
992 * Initialize the first percpu chunk which contains the kernel static 991 * Initialize the first percpu chunk which contains the kernel static
993 * percpu area. This function is to be called from arch percpu area 992 * percpu area. This function is to be called from arch percpu area
994 * setup path. The first two parameters are mandatory. The rest are 993 * setup path.
995 * optional.
996 * 994 *
997 * @get_page_fn() should return pointer to percpu page given cpu 995 * @get_page_fn() should return pointer to percpu page given cpu
998 * number and page number. It should at least return enough pages to 996 * number and page number. It should at least return enough pages to
999 * cover the static area. The returned pages for static area should 997 * cover the static area. The returned pages for static area should
1000 * have been initialized with valid data. If @unit_size is specified, 998 * have been initialized with valid data. It can also return pages
1001 * it can also return pages after the static area. NULL return 999 * after the static area. NULL return indicates end of pages for the
1002 * indicates end of pages for the cpu. Note that @get_page_fn() must 1000 * cpu. Note that @get_page_fn() must return the same number of pages
1003 * return the same number of pages for all cpus. 1001 * for all cpus.
1004 * 1002 *
1005 * @reserved_size, if non-zero, specifies the amount of bytes to 1003 * @reserved_size, if non-zero, specifies the amount of bytes to
1006 * reserve after the static area in the first chunk. This reserves 1004 * reserve after the static area in the first chunk. This reserves
@@ -1015,17 +1013,12 @@ EXPORT_SYMBOL_GPL(free_percpu);
1015 * non-negative value makes percpu leave alone the area beyond 1013 * non-negative value makes percpu leave alone the area beyond
1016 * @static_size + @reserved_size + @dyn_size. 1014 * @static_size + @reserved_size + @dyn_size.
1017 * 1015 *
1018 * @unit_size, if non-negative, specifies unit size and must be 1016 * @unit_size specifies unit size and must be aligned to PAGE_SIZE and
1019 * aligned to PAGE_SIZE and equal to or larger than @static_size + 1017 * equal to or larger than @static_size + @reserved_size + if
1020 * @reserved_size + if non-negative, @dyn_size. 1018 * non-negative, @dyn_size.
1021 *
1022 * Non-null @base_addr means that the caller already allocated virtual
1023 * region for the first chunk and mapped it. percpu must not mess
1024 * with the chunk. Note that @base_addr with 0 @unit_size or non-NULL
1025 * @populate_pte_fn doesn't make any sense.
1026 * 1019 *
1027 * @populate_pte_fn is used to populate the pagetable. NULL means the 1020 * The caller should have mapped the first chunk at @base_addr and
1028 * caller already populated the pagetable. 1021 * copied static data to each unit.
1029 * 1022 *
1030 * If the first chunk ends up with both reserved and dynamic areas, it 1023 * If the first chunk ends up with both reserved and dynamic areas, it
1031 * is served by two chunks - one to serve the core static and reserved 1024 * is served by two chunks - one to serve the core static and reserved
@@ -1040,9 +1033,8 @@ EXPORT_SYMBOL_GPL(free_percpu);
1040 */ 1033 */
1041size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, 1034size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
1042 size_t static_size, size_t reserved_size, 1035 size_t static_size, size_t reserved_size,
1043 ssize_t dyn_size, ssize_t unit_size, 1036 ssize_t dyn_size, size_t unit_size,
1044 void *base_addr, 1037 void *base_addr)
1045 pcpu_fc_populate_pte_fn_t populate_pte_fn)
1046{ 1038{
1047 static struct vm_struct first_vm; 1039 static struct vm_struct first_vm;
1048 static int smap[2], dmap[2]; 1040 static int smap[2], dmap[2];
@@ -1050,27 +1042,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
1050 (dyn_size >= 0 ? dyn_size : 0); 1042 (dyn_size >= 0 ? dyn_size : 0);
1051 struct pcpu_chunk *schunk, *dchunk = NULL; 1043 struct pcpu_chunk *schunk, *dchunk = NULL;
1052 unsigned int cpu; 1044 unsigned int cpu;
1053 int nr_pages; 1045 int i, nr_pages;
1054 int err, i;
1055 1046
1056 /* sanity checks */ 1047 /* sanity checks */
1057 BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || 1048 BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
1058 ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); 1049 ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
1059 BUG_ON(!static_size); 1050 BUG_ON(!static_size);
1060 if (unit_size >= 0) { 1051 BUG_ON(!base_addr);
1061 BUG_ON(unit_size < size_sum); 1052 BUG_ON(unit_size < size_sum);
1062 BUG_ON(unit_size & ~PAGE_MASK); 1053 BUG_ON(unit_size & ~PAGE_MASK);
1063 BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); 1054 BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE);
1064 } else
1065 BUG_ON(base_addr);
1066 BUG_ON(base_addr && populate_pte_fn);
1067
1068 if (unit_size >= 0)
1069 pcpu_unit_pages = unit_size >> PAGE_SHIFT;
1070 else
1071 pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
1072 PFN_UP(size_sum));
1073 1055
1056 pcpu_unit_pages = unit_size >> PAGE_SHIFT;
1074 pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; 1057 pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
1075 pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; 1058 pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
1076 pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) 1059 pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
@@ -1079,6 +1062,10 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
1079 if (dyn_size < 0) 1062 if (dyn_size < 0)
1080 dyn_size = pcpu_unit_size - static_size - reserved_size; 1063 dyn_size = pcpu_unit_size - static_size - reserved_size;
1081 1064
1065 first_vm.flags = VM_ALLOC;
1066 first_vm.size = pcpu_chunk_size;
1067 first_vm.addr = base_addr;
1068
1082 /* 1069 /*
1083 * Allocate chunk slots. The additional last slot is for 1070 * Allocate chunk slots. The additional last slot is for
1084 * empty chunks. 1071 * empty chunks.
@@ -1101,6 +1088,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
1101 schunk->map = smap; 1088 schunk->map = smap;
1102 schunk->map_alloc = ARRAY_SIZE(smap); 1089 schunk->map_alloc = ARRAY_SIZE(smap);
1103 schunk->page = schunk->page_ar; 1090 schunk->page = schunk->page_ar;
1091 schunk->immutable = true;
1104 1092
1105 if (reserved_size) { 1093 if (reserved_size) {
1106 schunk->free_size = reserved_size; 1094 schunk->free_size = reserved_size;
@@ -1124,31 +1112,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
1124 dchunk->map = dmap; 1112 dchunk->map = dmap;
1125 dchunk->map_alloc = ARRAY_SIZE(dmap); 1113 dchunk->map_alloc = ARRAY_SIZE(dmap);
1126 dchunk->page = schunk->page_ar; /* share page map with schunk */ 1114 dchunk->page = schunk->page_ar; /* share page map with schunk */
1115 dchunk->immutable = true;
1127 1116
1128 dchunk->contig_hint = dchunk->free_size = dyn_size; 1117 dchunk->contig_hint = dchunk->free_size = dyn_size;
1129 dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; 1118 dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
1130 dchunk->map[dchunk->map_used++] = dchunk->free_size; 1119 dchunk->map[dchunk->map_used++] = dchunk->free_size;
1131 } 1120 }
1132 1121
1133 /* allocate vm address */
1134 first_vm.flags = VM_ALLOC;
1135 first_vm.size = pcpu_chunk_size;
1136
1137 if (!base_addr)
1138 vm_area_register_early(&first_vm, PAGE_SIZE);
1139 else {
1140 /*
1141 * Pages already mapped. No need to remap into
1142 * vmalloc area. In this case the first chunks can't
1143 * be mapped or unmapped by percpu and are marked
1144 * immutable.
1145 */
1146 first_vm.addr = base_addr;
1147 schunk->immutable = true;
1148 if (dchunk)
1149 dchunk->immutable = true;
1150 }
1151
1152 /* assign pages */ 1122 /* assign pages */
1153 nr_pages = -1; 1123 nr_pages = -1;
1154 for_each_possible_cpu(cpu) { 1124 for_each_possible_cpu(cpu) {
@@ -1168,19 +1138,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
1168 BUG_ON(nr_pages != i); 1138 BUG_ON(nr_pages != i);
1169 } 1139 }
1170 1140
1171 /* map them */
1172 if (populate_pte_fn) {
1173 for_each_possible_cpu(cpu)
1174 for (i = 0; i < nr_pages; i++)
1175 populate_pte_fn(pcpu_chunk_addr(schunk,
1176 cpu, i));
1177
1178 err = pcpu_map(schunk, 0, nr_pages);
1179 if (err)
1180 panic("failed to setup static percpu area, err=%d\n",
1181 err);
1182 }
1183
1184 /* link the first chunk in */ 1141 /* link the first chunk in */
1185 pcpu_first_chunk = dchunk ?: schunk; 1142 pcpu_first_chunk = dchunk ?: schunk;
1186 pcpu_chunk_relocate(pcpu_first_chunk, -1); 1143 pcpu_chunk_relocate(pcpu_first_chunk, -1);
@@ -1282,7 +1239,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
1282 1239
1283 return pcpu_setup_first_chunk(pcpue_get_page, static_size, 1240 return pcpu_setup_first_chunk(pcpue_get_page, static_size,
1284 reserved_size, dyn_size, 1241 reserved_size, dyn_size,
1285 pcpue_unit_size, pcpue_ptr, NULL); 1242 pcpue_unit_size, pcpue_ptr);
1286} 1243}
1287 1244
1288/* 1245/*
@@ -1387,8 +1344,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size,
1387 1344
1388 ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 1345 ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
1389 reserved_size, -1, 1346 reserved_size, -1,
1390 pcpu4k_unit_pages << PAGE_SHIFT, vm.addr, 1347 pcpu4k_unit_pages << PAGE_SHIFT, vm.addr);
1391 NULL);
1392 goto out_free_ar; 1348 goto out_free_ar;
1393 1349
1394enomem: 1350enomem:
@@ -1521,7 +1477,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size,
1521 1477
1522 ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, 1478 ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
1523 reserved_size, dyn_size, pcpul_unit_size, 1479 reserved_size, dyn_size, pcpul_unit_size,
1524 pcpul_vm.addr, NULL); 1480 pcpul_vm.addr);
1525 1481
1526 /* sort pcpul_map array for pcpu_lpage_remapped() */ 1482 /* sort pcpul_map array for pcpu_lpage_remapped() */
1527 for (i = 0; i < num_possible_cpus() - 1; i++) 1483 for (i = 0; i < num_possible_cpus() - 1; i++)