diff options
author | Tejun Heo <tj@kernel.org> | 2014-09-02 14:46:05 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2014-09-02 14:46:05 -0400 |
commit | b539b87fed37ffc16c89a6bc3beca2d7aed82e1c (patch) | |
tree | 62657355b1f570ca9fe4cee455a41125e84dce7c | |
parent | 9c824b6a172c8d44a6b037946bae90127c969b1b (diff) |
percpu: implement pcpu_nr_empty_pop_pages and chunk->nr_populated
pcpu_nr_empty_pop_pages counts the number of empty populated pages
across all chunks and chunk->nr_populated counts the number of
populated pages in a chunk. Both will be used to implement pre/async
population for atomic allocations.
pcpu_chunk_[de]populated() are added to update chunk->populated,
chunk->nr_populated and pcpu_nr_empty_pop_pages together. All
successful chunk [de]populations should be followed by the
corresponding pcpu_chunk_[de]populated() calls.
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- | mm/percpu-km.c | 2 | ||||
-rw-r--r-- | mm/percpu.c | 122 |
2 files changed, 114 insertions, 10 deletions
diff --git a/mm/percpu-km.c b/mm/percpu-km.c index e662b4947a65..10e3d0b8a86d 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c | |||
@@ -69,7 +69,7 @@ static struct pcpu_chunk *pcpu_create_chunk(void) | |||
69 | chunk->base_addr = page_address(pages) - pcpu_group_offsets[0]; | 69 | chunk->base_addr = page_address(pages) - pcpu_group_offsets[0]; |
70 | 70 | ||
71 | spin_lock_irq(&pcpu_lock); | 71 | spin_lock_irq(&pcpu_lock); |
72 | bitmap_fill(chunk->populated, nr_pages); | 72 | pcpu_chunk_populated(chunk, 0, nr_pages); |
73 | spin_unlock_irq(&pcpu_lock); | 73 | spin_unlock_irq(&pcpu_lock); |
74 | 74 | ||
75 | return chunk; | 75 | return chunk; |
diff --git a/mm/percpu.c b/mm/percpu.c index 546ced05cf33..4f2d58760c9c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -113,6 +113,7 @@ struct pcpu_chunk { | |||
113 | void *data; /* chunk data */ | 113 | void *data; /* chunk data */ |
114 | int first_free; /* no free below this */ | 114 | int first_free; /* no free below this */ |
115 | bool immutable; /* no [de]population allowed */ | 115 | bool immutable; /* no [de]population allowed */ |
116 | int nr_populated; /* # of populated pages */ | ||
116 | unsigned long populated[]; /* populated bitmap */ | 117 | unsigned long populated[]; /* populated bitmap */ |
117 | }; | 118 | }; |
118 | 119 | ||
@@ -161,6 +162,12 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */ | |||
161 | 162 | ||
162 | static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ | 163 | static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ |
163 | 164 | ||
165 | /* | ||
166 | * The number of empty populated pages, protected by pcpu_lock. The | ||
167 | * reserved chunk doesn't contribute to the count. | ||
168 | */ | ||
169 | static int pcpu_nr_empty_pop_pages; | ||
170 | |||
164 | /* reclaim work to release fully free chunks, scheduled from free path */ | 171 | /* reclaim work to release fully free chunks, scheduled from free path */ |
165 | static void pcpu_reclaim(struct work_struct *work); | 172 | static void pcpu_reclaim(struct work_struct *work); |
166 | static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); | 173 | static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); |
@@ -296,6 +303,38 @@ static void pcpu_mem_free(void *ptr, size_t size) | |||
296 | } | 303 | } |
297 | 304 | ||
298 | /** | 305 | /** |
306 | * pcpu_count_occupied_pages - count the number of pages an area occupies | ||
307 | * @chunk: chunk of interest | ||
308 | * @i: index of the area in question | ||
309 | * | ||
310 | * Count the number of pages chunk's @i'th area occupies. When the area's | ||
311 | * start and/or end address isn't aligned to page boundary, the straddled | ||
312 | * page is included in the count iff the rest of the page is free. | ||
313 | */ | ||
314 | static int pcpu_count_occupied_pages(struct pcpu_chunk *chunk, int i) | ||
315 | { | ||
316 | int off = chunk->map[i] & ~1; | ||
317 | int end = chunk->map[i + 1] & ~1; | ||
318 | |||
319 | if (!PAGE_ALIGNED(off) && i > 0) { | ||
320 | int prev = chunk->map[i - 1]; | ||
321 | |||
322 | if (!(prev & 1) && prev <= round_down(off, PAGE_SIZE)) | ||
323 | off = round_down(off, PAGE_SIZE); | ||
324 | } | ||
325 | |||
326 | if (!PAGE_ALIGNED(end) && i + 1 < chunk->map_used) { | ||
327 | int next = chunk->map[i + 1]; | ||
328 | int nend = chunk->map[i + 2] & ~1; | ||
329 | |||
330 | if (!(next & 1) && nend >= round_up(end, PAGE_SIZE)) | ||
331 | end = round_up(end, PAGE_SIZE); | ||
332 | } | ||
333 | |||
334 | return max_t(int, PFN_DOWN(end) - PFN_UP(off), 0); | ||
335 | } | ||
336 | |||
337 | /** | ||
299 | * pcpu_chunk_relocate - put chunk in the appropriate chunk slot | 338 | * pcpu_chunk_relocate - put chunk in the appropriate chunk slot |
300 | * @chunk: chunk of interest | 339 | * @chunk: chunk of interest |
301 | * @oslot: the previous slot it was on | 340 | * @oslot: the previous slot it was on |
@@ -483,6 +522,7 @@ static int pcpu_fit_in_area(struct pcpu_chunk *chunk, int off, int this_size, | |||
483 | * @size: wanted size in bytes | 522 | * @size: wanted size in bytes |
484 | * @align: wanted align | 523 | * @align: wanted align |
485 | * @pop_only: allocate only from the populated area | 524 | * @pop_only: allocate only from the populated area |
525 | * @occ_pages_p: out param for the number of pages the area occupies | ||
486 | * | 526 | * |
487 | * Try to allocate @size bytes area aligned at @align from @chunk. | 527 | * Try to allocate @size bytes area aligned at @align from @chunk. |
488 | * Note that this function only allocates the offset. It doesn't | 528 | * Note that this function only allocates the offset. It doesn't |
@@ -498,7 +538,7 @@ static int pcpu_fit_in_area(struct pcpu_chunk *chunk, int off, int this_size, | |||
498 | * found. | 538 | * found. |
499 | */ | 539 | */ |
500 | static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, | 540 | static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, |
501 | bool pop_only) | 541 | bool pop_only, int *occ_pages_p) |
502 | { | 542 | { |
503 | int oslot = pcpu_chunk_slot(chunk); | 543 | int oslot = pcpu_chunk_slot(chunk); |
504 | int max_contig = 0; | 544 | int max_contig = 0; |
@@ -587,6 +627,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, | |||
587 | chunk->free_size -= size; | 627 | chunk->free_size -= size; |
588 | *p |= 1; | 628 | *p |= 1; |
589 | 629 | ||
630 | *occ_pages_p = pcpu_count_occupied_pages(chunk, i); | ||
590 | pcpu_chunk_relocate(chunk, oslot); | 631 | pcpu_chunk_relocate(chunk, oslot); |
591 | return off; | 632 | return off; |
592 | } | 633 | } |
@@ -602,6 +643,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, | |||
602 | * pcpu_free_area - free area to a pcpu_chunk | 643 | * pcpu_free_area - free area to a pcpu_chunk |
603 | * @chunk: chunk of interest | 644 | * @chunk: chunk of interest |
604 | * @freeme: offset of area to free | 645 | * @freeme: offset of area to free |
646 | * @occ_pages_p: out param for the number of pages the area occupies | ||
605 | * | 647 | * |
606 | * Free area starting from @freeme to @chunk. Note that this function | 648 | * Free area starting from @freeme to @chunk. Note that this function |
607 | * only modifies the allocation map. It doesn't depopulate or unmap | 649 | * only modifies the allocation map. It doesn't depopulate or unmap |
@@ -610,7 +652,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, | |||
610 | * CONTEXT: | 652 | * CONTEXT: |
611 | * pcpu_lock. | 653 | * pcpu_lock. |
612 | */ | 654 | */ |
613 | static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) | 655 | static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme, |
656 | int *occ_pages_p) | ||
614 | { | 657 | { |
615 | int oslot = pcpu_chunk_slot(chunk); | 658 | int oslot = pcpu_chunk_slot(chunk); |
616 | int off = 0; | 659 | int off = 0; |
@@ -641,6 +684,8 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) | |||
641 | *p = off &= ~1; | 684 | *p = off &= ~1; |
642 | chunk->free_size += (p[1] & ~1) - off; | 685 | chunk->free_size += (p[1] & ~1) - off; |
643 | 686 | ||
687 | *occ_pages_p = pcpu_count_occupied_pages(chunk, i); | ||
688 | |||
644 | /* merge with next? */ | 689 | /* merge with next? */ |
645 | if (!(p[1] & 1)) | 690 | if (!(p[1] & 1)) |
646 | to_free++; | 691 | to_free++; |
@@ -696,6 +741,50 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) | |||
696 | pcpu_mem_free(chunk, pcpu_chunk_struct_size); | 741 | pcpu_mem_free(chunk, pcpu_chunk_struct_size); |
697 | } | 742 | } |
698 | 743 | ||
744 | /** | ||
745 | * pcpu_chunk_populated - post-population bookkeeping | ||
746 | * @chunk: pcpu_chunk which got populated | ||
747 | * @page_start: the start page | ||
748 | * @page_end: the end page | ||
749 | * | ||
750 | * Pages in [@page_start,@page_end) have been populated to @chunk. Update | ||
751 | * the bookkeeping information accordingly. Must be called after each | ||
752 | * successful population. | ||
753 | */ | ||
754 | static void pcpu_chunk_populated(struct pcpu_chunk *chunk, | ||
755 | int page_start, int page_end) | ||
756 | { | ||
757 | int nr = page_end - page_start; | ||
758 | |||
759 | lockdep_assert_held(&pcpu_lock); | ||
760 | |||
761 | bitmap_set(chunk->populated, page_start, nr); | ||
762 | chunk->nr_populated += nr; | ||
763 | pcpu_nr_empty_pop_pages += nr; | ||
764 | } | ||
765 | |||
766 | /** | ||
767 | * pcpu_chunk_depopulated - post-depopulation bookkeeping | ||
768 | * @chunk: pcpu_chunk which got depopulated | ||
769 | * @page_start: the start page | ||
770 | * @page_end: the end page | ||
771 | * | ||
772 | * Pages in [@page_start,@page_end) have been depopulated from @chunk. | ||
773 | * Update the bookkeeping information accordingly. Must be called after | ||
774 | * each successful depopulation. | ||
775 | */ | ||
776 | static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk, | ||
777 | int page_start, int page_end) | ||
778 | { | ||
779 | int nr = page_end - page_start; | ||
780 | |||
781 | lockdep_assert_held(&pcpu_lock); | ||
782 | |||
783 | bitmap_clear(chunk->populated, page_start, nr); | ||
784 | chunk->nr_populated -= nr; | ||
785 | pcpu_nr_empty_pop_pages -= nr; | ||
786 | } | ||
787 | |||
699 | /* | 788 | /* |
700 | * Chunk management implementation. | 789 | * Chunk management implementation. |
701 | * | 790 | * |
@@ -772,6 +861,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, | |||
772 | struct pcpu_chunk *chunk; | 861 | struct pcpu_chunk *chunk; |
773 | const char *err; | 862 | const char *err; |
774 | bool is_atomic = !(gfp & GFP_KERNEL); | 863 | bool is_atomic = !(gfp & GFP_KERNEL); |
864 | int occ_pages = 0; | ||
775 | int slot, off, new_alloc, cpu, ret; | 865 | int slot, off, new_alloc, cpu, ret; |
776 | unsigned long flags; | 866 | unsigned long flags; |
777 | void __percpu *ptr; | 867 | void __percpu *ptr; |
@@ -812,7 +902,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, | |||
812 | spin_lock_irqsave(&pcpu_lock, flags); | 902 | spin_lock_irqsave(&pcpu_lock, flags); |
813 | } | 903 | } |
814 | 904 | ||
815 | off = pcpu_alloc_area(chunk, size, align, is_atomic); | 905 | off = pcpu_alloc_area(chunk, size, align, is_atomic, |
906 | &occ_pages); | ||
816 | if (off >= 0) | 907 | if (off >= 0) |
817 | goto area_found; | 908 | goto area_found; |
818 | 909 | ||
@@ -845,7 +936,8 @@ restart: | |||
845 | goto restart; | 936 | goto restart; |
846 | } | 937 | } |
847 | 938 | ||
848 | off = pcpu_alloc_area(chunk, size, align, is_atomic); | 939 | off = pcpu_alloc_area(chunk, size, align, is_atomic, |
940 | &occ_pages); | ||
849 | if (off >= 0) | 941 | if (off >= 0) |
850 | goto area_found; | 942 | goto area_found; |
851 | } | 943 | } |
@@ -899,17 +991,20 @@ area_found: | |||
899 | spin_lock_irqsave(&pcpu_lock, flags); | 991 | spin_lock_irqsave(&pcpu_lock, flags); |
900 | if (ret) { | 992 | if (ret) { |
901 | mutex_unlock(&pcpu_alloc_mutex); | 993 | mutex_unlock(&pcpu_alloc_mutex); |
902 | pcpu_free_area(chunk, off); | 994 | pcpu_free_area(chunk, off, &occ_pages); |
903 | err = "failed to populate"; | 995 | err = "failed to populate"; |
904 | goto fail_unlock; | 996 | goto fail_unlock; |
905 | } | 997 | } |
906 | bitmap_set(chunk->populated, rs, re - rs); | 998 | pcpu_chunk_populated(chunk, rs, re); |
907 | spin_unlock_irqrestore(&pcpu_lock, flags); | 999 | spin_unlock_irqrestore(&pcpu_lock, flags); |
908 | } | 1000 | } |
909 | 1001 | ||
910 | mutex_unlock(&pcpu_alloc_mutex); | 1002 | mutex_unlock(&pcpu_alloc_mutex); |
911 | } | 1003 | } |
912 | 1004 | ||
1005 | if (chunk != pcpu_reserved_chunk) | ||
1006 | pcpu_nr_empty_pop_pages -= occ_pages; | ||
1007 | |||
913 | /* clear the areas and return address relative to base address */ | 1008 | /* clear the areas and return address relative to base address */ |
914 | for_each_possible_cpu(cpu) | 1009 | for_each_possible_cpu(cpu) |
915 | memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); | 1010 | memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); |
@@ -1019,7 +1114,9 @@ static void pcpu_reclaim(struct work_struct *work) | |||
1019 | 1114 | ||
1020 | pcpu_for_each_pop_region(chunk, rs, re, 0, pcpu_unit_pages) { | 1115 | pcpu_for_each_pop_region(chunk, rs, re, 0, pcpu_unit_pages) { |
1021 | pcpu_depopulate_chunk(chunk, rs, re); | 1116 | pcpu_depopulate_chunk(chunk, rs, re); |
1022 | bitmap_clear(chunk->populated, rs, re - rs); | 1117 | spin_lock_irq(&pcpu_lock); |
1118 | pcpu_chunk_depopulated(chunk, rs, re); | ||
1119 | spin_unlock_irq(&pcpu_lock); | ||
1023 | } | 1120 | } |
1024 | pcpu_destroy_chunk(chunk); | 1121 | pcpu_destroy_chunk(chunk); |
1025 | } | 1122 | } |
@@ -1041,7 +1138,7 @@ void free_percpu(void __percpu *ptr) | |||
1041 | void *addr; | 1138 | void *addr; |
1042 | struct pcpu_chunk *chunk; | 1139 | struct pcpu_chunk *chunk; |
1043 | unsigned long flags; | 1140 | unsigned long flags; |
1044 | int off; | 1141 | int off, occ_pages; |
1045 | 1142 | ||
1046 | if (!ptr) | 1143 | if (!ptr) |
1047 | return; | 1144 | return; |
@@ -1055,7 +1152,10 @@ void free_percpu(void __percpu *ptr) | |||
1055 | chunk = pcpu_chunk_addr_search(addr); | 1152 | chunk = pcpu_chunk_addr_search(addr); |
1056 | off = addr - chunk->base_addr; | 1153 | off = addr - chunk->base_addr; |
1057 | 1154 | ||
1058 | pcpu_free_area(chunk, off); | 1155 | pcpu_free_area(chunk, off, &occ_pages); |
1156 | |||
1157 | if (chunk != pcpu_reserved_chunk) | ||
1158 | pcpu_nr_empty_pop_pages += occ_pages; | ||
1059 | 1159 | ||
1060 | /* if there are more than one fully free chunks, wake up grim reaper */ | 1160 | /* if there are more than one fully free chunks, wake up grim reaper */ |
1061 | if (chunk->free_size == pcpu_unit_size) { | 1161 | if (chunk->free_size == pcpu_unit_size) { |
@@ -1459,6 +1559,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1459 | schunk->map_alloc = ARRAY_SIZE(smap); | 1559 | schunk->map_alloc = ARRAY_SIZE(smap); |
1460 | schunk->immutable = true; | 1560 | schunk->immutable = true; |
1461 | bitmap_fill(schunk->populated, pcpu_unit_pages); | 1561 | bitmap_fill(schunk->populated, pcpu_unit_pages); |
1562 | schunk->nr_populated = pcpu_unit_pages; | ||
1462 | 1563 | ||
1463 | if (ai->reserved_size) { | 1564 | if (ai->reserved_size) { |
1464 | schunk->free_size = ai->reserved_size; | 1565 | schunk->free_size = ai->reserved_size; |
@@ -1488,6 +1589,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1488 | dchunk->map_alloc = ARRAY_SIZE(dmap); | 1589 | dchunk->map_alloc = ARRAY_SIZE(dmap); |
1489 | dchunk->immutable = true; | 1590 | dchunk->immutable = true; |
1490 | bitmap_fill(dchunk->populated, pcpu_unit_pages); | 1591 | bitmap_fill(dchunk->populated, pcpu_unit_pages); |
1592 | dchunk->nr_populated = pcpu_unit_pages; | ||
1491 | 1593 | ||
1492 | dchunk->contig_hint = dchunk->free_size = dyn_size; | 1594 | dchunk->contig_hint = dchunk->free_size = dyn_size; |
1493 | dchunk->map[0] = 1; | 1595 | dchunk->map[0] = 1; |
@@ -1498,6 +1600,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1498 | 1600 | ||
1499 | /* link the first chunk in */ | 1601 | /* link the first chunk in */ |
1500 | pcpu_first_chunk = dchunk ?: schunk; | 1602 | pcpu_first_chunk = dchunk ?: schunk; |
1603 | pcpu_nr_empty_pop_pages += | ||
1604 | pcpu_count_occupied_pages(pcpu_first_chunk, 1); | ||
1501 | pcpu_chunk_relocate(pcpu_first_chunk, -1); | 1605 | pcpu_chunk_relocate(pcpu_first_chunk, -1); |
1502 | 1606 | ||
1503 | /* we're done */ | 1607 | /* we're done */ |