aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-09-02 14:46:05 -0400
committerTejun Heo <tj@kernel.org>2014-09-02 14:46:05 -0400
commitb539b87fed37ffc16c89a6bc3beca2d7aed82e1c (patch)
tree62657355b1f570ca9fe4cee455a41125e84dce7c
parent9c824b6a172c8d44a6b037946bae90127c969b1b (diff)
percpu: implement pcpu_nr_empty_pop_pages and chunk->nr_populated
pcpu_nr_empty_pop_pages counts the number of empty populated pages across all chunks and chunk->nr_populated counts the number of populated pages in a chunk. Both will be used to implement pre/async population for atomic allocations. pcpu_chunk_[de]populated() are added to update chunk->populated, chunk->nr_populated and pcpu_nr_empty_pop_pages together. All successful chunk [de]populations should be followed by the corresponding pcpu_chunk_[de]populated() calls. Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r--mm/percpu-km.c2
-rw-r--r--mm/percpu.c122
2 files changed, 114 insertions, 10 deletions
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index e662b4947a65..10e3d0b8a86d 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -69,7 +69,7 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
69 chunk->base_addr = page_address(pages) - pcpu_group_offsets[0]; 69 chunk->base_addr = page_address(pages) - pcpu_group_offsets[0];
70 70
71 spin_lock_irq(&pcpu_lock); 71 spin_lock_irq(&pcpu_lock);
72 bitmap_fill(chunk->populated, nr_pages); 72 pcpu_chunk_populated(chunk, 0, nr_pages);
73 spin_unlock_irq(&pcpu_lock); 73 spin_unlock_irq(&pcpu_lock);
74 74
75 return chunk; 75 return chunk;
diff --git a/mm/percpu.c b/mm/percpu.c
index 546ced05cf33..4f2d58760c9c 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -113,6 +113,7 @@ struct pcpu_chunk {
113 void *data; /* chunk data */ 113 void *data; /* chunk data */
114 int first_free; /* no free below this */ 114 int first_free; /* no free below this */
115 bool immutable; /* no [de]population allowed */ 115 bool immutable; /* no [de]population allowed */
116 int nr_populated; /* # of populated pages */
116 unsigned long populated[]; /* populated bitmap */ 117 unsigned long populated[]; /* populated bitmap */
117}; 118};
118 119
@@ -161,6 +162,12 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */
161 162
162static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ 163static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
163 164
165/*
166 * The number of empty populated pages, protected by pcpu_lock. The
167 * reserved chunk doesn't contribute to the count.
168 */
169static int pcpu_nr_empty_pop_pages;
170
164/* reclaim work to release fully free chunks, scheduled from free path */ 171/* reclaim work to release fully free chunks, scheduled from free path */
165static void pcpu_reclaim(struct work_struct *work); 172static void pcpu_reclaim(struct work_struct *work);
166static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); 173static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim);
@@ -296,6 +303,38 @@ static void pcpu_mem_free(void *ptr, size_t size)
296} 303}
297 304
298/** 305/**
306 * pcpu_count_occupied_pages - count the number of pages an area occupies
307 * @chunk: chunk of interest
308 * @i: index of the area in question
309 *
310 * Count the number of pages chunk's @i'th area occupies. When the area's
311 * start and/or end address isn't aligned to page boundary, the straddled
312 * page is included in the count iff the rest of the page is free.
 *
 * RETURNS:
 * The number of pages counted. The result is clamped so that it is never
 * negative.
313 */
314static int pcpu_count_occupied_pages(struct pcpu_chunk *chunk, int i)
315{
    /*
     * Bit 0 of a map entry is a flag rather than part of the offset (it
     * is tested as the "not free" marker below), so mask it off.  The
     * area spans from its own entry to the next entry.
     */
316 int off = chunk->map[i] & ~1;
317 int end = chunk->map[i + 1] & ~1;
318
    /*
     * Start is mid-page: widen @off down to the page boundary only when
     * the preceding area is free (bit 0 clear) and begins at or before
     * that boundary, i.e. the head of the straddled page is all free.
     */
319 if (!PAGE_ALIGNED(off) && i > 0) {
320 int prev = chunk->map[i - 1];
321
    /*
     * @prev is compared unmasked; that is fine because the && only
     * reaches the comparison when bit 0 is already known to be clear.
     */
322 if (!(prev & 1) && prev <= round_down(off, PAGE_SIZE))
323 off = round_down(off, PAGE_SIZE);
324 }
325
    /*
     * End is mid-page: the mirror-image check against the following
     * area.  The i + 1 < map_used test restricts the map[i + 2] read
     * to areas that actually have a successor.
     */
326 if (!PAGE_ALIGNED(end) && i + 1 < chunk->map_used) {
327 int next = chunk->map[i + 1];
328 int nend = chunk->map[i + 2] & ~1;
329
330 if (!(next & 1) && nend >= round_up(end, PAGE_SIZE))
331 end = round_up(end, PAGE_SIZE);
332 }
333
    /*
     * Only whole pages inside [off, end) are counted; an area that does
     * not cover a full page yields 0 instead of a negative difference.
     */
334 return max_t(int, PFN_DOWN(end) - PFN_UP(off), 0);
335}
336
337/**
299 * pcpu_chunk_relocate - put chunk in the appropriate chunk slot 338 * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
300 * @chunk: chunk of interest 339 * @chunk: chunk of interest
301 * @oslot: the previous slot it was on 340 * @oslot: the previous slot it was on
@@ -483,6 +522,7 @@ static int pcpu_fit_in_area(struct pcpu_chunk *chunk, int off, int this_size,
483 * @size: wanted size in bytes 522 * @size: wanted size in bytes
484 * @align: wanted align 523 * @align: wanted align
485 * @pop_only: allocate only from the populated area 524 * @pop_only: allocate only from the populated area
525 * @occ_pages_p: out param for the number of pages the area occupies
486 * 526 *
487 * Try to allocate @size bytes area aligned at @align from @chunk. 527 * Try to allocate @size bytes area aligned at @align from @chunk.
488 * Note that this function only allocates the offset. It doesn't 528 * Note that this function only allocates the offset. It doesn't
@@ -498,7 +538,7 @@ static int pcpu_fit_in_area(struct pcpu_chunk *chunk, int off, int this_size,
498 * found. 538 * found.
499 */ 539 */
500static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, 540static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align,
501 bool pop_only) 541 bool pop_only, int *occ_pages_p)
502{ 542{
503 int oslot = pcpu_chunk_slot(chunk); 543 int oslot = pcpu_chunk_slot(chunk);
504 int max_contig = 0; 544 int max_contig = 0;
@@ -587,6 +627,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align,
587 chunk->free_size -= size; 627 chunk->free_size -= size;
588 *p |= 1; 628 *p |= 1;
589 629
630 *occ_pages_p = pcpu_count_occupied_pages(chunk, i);
590 pcpu_chunk_relocate(chunk, oslot); 631 pcpu_chunk_relocate(chunk, oslot);
591 return off; 632 return off;
592 } 633 }
@@ -602,6 +643,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align,
602 * pcpu_free_area - free area to a pcpu_chunk 643 * pcpu_free_area - free area to a pcpu_chunk
603 * @chunk: chunk of interest 644 * @chunk: chunk of interest
604 * @freeme: offset of area to free 645 * @freeme: offset of area to free
646 * @occ_pages_p: out param for the number of pages the area occupies
605 * 647 *
606 * Free area starting from @freeme to @chunk. Note that this function 648 * Free area starting from @freeme to @chunk. Note that this function
607 * only modifies the allocation map. It doesn't depopulate or unmap 649 * only modifies the allocation map. It doesn't depopulate or unmap
@@ -610,7 +652,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align,
610 * CONTEXT: 652 * CONTEXT:
611 * pcpu_lock. 653 * pcpu_lock.
612 */ 654 */
613static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) 655static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme,
656 int *occ_pages_p)
614{ 657{
615 int oslot = pcpu_chunk_slot(chunk); 658 int oslot = pcpu_chunk_slot(chunk);
616 int off = 0; 659 int off = 0;
@@ -641,6 +684,8 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
641 *p = off &= ~1; 684 *p = off &= ~1;
642 chunk->free_size += (p[1] & ~1) - off; 685 chunk->free_size += (p[1] & ~1) - off;
643 686
687 *occ_pages_p = pcpu_count_occupied_pages(chunk, i);
688
644 /* merge with next? */ 689 /* merge with next? */
645 if (!(p[1] & 1)) 690 if (!(p[1] & 1))
646 to_free++; 691 to_free++;
@@ -696,6 +741,50 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
696 pcpu_mem_free(chunk, pcpu_chunk_struct_size); 741 pcpu_mem_free(chunk, pcpu_chunk_struct_size);
697} 742}
698 743
744/**
745 * pcpu_chunk_populated - post-population bookkeeping
746 * @chunk: pcpu_chunk which got populated
747 * @page_start: the start page
748 * @page_end: the end page
749 *
750 * Pages in [@page_start,@page_end) have been populated to @chunk. Update
751 * the bookkeeping information accordingly. Must be called after each
752 * successful population.
 *
 * CONTEXT:
 * pcpu_lock.
753 */
754static void pcpu_chunk_populated(struct pcpu_chunk *chunk,
755 int page_start, int page_end)
756{
    /* the range is half-open, so the page count is a plain difference */
757 int nr = page_end - page_start;
758
    /* the bitmap and both counters below are only changed under pcpu_lock */
759 lockdep_assert_held(&pcpu_lock);
760
761 bitmap_set(chunk->populated, page_start, nr);
762 chunk->nr_populated += nr;
    /*
     * Every newly populated page is accounted as empty here.
     * NOTE(review): no reserved-chunk check is made in this helper even
     * though the global count is documented as excluding the reserved
     * chunk - presumably callers never pass it; confirm.
     */
763 pcpu_nr_empty_pop_pages += nr;
764}
765
766/**
767 * pcpu_chunk_depopulated - post-depopulation bookkeeping
768 * @chunk: pcpu_chunk which got depopulated
769 * @page_start: the start page
770 * @page_end: the end page
771 *
772 * Pages in [@page_start,@page_end) have been depopulated from @chunk.
773 * Update the bookkeeping information accordingly. Must be called after
774 * each successful depopulation.
 *
 * CONTEXT:
 * pcpu_lock.
775 */
776static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
777 int page_start, int page_end)
778{
    /* the range is half-open, so the page count is a plain difference */
779 int nr = page_end - page_start;
780
    /* the bitmap and both counters below are only changed under pcpu_lock */
781 lockdep_assert_held(&pcpu_lock);
782
783 bitmap_clear(chunk->populated, page_start, nr);
784 chunk->nr_populated -= nr;
    /*
     * All @nr pages are subtracted from the empty-populated count.
     * NOTE(review): this presumes every page in the range was still
     * accounted as empty when it was depopulated, and that @chunk is not
     * the reserved chunk - confirm against the callers.
     */
785 pcpu_nr_empty_pop_pages -= nr;
786}
787
699/* 788/*
700 * Chunk management implementation. 789 * Chunk management implementation.
701 * 790 *
@@ -772,6 +861,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
772 struct pcpu_chunk *chunk; 861 struct pcpu_chunk *chunk;
773 const char *err; 862 const char *err;
774 bool is_atomic = !(gfp & GFP_KERNEL); 863 bool is_atomic = !(gfp & GFP_KERNEL);
864 int occ_pages = 0;
775 int slot, off, new_alloc, cpu, ret; 865 int slot, off, new_alloc, cpu, ret;
776 unsigned long flags; 866 unsigned long flags;
777 void __percpu *ptr; 867 void __percpu *ptr;
@@ -812,7 +902,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
812 spin_lock_irqsave(&pcpu_lock, flags); 902 spin_lock_irqsave(&pcpu_lock, flags);
813 } 903 }
814 904
815 off = pcpu_alloc_area(chunk, size, align, is_atomic); 905 off = pcpu_alloc_area(chunk, size, align, is_atomic,
906 &occ_pages);
816 if (off >= 0) 907 if (off >= 0)
817 goto area_found; 908 goto area_found;
818 909
@@ -845,7 +936,8 @@ restart:
845 goto restart; 936 goto restart;
846 } 937 }
847 938
848 off = pcpu_alloc_area(chunk, size, align, is_atomic); 939 off = pcpu_alloc_area(chunk, size, align, is_atomic,
940 &occ_pages);
849 if (off >= 0) 941 if (off >= 0)
850 goto area_found; 942 goto area_found;
851 } 943 }
@@ -899,17 +991,20 @@ area_found:
899 spin_lock_irqsave(&pcpu_lock, flags); 991 spin_lock_irqsave(&pcpu_lock, flags);
900 if (ret) { 992 if (ret) {
901 mutex_unlock(&pcpu_alloc_mutex); 993 mutex_unlock(&pcpu_alloc_mutex);
902 pcpu_free_area(chunk, off); 994 pcpu_free_area(chunk, off, &occ_pages);
903 err = "failed to populate"; 995 err = "failed to populate";
904 goto fail_unlock; 996 goto fail_unlock;
905 } 997 }
906 bitmap_set(chunk->populated, rs, re - rs); 998 pcpu_chunk_populated(chunk, rs, re);
907 spin_unlock_irqrestore(&pcpu_lock, flags); 999 spin_unlock_irqrestore(&pcpu_lock, flags);
908 } 1000 }
909 1001
910 mutex_unlock(&pcpu_alloc_mutex); 1002 mutex_unlock(&pcpu_alloc_mutex);
911 } 1003 }
912 1004
1005 if (chunk != pcpu_reserved_chunk)
1006 pcpu_nr_empty_pop_pages -= occ_pages;
1007
913 /* clear the areas and return address relative to base address */ 1008 /* clear the areas and return address relative to base address */
914 for_each_possible_cpu(cpu) 1009 for_each_possible_cpu(cpu)
915 memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); 1010 memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
@@ -1019,7 +1114,9 @@ static void pcpu_reclaim(struct work_struct *work)
1019 1114
1020 pcpu_for_each_pop_region(chunk, rs, re, 0, pcpu_unit_pages) { 1115 pcpu_for_each_pop_region(chunk, rs, re, 0, pcpu_unit_pages) {
1021 pcpu_depopulate_chunk(chunk, rs, re); 1116 pcpu_depopulate_chunk(chunk, rs, re);
1022 bitmap_clear(chunk->populated, rs, re - rs); 1117 spin_lock_irq(&pcpu_lock);
1118 pcpu_chunk_depopulated(chunk, rs, re);
1119 spin_unlock_irq(&pcpu_lock);
1023 } 1120 }
1024 pcpu_destroy_chunk(chunk); 1121 pcpu_destroy_chunk(chunk);
1025 } 1122 }
@@ -1041,7 +1138,7 @@ void free_percpu(void __percpu *ptr)
1041 void *addr; 1138 void *addr;
1042 struct pcpu_chunk *chunk; 1139 struct pcpu_chunk *chunk;
1043 unsigned long flags; 1140 unsigned long flags;
1044 int off; 1141 int off, occ_pages;
1045 1142
1046 if (!ptr) 1143 if (!ptr)
1047 return; 1144 return;
@@ -1055,7 +1152,10 @@ void free_percpu(void __percpu *ptr)
1055 chunk = pcpu_chunk_addr_search(addr); 1152 chunk = pcpu_chunk_addr_search(addr);
1056 off = addr - chunk->base_addr; 1153 off = addr - chunk->base_addr;
1057 1154
1058 pcpu_free_area(chunk, off); 1155 pcpu_free_area(chunk, off, &occ_pages);
1156
1157 if (chunk != pcpu_reserved_chunk)
1158 pcpu_nr_empty_pop_pages += occ_pages;
1059 1159
1060 /* if there are more than one fully free chunks, wake up grim reaper */ 1160 /* if there are more than one fully free chunks, wake up grim reaper */
1061 if (chunk->free_size == pcpu_unit_size) { 1161 if (chunk->free_size == pcpu_unit_size) {
@@ -1459,6 +1559,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1459 schunk->map_alloc = ARRAY_SIZE(smap); 1559 schunk->map_alloc = ARRAY_SIZE(smap);
1460 schunk->immutable = true; 1560 schunk->immutable = true;
1461 bitmap_fill(schunk->populated, pcpu_unit_pages); 1561 bitmap_fill(schunk->populated, pcpu_unit_pages);
1562 schunk->nr_populated = pcpu_unit_pages;
1462 1563
1463 if (ai->reserved_size) { 1564 if (ai->reserved_size) {
1464 schunk->free_size = ai->reserved_size; 1565 schunk->free_size = ai->reserved_size;
@@ -1488,6 +1589,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1488 dchunk->map_alloc = ARRAY_SIZE(dmap); 1589 dchunk->map_alloc = ARRAY_SIZE(dmap);
1489 dchunk->immutable = true; 1590 dchunk->immutable = true;
1490 bitmap_fill(dchunk->populated, pcpu_unit_pages); 1591 bitmap_fill(dchunk->populated, pcpu_unit_pages);
1592 dchunk->nr_populated = pcpu_unit_pages;
1491 1593
1492 dchunk->contig_hint = dchunk->free_size = dyn_size; 1594 dchunk->contig_hint = dchunk->free_size = dyn_size;
1493 dchunk->map[0] = 1; 1595 dchunk->map[0] = 1;
@@ -1498,6 +1600,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1498 1600
1499 /* link the first chunk in */ 1601 /* link the first chunk in */
1500 pcpu_first_chunk = dchunk ?: schunk; 1602 pcpu_first_chunk = dchunk ?: schunk;
1603 pcpu_nr_empty_pop_pages +=
1604 pcpu_count_occupied_pages(pcpu_first_chunk, 1);
1501 pcpu_chunk_relocate(pcpu_first_chunk, -1); 1605 pcpu_chunk_relocate(pcpu_first_chunk, -1);
1502 1606
1503 /* we're done */ 1607 /* we're done */