author		Tejun Heo <tj@kernel.org>	2014-09-02 14:46:02 -0400
committer	Tejun Heo <tj@kernel.org>	2014-09-02 14:46:02 -0400
commit		b38d08f3181c5025a7ce84646494cc4748492a3b (patch)
tree		3e20c06c7dfe6f8fb301e01c2a4a5dc0b55f911e
parent		a63d4ac4ab6094c051a5a240260d16117a7a2f86 (diff)
percpu: restructure locking
At first, the percpu allocator required a sleepable context for both the alloc and free paths and used pcpu_alloc_mutex to protect everything.  Later, pcpu_lock was introduced to protect the index data structures so that the free path can be invoked from atomic contexts.  The conversion only updated what was necessary and left most of the allocation path under pcpu_alloc_mutex.

The percpu allocator is planned to add support for atomic allocation and this patch restructures locking so that the coverage of pcpu_alloc_mutex is further reduced.

* pcpu_alloc() now grabs pcpu_alloc_mutex only while creating a new chunk
  and populating the allocated area.  Everything else is now protected
  solely by pcpu_lock.

  After this change, multiple instances of pcpu_extend_area_map() may
  race but the function already implements sufficient synchronization
  using pcpu_lock.

  This also allows multiple allocators to arrive at new chunk creation.
  To avoid creating multiple empty chunks back-to-back, a new chunk is
  created iff there is no other empty chunk after grabbing
  pcpu_alloc_mutex.

* pcpu_lock is now held while modifying the chunk->populated bitmap.
  After this, all data structures are protected by pcpu_lock.

Signed-off-by: Tejun Heo <tj@kernel.org>
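The chunk-creation change above boils down to a double-checked pattern: after dropping pcpu_lock, re-verify the "no empty chunk" condition under pcpu_alloc_mutex and only then do the sleepable creation.  The following condensed C sketch is not part of the patch; it merely reuses identifiers from the hunks below (chunk, flags, the fail/restart labels) to show the shape of the new slow path:

	/* slow path: no free area found, pcpu_lock already dropped */
	mutex_lock(&pcpu_alloc_mutex);

	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
		/* still no empty chunk: this task creates one (may sleep) */
		chunk = pcpu_create_chunk();
		if (!chunk)
			goto fail;

		spin_lock_irqsave(&pcpu_lock, flags);
		pcpu_chunk_relocate(chunk, -1);	/* publish under pcpu_lock */
	} else {
		/* another task beat us to it; just retake pcpu_lock */
		spin_lock_irqsave(&pcpu_lock, flags);
	}

	mutex_unlock(&pcpu_alloc_mutex);
	goto restart;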
-rw-r--r--	mm/percpu-km.c	2
-rw-r--r--	mm/percpu.c	75
2 files changed, 37 insertions(+), 40 deletions(-)
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index 67a971b7f745..e662b4947a65 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -68,7 +68,9 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
 	chunk->data = pages;
 	chunk->base_addr = page_address(pages) - pcpu_group_offsets[0];
 
+	spin_lock_irq(&pcpu_lock);
 	bitmap_fill(chunk->populated, nr_pages);
+	spin_unlock_irq(&pcpu_lock);
 
 	return chunk;
 }
diff --git a/mm/percpu.c b/mm/percpu.c
index fe5de97d7caa..e59f7b405bed 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -152,31 +152,12 @@ static struct pcpu_chunk *pcpu_reserved_chunk;
 static int pcpu_reserved_chunk_limit;
 
 /*
- * Synchronization rules.
- *
- * There are two locks - pcpu_alloc_mutex and pcpu_lock.  The former
- * protects allocation/reclaim paths, chunks, populated bitmap and
- * vmalloc mapping.  The latter is a spinlock and protects the index
- * data structures - chunk slots, chunks and area maps in chunks.
- *
- * During allocation, pcpu_alloc_mutex is kept locked all the time and
- * pcpu_lock is grabbed and released as necessary.  All actual memory
- * allocations are done using GFP_KERNEL with pcpu_lock released.  In
- * general, percpu memory can't be allocated with irq off but
- * irqsave/restore are still used in alloc path so that it can be used
- * from early init path - sched_init() specifically.
- *
- * Free path accesses and alters only the index data structures, so it
- * can be safely called from atomic context.  When memory needs to be
- * returned to the system, free path schedules reclaim_work which
- * grabs both pcpu_alloc_mutex and pcpu_lock, unlinks chunks to be
- * reclaimed, release both locks and frees the chunks.  Note that it's
- * necessary to grab both locks to remove a chunk from circulation as
- * allocation path might be referencing the chunk with only
- * pcpu_alloc_mutex locked.
+ * Free path accesses and alters only the index data structures and can be
+ * safely called from atomic context.  When memory needs to be returned to
+ * the system, free path schedules reclaim_work.
  */
-static DEFINE_MUTEX(pcpu_alloc_mutex);	/* protects whole alloc and reclaim */
-static DEFINE_SPINLOCK(pcpu_lock);	/* protects index data structures */
+static DEFINE_SPINLOCK(pcpu_lock);	/* all internal data structures */
+static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]pop */
 
 static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
 
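Under the new rules the split of responsibilities looks roughly like this.  The fragment below is illustrative only and not part of the patch; it reuses identifiers from this file (pcpu_lock, pcpu_alloc_mutex, pcpu_free_area(), chunk, off, flags) and is not compilable on its own:

	/* free path: touches only index data, safe in atomic context */
	spin_lock_irqsave(&pcpu_lock, flags);
	pcpu_free_area(chunk, off);
	spin_unlock_irqrestore(&pcpu_lock, flags);

	/* alloc slow path: sleepable work is confined to the mutex */
	mutex_lock(&pcpu_alloc_mutex);
	/* ... pcpu_create_chunk() / pcpu_populate_chunk() may sleep here ... */
	mutex_unlock(&pcpu_alloc_mutex);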
@@ -709,7 +690,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 	static int warn_limit = 10;
 	struct pcpu_chunk *chunk;
 	const char *err;
-	int slot, off, new_alloc, cpu;
+	int slot, off, new_alloc, cpu, ret;
 	int page_start, page_end, rs, re;
 	unsigned long flags;
 	void __percpu *ptr;
@@ -729,7 +710,6 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 		return NULL;
 	}
 
-	mutex_lock(&pcpu_alloc_mutex);
 	spin_lock_irqsave(&pcpu_lock, flags);
 
 	/* serve reserved allocations from the reserved chunk if available */
@@ -745,7 +725,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 			spin_unlock_irqrestore(&pcpu_lock, flags);
 			if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
 				err = "failed to extend area map of reserved chunk";
-				goto fail_unlock_mutex;
+				goto fail;
 			}
 			spin_lock_irqsave(&pcpu_lock, flags);
 		}
@@ -771,7 +751,7 @@ restart:
 				if (pcpu_extend_area_map(chunk,
 							 new_alloc) < 0) {
 					err = "failed to extend area map";
-					goto fail_unlock_mutex;
+					goto fail;
 				}
 				spin_lock_irqsave(&pcpu_lock, flags);
 				/*
@@ -787,37 +767,53 @@ restart:
 		}
 	}
 
-	/* hmmm... no space left, create a new chunk */
 	spin_unlock_irqrestore(&pcpu_lock, flags);
 
-	chunk = pcpu_create_chunk();
-	if (!chunk) {
-		err = "failed to allocate new chunk";
-		goto fail_unlock_mutex;
+	/*
+	 * No space left.  Create a new chunk.  We don't want multiple
+	 * tasks to create chunks simultaneously.  Serialize and create iff
+	 * there's still no empty chunk after grabbing the mutex.
+	 */
+	mutex_lock(&pcpu_alloc_mutex);
+
+	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
+		chunk = pcpu_create_chunk();
+		if (!chunk) {
+			err = "failed to allocate new chunk";
+			goto fail;
+		}
+
+		spin_lock_irqsave(&pcpu_lock, flags);
+		pcpu_chunk_relocate(chunk, -1);
+	} else {
+		spin_lock_irqsave(&pcpu_lock, flags);
 	}
 
-	spin_lock_irqsave(&pcpu_lock, flags);
-	pcpu_chunk_relocate(chunk, -1);
+	mutex_unlock(&pcpu_alloc_mutex);
 	goto restart;
 
 area_found:
 	spin_unlock_irqrestore(&pcpu_lock, flags);
 
 	/* populate if not all pages are already there */
+	mutex_lock(&pcpu_alloc_mutex);
 	page_start = PFN_DOWN(off);
 	page_end = PFN_UP(off + size);
 
 	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
 		WARN_ON(chunk->immutable);
 
-		if (pcpu_populate_chunk(chunk, rs, re)) {
-			spin_lock_irqsave(&pcpu_lock, flags);
+		ret = pcpu_populate_chunk(chunk, rs, re);
+
+		spin_lock_irqsave(&pcpu_lock, flags);
+		if (ret) {
+			mutex_unlock(&pcpu_alloc_mutex);
 			pcpu_free_area(chunk, off);
 			err = "failed to populate";
 			goto fail_unlock;
 		}
-
 		bitmap_set(chunk->populated, rs, re - rs);
+		spin_unlock_irqrestore(&pcpu_lock, flags);
 	}
 
 	mutex_unlock(&pcpu_alloc_mutex);
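The design choice in the population loop above is the usual "do the sleepable work outside the spinlock, then publish the result under it" shape.  A condensed restatement, not part of the patch and with the error string omitted:

		ret = pcpu_populate_chunk(chunk, rs, re);	/* may sleep */

		spin_lock_irqsave(&pcpu_lock, flags);
		if (ret) {
			/* roll back: drop the mutex and release the area */
			mutex_unlock(&pcpu_alloc_mutex);
			pcpu_free_area(chunk, off);
			goto fail_unlock;
		}
		bitmap_set(chunk->populated, rs, re - rs);	/* under pcpu_lock */
		spin_unlock_irqrestore(&pcpu_lock, flags);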
@@ -832,8 +828,7 @@ area_found:
 
 fail_unlock:
 	spin_unlock_irqrestore(&pcpu_lock, flags);
-fail_unlock_mutex:
-	mutex_unlock(&pcpu_alloc_mutex);
+fail:
 	if (warn_limit) {
 		pr_warning("PERCPU: allocation failed, size=%zu align=%zu, "
 			   "%s\n", size, align, err);