 fs/btrfs/disk-io.c             |  12
 fs/btrfs/extent_io.c           |   6
 fs/btrfs/inode.c               |   8
 fs/xfs/xfs_mount.c             |   4
 include/linux/backing-dev.h    |   2
 include/linux/blk-cgroup.h     |   6
 include/linux/mman.h           |   2
 include/linux/percpu_counter.h |   7
 include/net/inet_frag.h        |   4
 include/trace/events/percpu.h  | 125
 lib/flex_proportions.c         |   6
 lib/percpu_counter.c           |   4
 mm/Kconfig                     |   8
 mm/Makefile                    |   1
 mm/percpu-internal.h           | 166
 mm/percpu-km.c                 |  11
 mm/percpu-stats.c              | 222
 mm/percpu-vm.c                 |  12
 mm/percpu.c                    |  85
 19 files changed, 621 insertions(+), 70 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5abcbdc743fa..086dcbadce09 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1256,9 +1256,9 @@ void clean_tree_block(struct btrfs_fs_info *fs_info,
 	btrfs_assert_tree_locked(buf);
 
 	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
-		__percpu_counter_add(&fs_info->dirty_metadata_bytes,
-				     -buf->len,
-				     fs_info->dirty_metadata_batch);
+		percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+					 -buf->len,
+					 fs_info->dirty_metadata_batch);
 		/* ugh, clear_extent_buffer_dirty needs to lock the page */
 		btrfs_set_lock_blocking(buf);
 		clear_extent_buffer_dirty(buf);
@@ -4047,9 +4047,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 			buf->start, transid, fs_info->generation);
 	was_dirty = set_extent_buffer_dirty(buf);
 	if (!was_dirty)
-		__percpu_counter_add(&fs_info->dirty_metadata_bytes,
-				     buf->len,
-				     fs_info->dirty_metadata_batch);
+		percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+					 buf->len,
+					 fs_info->dirty_metadata_batch);
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
 		btrfs_print_leaf(fs_info, buf);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7a18b5762ac9..556484cf5d93 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3577,9 +3577,9 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
 		set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 		spin_unlock(&eb->refs_lock);
 		btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
-		__percpu_counter_add(&fs_info->dirty_metadata_bytes,
-				     -eb->len,
-				     fs_info->dirty_metadata_batch);
+		percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+					 -eb->len,
+					 fs_info->dirty_metadata_batch);
 		ret = 1;
 	} else {
 		spin_unlock(&eb->refs_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8d050314591c..06dea7c89bbd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1763,8 +1763,8 @@ static void btrfs_set_bit_hook(void *private_data,
 		if (btrfs_is_testing(fs_info))
 			return;
 
-		__percpu_counter_add(&fs_info->delalloc_bytes, len,
-				     fs_info->delalloc_batch);
+		percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
+					 fs_info->delalloc_batch);
 		spin_lock(&BTRFS_I(inode)->lock);
 		BTRFS_I(inode)->delalloc_bytes += len;
 		if (*bits & EXTENT_DEFRAG)
@@ -1838,8 +1838,8 @@ static void btrfs_clear_bit_hook(void *private_data,
 					&inode->vfs_inode,
 					state->start, len);
 
-		__percpu_counter_add(&fs_info->delalloc_bytes, -len,
-				     fs_info->delalloc_batch);
+		percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
+					 fs_info->delalloc_batch);
 		spin_lock(&inode->lock);
 		inode->delalloc_bytes -= len;
 		if (do_list && inode->delalloc_bytes == 0 &&
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d249546da15e..43d07f9c4e9e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1211,7 +1211,7 @@ xfs_mod_icount(
 	struct xfs_mount	*mp,
 	int64_t			delta)
 {
-	__percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+	percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
 	if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
 		ASSERT(0);
 		percpu_counter_add(&mp->m_icount, -delta);
@@ -1290,7 +1290,7 @@ xfs_mod_fdblocks(
 	else
 		batch = XFS_FDBLOCKS_BATCH;
 
-	__percpu_counter_add(&mp->m_fdblocks, delta, batch);
+	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
 	if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
 				     XFS_FDBLOCKS_BATCH) >= 0) {
 		/* we had space! */
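
[Editor's note] Worth noting in the XFS hunks: the batch handed to percpu_counter_add_batch() is the same one later passed to __percpu_counter_compare(), which can trust the cheap shared count only when it sits more than nr_cpus * batch away from the comparison value. xfs_mod_fdblocks() shrinks the batch (down to 1, in the if-branch just above the hunk shown) as free space approaches the limit, so the subsequent comparison stays precise where ENOSPC accuracy matters.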
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 557d84063934..ace73f96eb1e 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -66,7 +66,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
 static inline void __add_wb_stat(struct bdi_writeback *wb,
 				 enum wb_stat_item item, s64 amount)
 {
-	__percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH);
+	percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
 }
 
 static inline void __inc_wb_stat(struct bdi_writeback *wb,
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 01b62e7bac74..7104bea8dab1 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -518,7 +518,7 @@ static inline void blkg_stat_exit(struct blkg_stat *stat)
  */
 static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
 {
-	__percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
+	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
 }
 
 /**
@@ -597,14 +597,14 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
 
-	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
+	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
 
 	if (op_is_sync(op))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
 
-	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
+	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
 }
 
 /**
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 634c4c51fe3a..c8367041fafd 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -22,7 +22,7 @@ unsigned long vm_memory_committed(void);
 
 static inline void vm_acct_memory(long pages)
 {
-	__percpu_counter_add(&vm_committed_as, pages, vm_committed_as_batch);
+	percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch);
 }
 
 static inline void vm_unacct_memory(long pages)
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 84a109449610..ec065387f443 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -39,7 +39,8 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
-void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
+void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
+			      s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
 int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
 
@@ -50,7 +51,7 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
-	__percpu_counter_add(fbc, amount, percpu_counter_batch);
+	percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
 }
 
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
@@ -136,7 +137,7 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 }
 
 static inline void
-__percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	percpu_counter_add(fbc, amount);
 }
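
[Editor's note] The rename above is mechanical: percpu_counter_add_batch() is the old __percpu_counter_add() under a name that describes the batch semantics instead of suggesting a raw or non-irq-safe variant of percpu_counter_add(). Per-CPU deltas are folded into the shared count only once they cross the given batch, so callers trade read accuracy (up to nr_cpus * batch of drift) for cheaper updates. A minimal, hypothetical usage sketch; demo_bytes, demo() and the batch value are illustrative, not part of this series:

	#include <linux/percpu_counter.h>
	#include <linux/printk.h>

	static struct percpu_counter demo_bytes;	/* hypothetical counter */

	static int demo(void)
	{
		int err = percpu_counter_init(&demo_bytes, 0, GFP_KERNEL);

		if (err)
			return err;
		/* hot path: usually touches only this CPU's local delta */
		percpu_counter_add_batch(&demo_bytes, 4096, 1 << 20);
		/* cheap but approximate: may lag by up to nr_cpus * batch */
		pr_info("approx=%lld\n", percpu_counter_read(&demo_bytes));
		/* exact: sums every CPU's delta under the counter lock */
		pr_info("exact=%lld\n", percpu_counter_sum(&demo_bytes));
		percpu_counter_destroy(&demo_bytes);
		return 0;
	}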
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 440c1e9d0623..6fdcd2427776 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -154,12 +154,12 @@ static inline int frag_mem_limit(struct netns_frags *nf)
 
 static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
 {
-	__percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch);
+	percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
 }
 
 static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
 {
-	__percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch);
+	percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
 }
 
 static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
diff --git a/include/trace/events/percpu.h b/include/trace/events/percpu.h
new file mode 100644
index 000000000000..ad34b1bae047
--- /dev/null
+++ b/include/trace/events/percpu.h
@@ -0,0 +1,125 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM percpu
+
+#if !defined(_TRACE_PERCPU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PERCPU_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(percpu_alloc_percpu,
+
+	TP_PROTO(bool reserved, bool is_atomic, size_t size,
+		 size_t align, void *base_addr, int off, void __percpu *ptr),
+
+	TP_ARGS(reserved, is_atomic, size, align, base_addr, off, ptr),
+
+	TP_STRUCT__entry(
+		__field(	bool,			reserved	)
+		__field(	bool,			is_atomic	)
+		__field(	size_t,			size		)
+		__field(	size_t,			align		)
+		__field(	void *,			base_addr	)
+		__field(	int,			off		)
+		__field(	void __percpu *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->reserved	= reserved;
+		__entry->is_atomic	= is_atomic;
+		__entry->size		= size;
+		__entry->align		= align;
+		__entry->base_addr	= base_addr;
+		__entry->off		= off;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu base_addr=%p off=%d ptr=%p",
+		  __entry->reserved, __entry->is_atomic,
+		  __entry->size, __entry->align,
+		  __entry->base_addr, __entry->off, __entry->ptr)
+);
+
+TRACE_EVENT(percpu_free_percpu,
+
+	TP_PROTO(void *base_addr, int off, void __percpu *ptr),
+
+	TP_ARGS(base_addr, off, ptr),
+
+	TP_STRUCT__entry(
+		__field(	void *,			base_addr	)
+		__field(	int,			off		)
+		__field(	void __percpu *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->base_addr	= base_addr;
+		__entry->off		= off;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("base_addr=%p off=%d ptr=%p",
+		  __entry->base_addr, __entry->off, __entry->ptr)
+);
+
+TRACE_EVENT(percpu_alloc_percpu_fail,
+
+	TP_PROTO(bool reserved, bool is_atomic, size_t size, size_t align),
+
+	TP_ARGS(reserved, is_atomic, size, align),
+
+	TP_STRUCT__entry(
+		__field(	bool,	reserved	)
+		__field(	bool,	is_atomic	)
+		__field(	size_t,	size		)
+		__field(	size_t,	align		)
+	),
+
+	TP_fast_assign(
+		__entry->reserved	= reserved;
+		__entry->is_atomic	= is_atomic;
+		__entry->size		= size;
+		__entry->align		= align;
+	),
+
+	TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu",
+		  __entry->reserved, __entry->is_atomic,
+		  __entry->size, __entry->align)
+);
+
+TRACE_EVENT(percpu_create_chunk,
+
+	TP_PROTO(void *base_addr),
+
+	TP_ARGS(base_addr),
+
+	TP_STRUCT__entry(
+		__field(	void *,	base_addr	)
+	),
+
+	TP_fast_assign(
+		__entry->base_addr	= base_addr;
+	),
+
+	TP_printk("base_addr=%p", __entry->base_addr)
+);
+
+TRACE_EVENT(percpu_destroy_chunk,
+
+	TP_PROTO(void *base_addr),
+
+	TP_ARGS(base_addr),
+
+	TP_STRUCT__entry(
+		__field(	void *,	base_addr	)
+	),
+
+	TP_fast_assign(
+		__entry->base_addr	= base_addr;
+	),
+
+	TP_printk("base_addr=%p", __entry->base_addr)
+);
+
+#endif /* _TRACE_PERCPU_H */
+
+#include <trace/define_trace.h>
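
[Editor's note] Usage: once mm/percpu.c defines CREATE_TRACE_POINTS and includes this header (see the mm/percpu.c hunk below), the five events appear under the standard trace event tree. Assuming tracefs is mounted in the usual place, `echo 1 > /sys/kernel/debug/tracing/events/percpu/enable` turns them on and `cat /sys/kernel/debug/tracing/trace_pipe` streams records in the TP_printk() formats defined above.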
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
index a71cf1bdd4c9..2cc1f94e03a1 100644
--- a/lib/flex_proportions.c
+++ b/lib/flex_proportions.c
@@ -207,7 +207,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p,
 		if (val < (nr_cpu_ids * PROP_BATCH))
 			val = percpu_counter_sum(&pl->events);
 
-		__percpu_counter_add(&pl->events,
+		percpu_counter_add_batch(&pl->events,
 			-val + (val >> (period-pl->period)), PROP_BATCH);
 	} else
 		percpu_counter_set(&pl->events, 0);
@@ -219,7 +219,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p,
 void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
 {
 	fprop_reflect_period_percpu(p, pl);
-	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
+	percpu_counter_add_batch(&pl->events, 1, PROP_BATCH);
 	percpu_counter_add(&p->events, 1);
 }
 
@@ -267,6 +267,6 @@ void __fprop_inc_percpu_max(struct fprop_global *p,
 		return;
 	} else
 		fprop_reflect_period_percpu(p, pl);
-	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
+	percpu_counter_add_batch(&pl->events, 1, PROP_BATCH);
 	percpu_counter_add(&p->events, 1);
 }
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 9c21000df0b5..8ee7e5ec21be 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -72,7 +72,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 }
 EXPORT_SYMBOL(percpu_counter_set);
 
-void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	s64 count;
 
@@ -89,7 +89,7 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
 	}
 	preempt_enable();
 }
-EXPORT_SYMBOL(__percpu_counter_add);
+EXPORT_SYMBOL(percpu_counter_add_batch);
 
 /*
  * Add up all the per-cpu counts, return the result.  This is a more accurate
diff --git a/mm/Kconfig b/mm/Kconfig
index 398b46064544..665cb370ad38 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -706,3 +706,11 @@ config ARCH_USES_HIGH_VMA_FLAGS
 	bool
 config ARCH_HAS_PKEYS
 	bool
+
+config PERCPU_STATS
+	bool "Collect percpu memory statistics"
+	default n
+	help
+	  This feature collects and exposes statistics via debugfs. The
+	  information includes global and per chunk statistics, which can
+	  be used to help understand percpu memory usage.
diff --git a/mm/Makefile b/mm/Makefile
index 026f6a828a50..411bd24d4a7c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -103,3 +103,4 @@ obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
 obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
 obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
 obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
+obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
new file mode 100644
index 000000000000..cd2442e13d8f
--- /dev/null
+++ b/mm/percpu-internal.h
@@ -0,0 +1,166 @@
+#ifndef _MM_PERCPU_INTERNAL_H
+#define _MM_PERCPU_INTERNAL_H
+
+#include <linux/types.h>
+#include <linux/percpu.h>
+
+struct pcpu_chunk {
+#ifdef CONFIG_PERCPU_STATS
+	int			nr_alloc;	/* # of allocations */
+	size_t			max_alloc_size;	/* largest allocation size */
+#endif
+
+	struct list_head	list;		/* linked to pcpu_slot lists */
+	int			free_size;	/* free bytes in the chunk */
+	int			contig_hint;	/* max contiguous size hint */
+	void			*base_addr;	/* base address of this chunk */
+
+	int			map_used;	/* # of map entries used before the sentry */
+	int			map_alloc;	/* # of map entries allocated */
+	int			*map;		/* allocation map */
+	struct list_head	map_extend_list;/* on pcpu_map_extend_chunks */
+
+	void			*data;		/* chunk data */
+	int			first_free;	/* no free below this */
+	bool			immutable;	/* no [de]population allowed */
+	bool			has_reserved;	/* Indicates if chunk has reserved space
+						   at the beginning. Reserved chunk will
+						   contain reservation for static chunk.
+						   Dynamic chunk will contain reservation
+						   for static and reserved chunks. */
+	int			nr_populated;	/* # of populated pages */
+	unsigned long		populated[];	/* populated bitmap */
+};
+
+extern spinlock_t pcpu_lock;
+
+extern struct list_head *pcpu_slot;
+extern int pcpu_nr_slots;
+
+extern struct pcpu_chunk *pcpu_first_chunk;
+extern struct pcpu_chunk *pcpu_reserved_chunk;
+
+#ifdef CONFIG_PERCPU_STATS
+
+#include <linux/spinlock.h>
+
+struct percpu_stats {
+	u64 nr_alloc;		/* lifetime # of allocations */
+	u64 nr_dealloc;		/* lifetime # of deallocations */
+	u64 nr_cur_alloc;	/* current # of allocations */
+	u64 nr_max_alloc;	/* max # of live allocations */
+	u32 nr_chunks;		/* current # of live chunks */
+	u32 nr_max_chunks;	/* max # of live chunks */
+	size_t min_alloc_size;	/* min allocation size */
+	size_t max_alloc_size;	/* max allocation size */
+};
+
+extern struct percpu_stats pcpu_stats;
+extern struct pcpu_alloc_info pcpu_stats_ai;
+
+/*
+ * For debug purposes. We don't care about the flexible array.
+ */
+static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai)
+{
+	memcpy(&pcpu_stats_ai, ai, sizeof(struct pcpu_alloc_info));
+
+	/* initialize min_alloc_size to unit_size */
+	pcpu_stats.min_alloc_size = pcpu_stats_ai.unit_size;
+}
+
+/*
+ * pcpu_stats_area_alloc - increment area allocation stats
+ * @chunk: the location of the area being allocated
+ * @size: size of area to allocate in bytes
+ *
+ * CONTEXT:
+ * pcpu_lock.
+ */
+static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size)
+{
+	lockdep_assert_held(&pcpu_lock);
+
+	pcpu_stats.nr_alloc++;
+	pcpu_stats.nr_cur_alloc++;
+	pcpu_stats.nr_max_alloc =
+		max(pcpu_stats.nr_max_alloc, pcpu_stats.nr_cur_alloc);
+	pcpu_stats.min_alloc_size =
+		min(pcpu_stats.min_alloc_size, size);
+	pcpu_stats.max_alloc_size =
+		max(pcpu_stats.max_alloc_size, size);
+
+	chunk->nr_alloc++;
+	chunk->max_alloc_size = max(chunk->max_alloc_size, size);
+}
+
+/*
+ * pcpu_stats_area_dealloc - decrement allocation stats
+ * @chunk: the location of the area being deallocated
+ *
+ * CONTEXT:
+ * pcpu_lock.
+ */
+static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk)
+{
+	lockdep_assert_held(&pcpu_lock);
+
+	pcpu_stats.nr_dealloc++;
+	pcpu_stats.nr_cur_alloc--;
+
+	chunk->nr_alloc--;
+}
+
+/*
+ * pcpu_stats_chunk_alloc - increment chunk stats
+ */
+static inline void pcpu_stats_chunk_alloc(void)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&pcpu_lock, flags);
+
+	pcpu_stats.nr_chunks++;
+	pcpu_stats.nr_max_chunks =
+		max(pcpu_stats.nr_max_chunks, pcpu_stats.nr_chunks);
+
+	spin_unlock_irqrestore(&pcpu_lock, flags);
+}
+
+/*
+ * pcpu_stats_chunk_dealloc - decrement chunk stats
+ */
+static inline void pcpu_stats_chunk_dealloc(void)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&pcpu_lock, flags);
+
+	pcpu_stats.nr_chunks--;
+
+	spin_unlock_irqrestore(&pcpu_lock, flags);
+}
+
+#else
+
+static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai)
+{
+}
+
+static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size)
+{
+}
+
+static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk)
+{
+}
+
+static inline void pcpu_stats_chunk_alloc(void)
+{
+}
+
+static inline void pcpu_stats_chunk_dealloc(void)
+{
+}
+
+#endif /* !CONFIG_PERCPU_STATS */
+
+#endif
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index d66911ff42d9..eb58aa4c0997 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -72,6 +72,9 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
 	pcpu_chunk_populated(chunk, 0, nr_pages);
 	spin_unlock_irq(&pcpu_lock);
 
+	pcpu_stats_chunk_alloc();
+	trace_percpu_create_chunk(chunk->base_addr);
+
 	return chunk;
 }
 
@@ -79,7 +82,13 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
 {
 	const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
 
-	if (chunk && chunk->data)
+	if (!chunk)
+		return;
+
+	pcpu_stats_chunk_dealloc();
+	trace_percpu_destroy_chunk(chunk->base_addr);
+
+	if (chunk->data)
 		__free_pages(chunk->data, order_base_2(nr_pages));
 	pcpu_free_chunk(chunk);
 }
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
new file mode 100644
index 000000000000..03524a56eeff
--- /dev/null
+++ b/mm/percpu-stats.c
@@ -0,0 +1,222 @@
+/*
+ * mm/percpu-stats.c
+ *
+ * Copyright (C) 2017 Facebook Inc.
+ * Copyright (C) 2017 Dennis Zhou <dennisz@fb.com>
+ *
+ * This file is released under the GPLv2.
+ *
+ * Prints statistics about the percpu allocator and backing chunks.
+ */
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/seq_file.h>
+#include <linux/sort.h>
+#include <linux/vmalloc.h>
+
+#include "percpu-internal.h"
+
+#define P(X, Y) \
+	seq_printf(m, "  %-24s: %8lld\n", X, (long long int)Y)
+
+struct percpu_stats pcpu_stats;
+struct pcpu_alloc_info pcpu_stats_ai;
+
+static int cmpint(const void *a, const void *b)
+{
+	return *(int *)a - *(int *)b;
+}
+
+/*
+ * Iterates over all chunks to find the max # of map entries used.
+ */
+static int find_max_map_used(void)
+{
+	struct pcpu_chunk *chunk;
+	int slot, max_map_used;
+
+	max_map_used = 0;
+	for (slot = 0; slot < pcpu_nr_slots; slot++)
+		list_for_each_entry(chunk, &pcpu_slot[slot], list)
+			max_map_used = max(max_map_used, chunk->map_used);
+
+	return max_map_used;
+}
+
+/*
+ * Prints out chunk state. Fragmentation is considered between
+ * the beginning of the chunk to the last allocation.
+ */
+static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
+			    void *buffer)
+{
+	int i, s_index, last_alloc, alloc_sign, as_len;
+	int *alloc_sizes, *p;
+	/* statistics */
+	int sum_frag = 0, max_frag = 0;
+	int cur_min_alloc = 0, cur_med_alloc = 0, cur_max_alloc = 0;
+
+	alloc_sizes = buffer;
+	s_index = chunk->has_reserved ? 1 : 0;
+
+	/* find last allocation */
+	last_alloc = -1;
+	for (i = chunk->map_used - 1; i >= s_index; i--) {
+		if (chunk->map[i] & 1) {
+			last_alloc = i;
+			break;
+		}
+	}
+
+	/* if the chunk is not empty - ignoring reserve */
+	if (last_alloc >= s_index) {
+		as_len = last_alloc + 1 - s_index;
+
+		/*
+		 * Iterate through chunk map computing size info.
+		 * The first bit is overloaded to be a used flag.
+		 * negative = free space, positive = allocated
+		 */
+		for (i = 0, p = chunk->map + s_index; i < as_len; i++, p++) {
+			alloc_sign = (*p & 1) ? 1 : -1;
+			alloc_sizes[i] = alloc_sign *
+				((p[1] & ~1) - (p[0] & ~1));
+		}
+
+		sort(alloc_sizes, as_len, sizeof(chunk->map[0]), cmpint, NULL);
+
+		/* Iterate through the unallocated fragments. */
+		for (i = 0, p = alloc_sizes; *p < 0 && i < as_len; i++, p++) {
+			sum_frag -= *p;
+			max_frag = max(max_frag, -1 * (*p));
+		}
+
+		cur_min_alloc = alloc_sizes[i];
+		cur_med_alloc = alloc_sizes[(i + as_len - 1) / 2];
+		cur_max_alloc = alloc_sizes[as_len - 1];
+	}
+
+	P("nr_alloc", chunk->nr_alloc);
+	P("max_alloc_size", chunk->max_alloc_size);
+	P("free_size", chunk->free_size);
+	P("contig_hint", chunk->contig_hint);
+	P("sum_frag", sum_frag);
+	P("max_frag", max_frag);
+	P("cur_min_alloc", cur_min_alloc);
+	P("cur_med_alloc", cur_med_alloc);
+	P("cur_max_alloc", cur_max_alloc);
+	seq_putc(m, '\n');
+}
+
+static int percpu_stats_show(struct seq_file *m, void *v)
+{
+	struct pcpu_chunk *chunk;
+	int slot, max_map_used;
+	void *buffer;
+
+alloc_buffer:
+	spin_lock_irq(&pcpu_lock);
+	max_map_used = find_max_map_used();
+	spin_unlock_irq(&pcpu_lock);
+
+	buffer = vmalloc(max_map_used * sizeof(pcpu_first_chunk->map[0]));
+	if (!buffer)
+		return -ENOMEM;
+
+	spin_lock_irq(&pcpu_lock);
+
+	/* if the buffer allocated earlier is too small */
+	if (max_map_used < find_max_map_used()) {
+		spin_unlock_irq(&pcpu_lock);
+		vfree(buffer);
+		goto alloc_buffer;
+	}
+
+#define PL(X) \
+	seq_printf(m, "  %-24s: %8lld\n", #X, (long long int)pcpu_stats_ai.X)
+
+	seq_printf(m,
+			"Percpu Memory Statistics\n"
+			"Allocation Info:\n"
+			"----------------------------------------\n");
+	PL(unit_size);
+	PL(static_size);
+	PL(reserved_size);
+	PL(dyn_size);
+	PL(atom_size);
+	PL(alloc_size);
+	seq_putc(m, '\n');
+
+#undef PL
+
+#define PU(X) \
+	seq_printf(m, "  %-18s: %14llu\n", #X, (unsigned long long)pcpu_stats.X)
+
+	seq_printf(m,
+			"Global Stats:\n"
+			"----------------------------------------\n");
+	PU(nr_alloc);
+	PU(nr_dealloc);
+	PU(nr_cur_alloc);
+	PU(nr_max_alloc);
+	PU(nr_chunks);
+	PU(nr_max_chunks);
+	PU(min_alloc_size);
+	PU(max_alloc_size);
+	seq_putc(m, '\n');
+
+#undef PU
+
+	seq_printf(m,
+			"Per Chunk Stats:\n"
+			"----------------------------------------\n");
+
+	if (pcpu_reserved_chunk) {
+		seq_puts(m, "Chunk: <- Reserved Chunk\n");
+		chunk_map_stats(m, pcpu_reserved_chunk, buffer);
+	}
+
+	for (slot = 0; slot < pcpu_nr_slots; slot++) {
+		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
+			if (chunk == pcpu_first_chunk) {
+				seq_puts(m, "Chunk: <- First Chunk\n");
+				chunk_map_stats(m, chunk, buffer);
+
+
+			} else {
+				seq_puts(m, "Chunk:\n");
+				chunk_map_stats(m, chunk, buffer);
+			}
+
+		}
+	}
+
+	spin_unlock_irq(&pcpu_lock);
+
+	vfree(buffer);
+
+	return 0;
+}
+
+static int percpu_stats_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, percpu_stats_show, NULL);
+}
+
+static const struct file_operations percpu_stats_fops = {
+	.open		= percpu_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init init_percpu_stats_debugfs(void)
+{
+	debugfs_create_file("percpu_stats", 0444, NULL, NULL,
+			&percpu_stats_fops);
+
+	return 0;
+}
+
+late_initcall(init_percpu_stats_debugfs);
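
[Editor's note] Usage: with CONFIG_PERCPU_STATS=y and debugfs mounted (conventionally at /sys/kernel/debug), `cat /sys/kernel/debug/percpu_stats` renders the three sections built by percpu_stats_show(): the boot-time pcpu_alloc_info fields, the global counters, and one block per chunk. The alloc_buffer retry loop exists because vmalloc() can sleep and so pcpu_lock must be dropped while allocating; the buffer size is then re-validated under the lock in case new chunks grew the map in the meantime.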
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 9ac639499bd1..15dab691ea70 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -343,12 +343,22 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
 
 	chunk->data = vms;
 	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
+
+	pcpu_stats_chunk_alloc();
+	trace_percpu_create_chunk(chunk->base_addr);
+
 	return chunk;
 }
 
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
 {
-	if (chunk && chunk->data)
+	if (!chunk)
+		return;
+
+	pcpu_stats_chunk_dealloc();
+	trace_percpu_destroy_chunk(chunk->base_addr);
+
+	if (chunk->data)
 		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
 	pcpu_free_chunk(chunk);
 }
diff --git a/mm/percpu.c b/mm/percpu.c
index e0aa8ae7bde7..bd4130a69bbc 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -76,6 +76,11 @@
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/percpu.h>
+
+#include "percpu-internal.h"
+
 #define PCPU_SLOT_BASE_SHIFT		5	/* 1-31 shares the same slot */
 #define PCPU_DFL_MAP_ALLOC		16	/* start a map with 16 ents */
 #define PCPU_ATOMIC_MAP_MARGIN_LOW	32
@@ -103,53 +108,35 @@
 #define __pcpu_ptr_to_addr(ptr)		(void __force *)(ptr)
 #endif	/* CONFIG_SMP */
 
-struct pcpu_chunk {
-	struct list_head	list;		/* linked to pcpu_slot lists */
-	int			free_size;	/* free bytes in the chunk */
-	int			contig_hint;	/* max contiguous size hint */
-	void			*base_addr;	/* base address of this chunk */
-
-	int			map_used;	/* # of map entries used before the sentry */
-	int			map_alloc;	/* # of map entries allocated */
-	int			*map;		/* allocation map */
-	struct list_head	map_extend_list;/* on pcpu_map_extend_chunks */
-
-	void			*data;		/* chunk data */
-	int			first_free;	/* no free below this */
-	bool			immutable;	/* no [de]population allowed */
-	int			nr_populated;	/* # of populated pages */
-	unsigned long		populated[];	/* populated bitmap */
-};
-
-static int pcpu_unit_pages __read_mostly;
-static int pcpu_unit_size __read_mostly;
-static int pcpu_nr_units __read_mostly;
-static int pcpu_atom_size __read_mostly;
-static int pcpu_nr_slots __read_mostly;
-static size_t pcpu_chunk_struct_size __read_mostly;
+static int pcpu_unit_pages __ro_after_init;
+static int pcpu_unit_size __ro_after_init;
+static int pcpu_nr_units __ro_after_init;
+static int pcpu_atom_size __ro_after_init;
+int pcpu_nr_slots __ro_after_init;
+static size_t pcpu_chunk_struct_size __ro_after_init;
 
 /* cpus with the lowest and highest unit addresses */
-static unsigned int pcpu_low_unit_cpu __read_mostly;
-static unsigned int pcpu_high_unit_cpu __read_mostly;
+static unsigned int pcpu_low_unit_cpu __ro_after_init;
+static unsigned int pcpu_high_unit_cpu __ro_after_init;
 
 /* the address of the first chunk which starts with the kernel static area */
-void *pcpu_base_addr __read_mostly;
+void *pcpu_base_addr __ro_after_init;
 EXPORT_SYMBOL_GPL(pcpu_base_addr);
 
-static const int *pcpu_unit_map __read_mostly;		/* cpu -> unit */
-const unsigned long *pcpu_unit_offsets __read_mostly;	/* cpu -> unit offset */
+static const int *pcpu_unit_map __ro_after_init;		/* cpu -> unit */
+const unsigned long *pcpu_unit_offsets __ro_after_init;	/* cpu -> unit offset */
 
 /* group information, used for vm allocation */
-static int pcpu_nr_groups __read_mostly;
-static const unsigned long *pcpu_group_offsets __read_mostly;
-static const size_t *pcpu_group_sizes __read_mostly;
+static int pcpu_nr_groups __ro_after_init;
+static const unsigned long *pcpu_group_offsets __ro_after_init;
+static const size_t *pcpu_group_sizes __ro_after_init;
 
 /*
  * The first chunk which always exists.  Note that unlike other
 * chunks, this one can be allocated and mapped in several different
 * ways and thus often doesn't live in the vmalloc area.
 */
-static struct pcpu_chunk *pcpu_first_chunk;
+struct pcpu_chunk *pcpu_first_chunk __ro_after_init;
 
 /*
  * Optional reserved chunk.  This chunk reserves part of the first
@@ -158,13 +145,13 @@ static struct pcpu_chunk *pcpu_first_chunk;
  * area doesn't exist, the following variables contain NULL and 0
  * respectively.
  */
-static struct pcpu_chunk *pcpu_reserved_chunk;
-static int pcpu_reserved_chunk_limit;
+struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init;
+static int pcpu_reserved_chunk_limit __ro_after_init;
 
-static DEFINE_SPINLOCK(pcpu_lock);	/* all internal data structures */
+DEFINE_SPINLOCK(pcpu_lock);	/* all internal data structures */
 static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]pop, map ext */
 
-static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
+struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */
 
 /* chunks which need their map areas extended, protected by pcpu_lock */
 static LIST_HEAD(pcpu_map_extend_chunks);
@@ -672,6 +659,9 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme,
 	int to_free = 0;
 	int *p;
 
+	lockdep_assert_held(&pcpu_lock);
+	pcpu_stats_area_dealloc(chunk);
+
 	freeme |= 1;	/* we are searching for <given offset, in use> pair */
 
 	i = 0;
@@ -735,6 +725,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	chunk->map[0] = 0;
 	chunk->map[1] = pcpu_unit_size | 1;
 	chunk->map_used = 1;
+	chunk->has_reserved = false;
 
 	INIT_LIST_HEAD(&chunk->list);
 	INIT_LIST_HEAD(&chunk->map_extend_list);
@@ -965,8 +956,10 @@ restart:
 	 * tasks to create chunks simultaneously.  Serialize and create iff
 	 * there's still no empty chunk after grabbing the mutex.
 	 */
-	if (is_atomic)
+	if (is_atomic) {
+		err = "atomic alloc failed, no space left";
 		goto fail;
+	}
 
 	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
 		chunk = pcpu_create_chunk();
@@ -984,6 +977,7 @@ restart:
 		goto restart;
 
 area_found:
+	pcpu_stats_area_alloc(chunk, size);
 	spin_unlock_irqrestore(&pcpu_lock, flags);
 
 	/* populate if not all pages are already there */
@@ -1026,11 +1020,17 @@ area_found:
 
 	ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
 	kmemleak_alloc_percpu(ptr, size, gfp);
+
+	trace_percpu_alloc_percpu(reserved, is_atomic, size, align,
+				  chunk->base_addr, off, ptr);
+
 	return ptr;
 
 fail_unlock:
 	spin_unlock_irqrestore(&pcpu_lock, flags);
 fail:
+	trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
+
 	if (!is_atomic && warn_limit) {
 		pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
 			size, align, is_atomic, err);
@@ -1280,6 +1280,8 @@ void free_percpu(void __percpu *ptr)
 		}
 	}
 
+	trace_percpu_free_percpu(chunk->base_addr, off, ptr);
+
 	spin_unlock_irqrestore(&pcpu_lock, flags);
 }
 EXPORT_SYMBOL_GPL(free_percpu);
@@ -1656,6 +1658,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
 		BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);
 
+	pcpu_stats_save_ai(ai);
+
 	/*
 	 * Allocate chunk slots.  The additional last slot is for
 	 * empty chunks.
@@ -1699,6 +1703,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	if (schunk->free_size)
 		schunk->map[++schunk->map_used] = ai->static_size + schunk->free_size;
 	schunk->map[schunk->map_used] |= 1;
+	schunk->has_reserved = true;
 
 	/* init dynamic chunk if necessary */
 	if (dyn_size) {
@@ -1717,6 +1722,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 		dchunk->map[1] = pcpu_reserved_chunk_limit;
 		dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1;
 		dchunk->map_used = 2;
+		dchunk->has_reserved = true;
 	}
 
 	/* link the first chunk in */
@@ -1725,6 +1731,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	pcpu_count_occupied_pages(pcpu_first_chunk, 1);
 	pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
+	pcpu_stats_chunk_alloc();
+	trace_percpu_create_chunk(base_addr);
+
 	/* we're done */
 	pcpu_base_addr = base_addr;
 	return 0;
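
[Editor's note] On the __read_mostly -> __ro_after_init conversions in the mm/percpu.c hunks above: __read_mostly only steers a variable into a cache-friendly section, while __ro_after_init additionally lets the kernel write-protect it once init completes (on architectures with strict kernel RWX support). A minimal, hypothetical sketch of the contract; demo_limit and demo_setup() are illustrative, not part of this series:

	#include <linux/cache.h>
	#include <linux/init.h>

	static int demo_limit __ro_after_init;	/* hypothetical */

	static int __init demo_setup(void)
	{
		demo_limit = 128;	/* ok: init code may still write */
		return 0;
	}
	early_initcall(demo_setup);

	/* any write to demo_limit after mark_rodata_ro() runs will fault */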