author		Eric Dumazet <eric.dumazet@gmail.com>	2011-06-10 15:45:51 -0400
committer	David S. Miller <davem@davemloft.net>	2011-06-11 19:23:59 -0400
commit		8f0ea0fe3a036a47767f9c80e81b13e379a1f43b (patch)
tree		6f4079c8da32f3e1ac4860ac9f90b26e9df86e3b
parent		830a9c75514b477994fd3847f72654d3dbdfa5ca (diff)
snmp: reduce percpu needs by 50%
SNMP mibs use two percpu arrays, one used in BH context, another in USER
context. With the increasing number of cpus in machines, and the fact that
ipv6 uses a per network device ipstats_mib, this consumes a lot of memory
if many network devices are registered.

commit be281e554e2a (ipv6: reduce per device ICMP mib sizes) shrank percpu
needs for ipv6, but we can reduce memory use a bit more.

With recent percpu infrastructure (irqsafe_cpu_inc() ...), we no longer need
this BH/USER separation, since we can update counters in a single x86
instruction regardless of the BH/USER context.

Arches other than x86 might need to disable irqs in their irqsafe_cpu_inc()
implementation: if this happens to be a problem, we can make SNMP_ARRAY_SZ
arch dependent, but a previous poll of arch maintainers
(https://lkml.org/lkml/2011/3/17/174) did not raise strong opposition.

Only on 32bit arches do we need to disable BH for 64bit counter updates done
from USER context (currently used for the IP MIB).

This also reduces vmlinux size:

1) x86_64 build

$ size vmlinux.before vmlinux.after
   text	   data	    bss	    dec	    hex	filename
7853650	1293772	1896448	11043870	 a8841e	vmlinux.before
7850578	1293772	1896448	11040798	 a8781e	vmlinux.after

2) i386 build

$ size vmlinux.before vmlinux.afterpatch
   text	   data	    bss	    dec	    hex	filename
6039335	 635076	3670016	10344427	 9dd7eb	vmlinux.before
6037342	 635076	3670016	10342434	 9dd022	vmlinux.afterpatch

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Andi Kleen <andi@firstfloor.org>
CC: Ingo Molnar <mingo@elte.hu>
CC: Tejun Heo <tj@kernel.org>
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: linux-arch@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
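A minimal sketch of the per-cpu primitives this argument rests on (not part
of the patch; kernel context of this era assumed, and "pkt_count" is only an
illustrative counter):

	DEFINE_PER_CPU(unsigned long, pkt_count);

	__this_cpu_inc(pkt_count);	/* no protection: caller is already in a
					 * context where no concurrent updater can
					 * preempt it, e.g. BH */
	this_cpu_inc(pkt_count);	/* safe against preemption */
	irqsafe_cpu_inc(pkt_count);	/* safe against interrupts as well; on x86
					 * all three compile to a single
					 * incl %gs:... instruction, other arches
					 * may fall back to local_irq_save()/restore() */

Because irqsafe_cpu_inc() can be used from both BH and process context on the
same per-cpu copy, one copy per mib is enough, which is what SNMP_ARRAY_SZ == 1
below expresses.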
-rw-r--r--	include/net/snmp.h	86
-rw-r--r--	net/ipv4/af_inet.c	52
2 files changed, 56 insertions(+), 82 deletions(-)
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 479083a78b0c..8f0f9ac0307f 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -116,57 +116,51 @@ struct linux_xfrm_mib {
 	unsigned long	mibs[LINUX_MIB_XFRMMAX];
 };
 
-/*
- * FIXME: On x86 and some other CPUs the split into user and softirq parts
- * is not needed because addl $1,memory is atomic against interrupts (but
- * atomic_inc would be overkill because of the lock cycles). Wants new
- * nonlocked_atomic_inc() primitives -AK
- */
+#define SNMP_ARRAY_SZ 1
+
 #define DEFINE_SNMP_STAT(type, name)	\
-	__typeof__(type) __percpu *name[2]
+	__typeof__(type) __percpu *name[SNMP_ARRAY_SZ]
 #define DEFINE_SNMP_STAT_ATOMIC(type, name)	\
 	__typeof__(type) *name
 #define DECLARE_SNMP_STAT(type, name)	\
-	extern __typeof__(type) __percpu *name[2]
-
-#define SNMP_STAT_BHPTR(name)	(name[0])
-#define SNMP_STAT_USRPTR(name)	(name[1])
+	extern __typeof__(type) __percpu *name[SNMP_ARRAY_SZ]
 
 #define SNMP_INC_STATS_BH(mib, field)	\
 			__this_cpu_inc(mib[0]->mibs[field])
+
 #define SNMP_INC_STATS_USER(mib, field)	\
-			this_cpu_inc(mib[1]->mibs[field])
+			irqsafe_cpu_inc(mib[0]->mibs[field])
+
 #define SNMP_INC_STATS_ATOMIC_LONG(mib, field)	\
 			atomic_long_inc(&mib->mibs[field])
+
 #define SNMP_INC_STATS(mib, field)	\
-			this_cpu_inc(mib[!in_softirq()]->mibs[field])
+			irqsafe_cpu_inc(mib[0]->mibs[field])
+
 #define SNMP_DEC_STATS(mib, field)	\
-			this_cpu_dec(mib[!in_softirq()]->mibs[field])
+			irqsafe_cpu_dec(mib[0]->mibs[field])
+
 #define SNMP_ADD_STATS_BH(mib, field, addend)	\
 			__this_cpu_add(mib[0]->mibs[field], addend)
+
 #define SNMP_ADD_STATS_USER(mib, field, addend)	\
-			this_cpu_add(mib[1]->mibs[field], addend)
+			irqsafe_cpu_add(mib[0]->mibs[field], addend)
+
 #define SNMP_ADD_STATS(mib, field, addend)	\
-			this_cpu_add(mib[!in_softirq()]->mibs[field], addend)
+			irqsafe_cpu_add(mib[0]->mibs[field], addend)
 /*
  * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr"
  * to make @ptr a non-percpu pointer.
  */
 #define SNMP_UPD_PO_STATS(mib, basefield, addend)	\
 	do { \
-		__typeof__(*mib[0]) *ptr; \
-		preempt_disable(); \
-		ptr = this_cpu_ptr((mib)[!in_softirq()]);	\
-		ptr->mibs[basefield##PKTS]++; \
-		ptr->mibs[basefield##OCTETS] += addend;\
-		preempt_enable(); \
+		irqsafe_cpu_inc(mib[0]->mibs[basefield##PKTS]);		\
+		irqsafe_cpu_add(mib[0]->mibs[basefield##OCTETS], addend);	\
 	} while (0)
 #define SNMP_UPD_PO_STATS_BH(mib, basefield, addend)	\
 	do { \
-		__typeof__(*mib[0]) *ptr = \
-			__this_cpu_ptr((mib)[0]); \
-		ptr->mibs[basefield##PKTS]++; \
-		ptr->mibs[basefield##OCTETS] += addend;\
+		__this_cpu_inc(mib[0]->mibs[basefield##PKTS]);		\
+		__this_cpu_add(mib[0]->mibs[basefield##OCTETS], addend);	\
 	} while (0)
 
 
@@ -179,40 +173,20 @@ struct linux_xfrm_mib {
 		ptr->mibs[field] += addend;			\
 		u64_stats_update_end(&ptr->syncp);		\
 	} while (0)
+
 #define SNMP_ADD_STATS64_USER(mib, field, addend) 		\
 	do {							\
-		__typeof__(*mib[0]) *ptr;			\
-		preempt_disable();				\
-		ptr = __this_cpu_ptr((mib)[1]);			\
-		u64_stats_update_begin(&ptr->syncp);		\
-		ptr->mibs[field] += addend;			\
-		u64_stats_update_end(&ptr->syncp);		\
-		preempt_enable();				\
+		local_bh_disable();				\
+		SNMP_ADD_STATS64_BH(mib, field, addend);	\
+		local_bh_enable();				\
 	} while (0)
+
 #define SNMP_ADD_STATS64(mib, field, addend)			\
-	do {							\
-		__typeof__(*mib[0]) *ptr;			\
-		preempt_disable();				\
-		ptr = __this_cpu_ptr((mib)[!in_softirq()]);	\
-		u64_stats_update_begin(&ptr->syncp);		\
-		ptr->mibs[field] += addend;			\
-		u64_stats_update_end(&ptr->syncp);		\
-		preempt_enable();				\
-	} while (0)
+		SNMP_ADD_STATS64_USER(mib, field, addend)
+
 #define SNMP_INC_STATS64_BH(mib, field) SNMP_ADD_STATS64_BH(mib, field, 1)
 #define SNMP_INC_STATS64_USER(mib, field) SNMP_ADD_STATS64_USER(mib, field, 1)
 #define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1)
-#define SNMP_UPD_PO_STATS64(mib, basefield, addend)		\
-	do {							\
-		__typeof__(*mib[0]) *ptr;			\
-		preempt_disable();				\
-		ptr = __this_cpu_ptr((mib)[!in_softirq()]);	\
-		u64_stats_update_begin(&ptr->syncp);		\
-		ptr->mibs[basefield##PKTS]++;			\
-		ptr->mibs[basefield##OCTETS] += addend;		\
-		u64_stats_update_end(&ptr->syncp);		\
-		preempt_enable();				\
-	} while (0)
 #define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend)		\
 	do {							\
 		__typeof__(*mib[0]) *ptr;			\
@@ -222,6 +196,12 @@ struct linux_xfrm_mib {
 		ptr->mibs[basefield##OCTETS] += addend;		\
 		u64_stats_update_end(&ptr->syncp);		\
 	} while (0)
+#define SNMP_UPD_PO_STATS64(mib, basefield, addend)		\
+	do {							\
+		local_bh_disable();				\
+		SNMP_UPD_PO_STATS64_BH(mib, basefield, addend);	\
+		local_bh_enable();				\
+	} while (0)
 #else
 #define SNMP_INC_STATS64_BH(mib, field)		SNMP_INC_STATS_BH(mib, field)
 #define SNMP_INC_STATS64_USER(mib, field)	SNMP_INC_STATS_USER(mib, field)
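For context, call sites do not change with this patch: they reach these macros
through per-protocol wrappers. A hedged illustration, paraphrased from
include/net/ip.h of this era (the exact definitions may differ slightly):

	#define IP_INC_STATS(net, field) \
		SNMP_INC_STATS((net)->mib.ip_statistics, field)
	#define IP_INC_STATS_BH(net, field) \
		SNMP_INC_STATS_BH((net)->mib.ip_statistics, field)

Converting the SNMP_* macro bodies, plus the fold/init/free helpers below, is
therefore enough to drop the second per-cpu array tree-wide.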
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9c1926027a26..83673d23d4dd 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1437,11 +1437,11 @@ EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 {
 	unsigned long res = 0;
-	int i;
+	int i, j;
 
 	for_each_possible_cpu(i) {
-		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
-		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+		for (j = 0; j < SNMP_ARRAY_SZ; j++)
+			res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt);
 	}
 	return res;
 }
@@ -1455,28 +1455,19 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		void *bhptr, *userptr;
+		void *bhptr;
 		struct u64_stats_sync *syncp;
-		u64 v_bh, v_user;
+		u64 v;
 		unsigned int start;
 
-		/* first mib used by softirq context, we must use _bh() accessors */
-		bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
+		bhptr = per_cpu_ptr(mib[0], cpu);
 		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
 		do {
 			start = u64_stats_fetch_begin_bh(syncp);
-			v_bh = *(((u64 *) bhptr) + offt);
+			v = *(((u64 *) bhptr) + offt);
 		} while (u64_stats_fetch_retry_bh(syncp, start));
 
-		/* second mib used in USER context */
-		userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
-		syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
-		do {
-			start = u64_stats_fetch_begin(syncp);
-			v_user = *(((u64 *) userptr) + offt);
-		} while (u64_stats_fetch_retry(syncp, start));
-
-		res += v_bh + v_user;
+		res += v;
 	}
 	return res;
 }
@@ -1488,25 +1479,28 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 	BUG_ON(ptr == NULL);
 	ptr[0] = __alloc_percpu(mibsize, align);
 	if (!ptr[0])
-		goto err0;
+		return -ENOMEM;
+#if SNMP_ARRAY_SZ == 2
 	ptr[1] = __alloc_percpu(mibsize, align);
-	if (!ptr[1])
-		goto err1;
+	if (!ptr[1]) {
+		free_percpu(ptr[0]);
+		ptr[0] = NULL;
+		return -ENOMEM;
+	}
+#endif
 	return 0;
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(snmp_mib_init);
 
-void snmp_mib_free(void __percpu *ptr[2])
+void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ])
 {
+	int i;
+
 	BUG_ON(ptr == NULL);
-	free_percpu(ptr[0]);
-	free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
+	for (i = 0; i < SNMP_ARRAY_SZ; i++) {
+		free_percpu(ptr[i]);
+		ptr[i] = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(snmp_mib_free);
 
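To make the "50%" in the subject concrete, a back-of-the-envelope model of the
per-cpu memory accounting (plain userland C, not kernel code; every input below
is an arbitrary example value): each mib costs SNMP_ARRAY_SZ * mibsize bytes
per possible CPU, and ipv6 keeps one ipstats_mib per registered network device.

	#include <stdio.h>
	#include <stdlib.h>

	int main(int argc, char **argv)
	{
		/* bytes per mib copy, possible CPUs, net devices: example values */
		unsigned long mibsize = argc > 1 ? strtoul(argv[1], NULL, 0) : 288;
		unsigned long cpus    = argc > 2 ? strtoul(argv[2], NULL, 0) : 16;
		unsigned long devs    = argc > 3 ? strtoul(argv[3], NULL, 0) : 1000;

		/* two per-cpu copies (SNMP_ARRAY_SZ == 2) vs one (SNMP_ARRAY_SZ == 1) */
		unsigned long before = 2 * mibsize * cpus * devs;
		unsigned long after  = 1 * mibsize * cpus * devs;

		printf("percpu ipstats memory: %lu -> %lu bytes (saved %lu)\n",
		       before, after, before - after);
		return 0;
	}

With the example defaults this prints 9216000 -> 4608000 bytes; the vmlinux
size reduction quoted above is separate and comes from the simpler macro
expansions.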