aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/icmp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/icmp.c')
-rw-r--r-- net/ipv4/icmp.c 188
1 files changed, 119 insertions, 69 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a944e8053e28..803bc9f173a7 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -188,29 +188,6 @@ struct icmp_err icmp_err_convert[] = {
188 }, 188 },
189}; 189};
190 190
191/* Control parameters for ECHO replies. */
192int sysctl_icmp_echo_ignore_all __read_mostly;
193int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
194
195/* Control parameter - ignore bogus broadcast responses? */
196int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
197
198/*
199 * Configurable global rate limit.
200 *
201 * ratelimit defines tokens/packet consumed for dst->rate_token bucket
202 * ratemask defines which icmp types are ratelimited by setting
203 * its bit position.
204 *
205 * default:
206 * dest unreachable (3), source quench (4),
207 * time exceeded (11), parameter problem (12)
208 */
209
210int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
211int sysctl_icmp_ratemask __read_mostly = 0x1818;
212int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
213
214/* 191/*
215 * ICMP control array. This specifies what to do with each ICMP. 192 * ICMP control array. This specifies what to do with each ICMP.
216 */ 193 */
@@ -229,14 +206,16 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
229 * 206 *
230 * On SMP we have one ICMP socket per-cpu. 207 * On SMP we have one ICMP socket per-cpu.
231 */ 208 */
232static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; 209static struct sock *icmp_sk(struct net *net)
233#define icmp_socket __get_cpu_var(__icmp_socket) 210{
211 return net->ipv4.icmp_sk[smp_processor_id()];
212}
234 213
235static inline int icmp_xmit_lock(void) 214static inline int icmp_xmit_lock(struct sock *sk)
236{ 215{
237 local_bh_disable(); 216 local_bh_disable();
238 217
239 if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { 218 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
240 /* This can happen if the output path signals a 219 /* This can happen if the output path signals a
241 * dst_link_failure() for an outgoing ICMP packet. 220 * dst_link_failure() for an outgoing ICMP packet.
242 */ 221 */
@@ -246,9 +225,9 @@ static inline int icmp_xmit_lock(void)
246 return 0; 225 return 0;
247} 226}
248 227
249static inline void icmp_xmit_unlock(void) 228static inline void icmp_xmit_unlock(struct sock *sk)
250{ 229{
251 spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); 230 spin_unlock_bh(&sk->sk_lock.slock);
252} 231}
253 232
254/* 233/*
@@ -291,7 +270,8 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
291 return rc; 270 return rc;
292} 271}
293 272
294static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) 273static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
274 int type, int code)
295{ 275{
296 struct dst_entry *dst = &rt->u.dst; 276 struct dst_entry *dst = &rt->u.dst;
297 int rc = 1; 277 int rc = 1;
@@ -308,8 +288,8 @@ static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
308 goto out; 288 goto out;
309 289
310 /* Limit if icmp type is enabled in ratemask. */ 290 /* Limit if icmp type is enabled in ratemask. */
311 if ((1 << type) & sysctl_icmp_ratemask) 291 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask)
312 rc = xrlim_allow(dst, sysctl_icmp_ratelimit); 292 rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit);
313out: 293out:
314 return rc; 294 return rc;
315} 295}
@@ -346,19 +326,21 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
346static void icmp_push_reply(struct icmp_bxm *icmp_param, 326static void icmp_push_reply(struct icmp_bxm *icmp_param,
347 struct ipcm_cookie *ipc, struct rtable *rt) 327 struct ipcm_cookie *ipc, struct rtable *rt)
348{ 328{
329 struct sock *sk;
349 struct sk_buff *skb; 330 struct sk_buff *skb;
350 331
351 if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, 332 sk = icmp_sk(dev_net(rt->u.dst.dev));
333 if (ip_append_data(sk, icmp_glue_bits, icmp_param,
352 icmp_param->data_len+icmp_param->head_len, 334 icmp_param->data_len+icmp_param->head_len,
353 icmp_param->head_len, 335 icmp_param->head_len,
354 ipc, rt, MSG_DONTWAIT) < 0) 336 ipc, rt, MSG_DONTWAIT) < 0)
355 ip_flush_pending_frames(icmp_socket->sk); 337 ip_flush_pending_frames(sk);
356 else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { 338 else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
357 struct icmphdr *icmph = icmp_hdr(skb); 339 struct icmphdr *icmph = icmp_hdr(skb);
358 __wsum csum = 0; 340 __wsum csum = 0;
359 struct sk_buff *skb1; 341 struct sk_buff *skb1;
360 342
361 skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { 343 skb_queue_walk(&sk->sk_write_queue, skb1) {
362 csum = csum_add(csum, skb1->csum); 344 csum = csum_add(csum, skb1->csum);
363 } 345 }
364 csum = csum_partial_copy_nocheck((void *)&icmp_param->data, 346 csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
@@ -366,7 +348,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
366 icmp_param->head_len, csum); 348 icmp_param->head_len, csum);
367 icmph->checksum = csum_fold(csum); 349 icmph->checksum = csum_fold(csum);
368 skb->ip_summed = CHECKSUM_NONE; 350 skb->ip_summed = CHECKSUM_NONE;
369 ip_push_pending_frames(icmp_socket->sk); 351 ip_push_pending_frames(sk);
370 } 352 }
371} 353}
372 354
@@ -376,16 +358,17 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
376 358
377static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) 359static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
378{ 360{
379 struct sock *sk = icmp_socket->sk;
380 struct inet_sock *inet = inet_sk(sk);
381 struct ipcm_cookie ipc; 361 struct ipcm_cookie ipc;
382 struct rtable *rt = (struct rtable *)skb->dst; 362 struct rtable *rt = skb->rtable;
363 struct net *net = dev_net(rt->u.dst.dev);
364 struct sock *sk = icmp_sk(net);
365 struct inet_sock *inet = inet_sk(sk);
383 __be32 daddr; 366 __be32 daddr;
384 367
385 if (ip_options_echo(&icmp_param->replyopts, skb)) 368 if (ip_options_echo(&icmp_param->replyopts, skb))
386 return; 369 return;
387 370
388 if (icmp_xmit_lock()) 371 if (icmp_xmit_lock(sk))
389 return; 372 return;
390 373
391 icmp_param->data.icmph.checksum = 0; 374 icmp_param->data.icmph.checksum = 0;
@@ -405,15 +388,15 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
405 .tos = RT_TOS(ip_hdr(skb)->tos) } }, 388 .tos = RT_TOS(ip_hdr(skb)->tos) } },
406 .proto = IPPROTO_ICMP }; 389 .proto = IPPROTO_ICMP };
407 security_skb_classify_flow(skb, &fl); 390 security_skb_classify_flow(skb, &fl);
408 if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl)) 391 if (ip_route_output_key(net, &rt, &fl))
409 goto out_unlock; 392 goto out_unlock;
410 } 393 }
411 if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 394 if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
412 icmp_param->data.icmph.code)) 395 icmp_param->data.icmph.code))
413 icmp_push_reply(icmp_param, &ipc, rt); 396 icmp_push_reply(icmp_param, &ipc, rt);
414 ip_rt_put(rt); 397 ip_rt_put(rt);
415out_unlock: 398out_unlock:
416 icmp_xmit_unlock(); 399 icmp_xmit_unlock(sk);
417} 400}
418 401
419 402
@@ -433,15 +416,17 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
433 struct iphdr *iph; 416 struct iphdr *iph;
434 int room; 417 int room;
435 struct icmp_bxm icmp_param; 418 struct icmp_bxm icmp_param;
436 struct rtable *rt = (struct rtable *)skb_in->dst; 419 struct rtable *rt = skb_in->rtable;
437 struct ipcm_cookie ipc; 420 struct ipcm_cookie ipc;
438 __be32 saddr; 421 __be32 saddr;
439 u8 tos; 422 u8 tos;
440 struct net *net; 423 struct net *net;
424 struct sock *sk;
441 425
442 if (!rt) 426 if (!rt)
443 goto out; 427 goto out;
444 net = rt->u.dst.dev->nd_net; 428 net = dev_net(rt->u.dst.dev);
429 sk = icmp_sk(net);
445 430
446 /* 431 /*
447 * Find the original header. It is expected to be valid, of course. 432 * Find the original header. It is expected to be valid, of course.
@@ -505,7 +490,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
505 } 490 }
506 } 491 }
507 492
508 if (icmp_xmit_lock()) 493 if (icmp_xmit_lock(sk))
509 return; 494 return;
510 495
511 /* 496 /*
@@ -516,7 +501,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
516 if (!(rt->rt_flags & RTCF_LOCAL)) { 501 if (!(rt->rt_flags & RTCF_LOCAL)) {
517 struct net_device *dev = NULL; 502 struct net_device *dev = NULL;
518 503
519 if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) 504 if (rt->fl.iif &&
505 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
520 dev = dev_get_by_index(net, rt->fl.iif); 506 dev = dev_get_by_index(net, rt->fl.iif);
521 507
522 if (dev) { 508 if (dev) {
@@ -544,7 +530,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
544 icmp_param.data.icmph.checksum = 0; 530 icmp_param.data.icmph.checksum = 0;
545 icmp_param.skb = skb_in; 531 icmp_param.skb = skb_in;
546 icmp_param.offset = skb_network_offset(skb_in); 532 icmp_param.offset = skb_network_offset(skb_in);
547 inet_sk(icmp_socket->sk)->tos = tos; 533 inet_sk(sk)->tos = tos;
548 ipc.addr = iph->saddr; 534 ipc.addr = iph->saddr;
549 ipc.opt = &icmp_param.replyopts; 535 ipc.opt = &icmp_param.replyopts;
550 536
@@ -609,7 +595,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
609 RT_TOS(tos), rt2->u.dst.dev); 595 RT_TOS(tos), rt2->u.dst.dev);
610 596
611 dst_release(&rt2->u.dst); 597 dst_release(&rt2->u.dst);
612 rt2 = (struct rtable *)skb_in->dst; 598 rt2 = skb_in->rtable;
613 skb_in->dst = odst; 599 skb_in->dst = odst;
614 } 600 }
615 601
@@ -632,7 +618,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
632 } 618 }
633 619
634route_done: 620route_done:
635 if (!icmpv4_xrlim_allow(rt, type, code)) 621 if (!icmpv4_xrlim_allow(net, rt, type, code))
636 goto ende; 622 goto ende;
637 623
638 /* RFC says return as much as we can without exceeding 576 bytes. */ 624 /* RFC says return as much as we can without exceeding 576 bytes. */
@@ -652,7 +638,7 @@ route_done:
652ende: 638ende:
653 ip_rt_put(rt); 639 ip_rt_put(rt);
654out_unlock: 640out_unlock:
655 icmp_xmit_unlock(); 641 icmp_xmit_unlock(sk);
656out:; 642out:;
657} 643}
658 644
@@ -670,7 +656,7 @@ static void icmp_unreach(struct sk_buff *skb)
670 u32 info = 0; 656 u32 info = 0;
671 struct net *net; 657 struct net *net;
672 658
673 net = skb->dst->dev->nd_net; 659 net = dev_net(skb->dst->dev);
674 660
675 /* 661 /*
676 * Incomplete header ? 662 * Incomplete header ?
@@ -738,7 +724,7 @@ static void icmp_unreach(struct sk_buff *skb)
738 * get the other vendor to fix their kit. 724 * get the other vendor to fix their kit.
739 */ 725 */
740 726
741 if (!sysctl_icmp_ignore_bogus_error_responses && 727 if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
742 inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { 728 inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
743 if (net_ratelimit()) 729 if (net_ratelimit())
744 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " 730 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
@@ -833,7 +819,10 @@ out_err:
833 819
834static void icmp_echo(struct sk_buff *skb) 820static void icmp_echo(struct sk_buff *skb)
835{ 821{
836 if (!sysctl_icmp_echo_ignore_all) { 822 struct net *net;
823
824 net = dev_net(skb->dst->dev);
825 if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
837 struct icmp_bxm icmp_param; 826 struct icmp_bxm icmp_param;
838 827
839 icmp_param.data.icmph = *icmp_hdr(skb); 828 icmp_param.data.icmph = *icmp_hdr(skb);
@@ -936,7 +925,7 @@ static void icmp_address(struct sk_buff *skb)
936 925
937static void icmp_address_reply(struct sk_buff *skb) 926static void icmp_address_reply(struct sk_buff *skb)
938{ 927{
939 struct rtable *rt = (struct rtable *)skb->dst; 928 struct rtable *rt = skb->rtable;
940 struct net_device *dev = skb->dev; 929 struct net_device *dev = skb->dev;
941 struct in_device *in_dev; 930 struct in_device *in_dev;
942 struct in_ifaddr *ifa; 931 struct in_ifaddr *ifa;
@@ -981,7 +970,7 @@ static void icmp_discard(struct sk_buff *skb)
981int icmp_rcv(struct sk_buff *skb) 970int icmp_rcv(struct sk_buff *skb)
982{ 971{
983 struct icmphdr *icmph; 972 struct icmphdr *icmph;
984 struct rtable *rt = (struct rtable *)skb->dst; 973 struct rtable *rt = skb->rtable;
985 974
986 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 975 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
987 int nh; 976 int nh;
@@ -1036,6 +1025,9 @@ int icmp_rcv(struct sk_buff *skb)
1036 */ 1025 */
1037 1026
1038 if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 1027 if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
1028 struct net *net;
1029
1030 net = dev_net(rt->u.dst.dev);
1039 /* 1031 /*
1040 * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be 1032 * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
1041 * silently ignored (we let user decide with a sysctl). 1033 * silently ignored (we let user decide with a sysctl).
@@ -1044,7 +1036,7 @@ int icmp_rcv(struct sk_buff *skb)
1044 */ 1036 */
1045 if ((icmph->type == ICMP_ECHO || 1037 if ((icmph->type == ICMP_ECHO ||
1046 icmph->type == ICMP_TIMESTAMP) && 1038 icmph->type == ICMP_TIMESTAMP) &&
1047 sysctl_icmp_echo_ignore_broadcasts) { 1039 net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
1048 goto error; 1040 goto error;
1049 } 1041 }
1050 if (icmph->type != ICMP_ECHO && 1042 if (icmph->type != ICMP_ECHO &&
@@ -1139,29 +1131,46 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
1139 }, 1131 },
1140}; 1132};
1141 1133
1142void __init icmp_init(struct net_proto_family *ops) 1134static void __net_exit icmp_sk_exit(struct net *net)
1143{ 1135{
1144 struct inet_sock *inet;
1145 int i; 1136 int i;
1146 1137
1147 for_each_possible_cpu(i) { 1138 for_each_possible_cpu(i)
1148 int err; 1139 sk_release_kernel(net->ipv4.icmp_sk[i]);
1140 kfree(net->ipv4.icmp_sk);
1141 net->ipv4.icmp_sk = NULL;
1142}
1143
1144int __net_init icmp_sk_init(struct net *net)
1145{
1146 int i, err;
1149 1147
1150 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, 1148 net->ipv4.icmp_sk =
1151 &per_cpu(__icmp_socket, i)); 1149 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
1150 if (net->ipv4.icmp_sk == NULL)
1151 return -ENOMEM;
1152 1152
1153 for_each_possible_cpu(i) {
1154 struct sock *sk;
1155 struct socket *sock;
1156 struct inet_sock *inet;
1157
1158 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &sock);
1153 if (err < 0) 1159 if (err < 0)
1154 panic("Failed to create the ICMP control socket.\n"); 1160 goto fail;
1161
1162 net->ipv4.icmp_sk[i] = sk = sock->sk;
1163 sk_change_net(sk, net);
1155 1164
1156 per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; 1165 sk->sk_allocation = GFP_ATOMIC;
1157 1166
1158 /* Enough space for 2 64K ICMP packets, including 1167 /* Enough space for 2 64K ICMP packets, including
1159 * sk_buff struct overhead. 1168 * sk_buff struct overhead.
1160 */ 1169 */
1161 per_cpu(__icmp_socket, i)->sk->sk_sndbuf = 1170 sk->sk_sndbuf =
1162 (2 * ((64 * 1024) + sizeof(struct sk_buff))); 1171 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
1163 1172
1164 inet = inet_sk(per_cpu(__icmp_socket, i)->sk); 1173 inet = inet_sk(sk);
1165 inet->uc_ttl = -1; 1174 inet->uc_ttl = -1;
1166 inet->pmtudisc = IP_PMTUDISC_DONT; 1175 inet->pmtudisc = IP_PMTUDISC_DONT;
1167 1176
@@ -1169,8 +1178,49 @@ void __init icmp_init(struct net_proto_family *ops)
1169 * see it, we do not wish this socket to see incoming 1178 * see it, we do not wish this socket to see incoming
1170 * packets. 1179 * packets.
1171 */ 1180 */
1172 per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk); 1181 sk->sk_prot->unhash(sk);
1173 } 1182 }
1183
1184 /* Control parameters for ECHO replies. */
1185 net->ipv4.sysctl_icmp_echo_ignore_all = 0;
1186 net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
1187
1188 /* Control parameter - ignore bogus broadcast responses? */
1189 net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;
1190
1191 /*
1192 * Configurable global rate limit.
1193 *
1194 * ratelimit defines tokens/packet consumed for dst->rate_token
1195 * bucket; ratemask defines which icmp types are ratelimited by
1196 * setting its bit position.
1197 *
1198 * default:
1199 * dest unreachable (3), source quench (4),
1200 * time exceeded (11), parameter problem (12)
1201 */
1202
1203 net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
1204 net->ipv4.sysctl_icmp_ratemask = 0x1818;
1205 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
1206
1207 return 0;
1208
1209fail:
1210 for_each_possible_cpu(i)
1211 sk_release_kernel(net->ipv4.icmp_sk[i]);
1212 kfree(net->ipv4.icmp_sk);
1213 return err;
1214}
1215
1216static struct pernet_operations __net_initdata icmp_sk_ops = {
1217 .init = icmp_sk_init,
1218 .exit = icmp_sk_exit,
1219};
1220
1221int __init icmp_init(void)
1222{
1223 return register_pernet_device(&icmp_sk_ops);
1174} 1224}
1175 1225
1176EXPORT_SYMBOL(icmp_err_convert); 1226EXPORT_SYMBOL(icmp_err_convert);