aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorVasiliy Kulikov <segoon@openwall.com>2011-05-13 06:01:00 -0400
committerDavid S. Miller <davem@davemloft.net>2011-05-13 16:08:13 -0400
commitc319b4d76b9e583a5d88d6bf190e079c4e43213d (patch)
tree22fcc6f1c671908d640145c1f82e5290cd40f715 /net
parentf20190302e3e697a166cc28ebef43058749dedda (diff)
net: ipv4: add IPPROTO_ICMP socket kind
This patch adds IPPROTO_ICMP socket kind. It makes it possible to send ICMP_ECHO messages and receive the corresponding ICMP_ECHOREPLY messages without any special privileges. In other words, the patch makes it possible to implement setuid-less and CAP_NET_RAW-less /bin/ping. In order not to increase the kernel's attack surface, the new functionality is disabled by default, but is enabled at bootup by supporting Linux distributions, optionally with restriction to a group or a group range (see below). Similar functionality is implemented in Mac OS X: http://www.manpagez.com/man/4/icmp/ A new ping socket is created with socket(PF_INET, SOCK_DGRAM, IPPROTO_ICMP) Message identifiers (octets 4-5 of ICMP header) are interpreted as local ports. Addresses are stored in struct sockaddr_in. No port numbers are reserved for privileged processes, port 0 is reserved for API ("let the kernel pick a free number"). There is no notion of remote ports, remote port numbers provided by the user (e.g. in connect()) are ignored. Data sent and received include ICMP headers. This is deliberate to: 1) Avoid the need to transport headers values like sequence numbers by other means. 2) Make it easier to port existing programs using raw sockets. ICMP headers given to send() are checked and sanitized. The type must be ICMP_ECHO and the code must be zero (future extensions might relax this, see below). The id is set to the number (local port) of the socket, the checksum is always recomputed. ICMP reply packets received from the network are demultiplexed according to their id's, and are returned by recv() without any modifications. IP header information and ICMP errors of those packets may be obtained via ancillary data (IP_RECVTTL, IP_RETOPTS, and IP_RECVERR). ICMP source quenches and redirects are reported as fake errors via the error queue (IP_RECVERR); the next hop address for redirects is saved to ee_info (in network order). 
socket(2) is restricted to the group range specified in "/proc/sys/net/ipv4/ping_group_range". It is "1 0" by default, meaning that nobody (not even root) may create ping sockets. Setting it to "100 100" would grant permissions to the single group (to either make /sbin/ping g+s and owned by this group or to grant permissions to the "netadmins" group), "0 4294967295" would enable it for the world, "100 4294967295" would enable it for the users, but not daemons. The existing code might be (in the unlikely case anyone needs it) extended rather easily to handle other similar pairs of ICMP messages (Timestamp/Reply, Information Request/Reply, Address Mask Request/Reply etc.). Userspace ping util & patch for it: http://openwall.info/wiki/people/segoon/ping For Openwall GNU/*/Linux it was the last step on the road to the setuid-less distro. A revision of this patch (for RHEL5/OpenVZ kernels) is in use in Owl-current, such as in the 2011/03/12 LiveCD ISOs: http://mirrors.kernel.org/openwall/Owl/current/iso/ Initially this functionality was written by Pavel Kankovsky for Linux 2.4.32, but unfortunately it was never made public. All ping options (-b, -p, -Q, -R, -s, -t, -T, -M, -I), are tested with the patch. PATCH v3: - switched to flowi4. - minor changes to be consistent with raw sockets code. PATCH v2: - changed ping_debug() to pr_debug(). - removed CONFIG_IP_PING. - removed ping_seq_fops.owner field (unused for procfs). - switched to proc_net_fops_create(). - switched to %pK in seq_printf(). PATCH v1: - fixed checksumming bug. - CAP_NET_RAW may not create icmp sockets anymore. RFC v2: - minor cleanups. - introduced sysctl'able group range to restrict socket(2). Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c22
-rw-r--r--net/ipv4/icmp.c12
-rw-r--r--net/ipv4/ping.c937
-rw-r--r--net/ipv4/sysctl_net_ipv4.c80
5 files changed, 1051 insertions, 2 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 0dc772d0d125..f2dc69cffb57 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
11 datagram.o raw.o udp.o udplite.o \ 11 datagram.o raw.o udp.o udplite.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 12 arp.o icmp.o devinet.o af_inet.o igmp.o \
13 fib_frontend.o fib_semantics.o fib_trie.o \ 13 fib_frontend.o fib_semantics.o fib_trie.o \
14 inet_fragment.o 14 inet_fragment.o ping.o
15 15
16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o 16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
17obj-$(CONFIG_PROC_FS) += proc.o 17obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 851aa056854b..cc1463156cd0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -105,6 +105,7 @@
105#include <net/tcp.h> 105#include <net/tcp.h>
106#include <net/udp.h> 106#include <net/udp.h>
107#include <net/udplite.h> 107#include <net/udplite.h>
108#include <net/ping.h>
108#include <linux/skbuff.h> 109#include <linux/skbuff.h>
109#include <net/sock.h> 110#include <net/sock.h>
110#include <net/raw.h> 111#include <net/raw.h>
@@ -1008,6 +1009,14 @@ static struct inet_protosw inetsw_array[] =
1008 .flags = INET_PROTOSW_PERMANENT, 1009 .flags = INET_PROTOSW_PERMANENT,
1009 }, 1010 },
1010 1011
1012 {
1013 .type = SOCK_DGRAM,
1014 .protocol = IPPROTO_ICMP,
1015 .prot = &ping_prot,
1016 .ops = &inet_dgram_ops,
1017 .no_check = UDP_CSUM_DEFAULT,
1018 .flags = INET_PROTOSW_REUSE,
1019 },
1011 1020
1012 { 1021 {
1013 .type = SOCK_RAW, 1022 .type = SOCK_RAW,
@@ -1527,6 +1536,7 @@ static const struct net_protocol udp_protocol = {
1527 1536
1528static const struct net_protocol icmp_protocol = { 1537static const struct net_protocol icmp_protocol = {
1529 .handler = icmp_rcv, 1538 .handler = icmp_rcv,
1539 .err_handler = ping_err,
1530 .no_policy = 1, 1540 .no_policy = 1,
1531 .netns_ok = 1, 1541 .netns_ok = 1,
1532}; 1542};
@@ -1642,6 +1652,10 @@ static int __init inet_init(void)
1642 if (rc) 1652 if (rc)
1643 goto out_unregister_udp_proto; 1653 goto out_unregister_udp_proto;
1644 1654
1655 rc = proto_register(&ping_prot, 1);
1656 if (rc)
1657 goto out_unregister_raw_proto;
1658
1645 /* 1659 /*
1646 * Tell SOCKET that we are alive... 1660 * Tell SOCKET that we are alive...
1647 */ 1661 */
@@ -1697,6 +1711,8 @@ static int __init inet_init(void)
1697 /* Add UDP-Lite (RFC 3828) */ 1711 /* Add UDP-Lite (RFC 3828) */
1698 udplite4_register(); 1712 udplite4_register();
1699 1713
1714 ping_init();
1715
1700 /* 1716 /*
1701 * Set the ICMP layer up 1717 * Set the ICMP layer up
1702 */ 1718 */
@@ -1727,6 +1743,8 @@ static int __init inet_init(void)
1727 rc = 0; 1743 rc = 0;
1728out: 1744out:
1729 return rc; 1745 return rc;
1746out_unregister_raw_proto:
1747 proto_unregister(&raw_prot);
1730out_unregister_udp_proto: 1748out_unregister_udp_proto:
1731 proto_unregister(&udp_prot); 1749 proto_unregister(&udp_prot);
1732out_unregister_tcp_proto: 1750out_unregister_tcp_proto:
@@ -1751,11 +1769,15 @@ static int __init ipv4_proc_init(void)
1751 goto out_tcp; 1769 goto out_tcp;
1752 if (udp4_proc_init()) 1770 if (udp4_proc_init())
1753 goto out_udp; 1771 goto out_udp;
1772 if (ping_proc_init())
1773 goto out_ping;
1754 if (ip_misc_proc_init()) 1774 if (ip_misc_proc_init())
1755 goto out_misc; 1775 goto out_misc;
1756out: 1776out:
1757 return rc; 1777 return rc;
1758out_misc: 1778out_misc:
1779 ping_proc_exit();
1780out_ping:
1759 udp4_proc_exit(); 1781 udp4_proc_exit();
1760out_udp: 1782out_udp:
1761 tcp4_proc_exit(); 1783 tcp4_proc_exit();
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3314394f0aab..3f47585aad68 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -83,6 +83,7 @@
83#include <net/tcp.h> 83#include <net/tcp.h>
84#include <net/udp.h> 84#include <net/udp.h>
85#include <net/raw.h> 85#include <net/raw.h>
86#include <net/ping.h>
86#include <linux/skbuff.h> 87#include <linux/skbuff.h>
87#include <net/sock.h> 88#include <net/sock.h>
88#include <linux/errno.h> 89#include <linux/errno.h>
@@ -781,6 +782,15 @@ static void icmp_redirect(struct sk_buff *skb)
781 iph->saddr, skb->dev); 782 iph->saddr, skb->dev);
782 break; 783 break;
783 } 784 }
785
786 /* Ping wants to see redirects.
787 * Let's pretend they are errors of sorts... */
788 if (iph->protocol == IPPROTO_ICMP &&
789 iph->ihl >= 5 &&
790 pskb_may_pull(skb, (iph->ihl<<2)+8)) {
791 ping_err(skb, icmp_hdr(skb)->un.gateway);
792 }
793
784out: 794out:
785 return; 795 return;
786out_err: 796out_err:
@@ -1041,7 +1051,7 @@ error:
1041 */ 1051 */
1042static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { 1052static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
1043 [ICMP_ECHOREPLY] = { 1053 [ICMP_ECHOREPLY] = {
1044 .handler = icmp_discard, 1054 .handler = ping_rcv,
1045 }, 1055 },
1046 [1] = { 1056 [1] = {
1047 .handler = icmp_discard, 1057 .handler = icmp_discard,
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
new file mode 100644
index 000000000000..a77e2d788dac
--- /dev/null
+++ b/net/ipv4/ping.c
@@ -0,0 +1,937 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * "Ping" sockets
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * Based on ipv4/udp.c code.
14 *
15 * Authors: Vasiliy Kulikov / Openwall (for Linux 2.6),
16 * Pavel Kankovsky (for Linux 2.4.32)
17 *
18 * Pavel gave all rights to bugs to Vasiliy,
19 * none of the bugs are Pavel's now.
20 *
21 */
22
23#include <asm/system.h>
24#include <linux/uaccess.h>
25#include <asm/ioctls.h>
26#include <linux/types.h>
27#include <linux/fcntl.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/in.h>
31#include <linux/errno.h>
32#include <linux/timer.h>
33#include <linux/mm.h>
34#include <linux/inet.h>
35#include <linux/netdevice.h>
36#include <net/snmp.h>
37#include <net/ip.h>
38#include <net/ipv6.h>
39#include <net/icmp.h>
40#include <net/protocol.h>
41#include <linux/skbuff.h>
42#include <linux/proc_fs.h>
43#include <net/sock.h>
44#include <net/ping.h>
45#include <net/icmp.h>
46#include <net/udp.h>
47#include <net/route.h>
48#include <net/inet_common.h>
49#include <net/checksum.h>
50
51
52struct ping_table ping_table __read_mostly;
53
54u16 ping_port_rover;
55
/*
 * Compute the hash bucket index for ICMP identifier @num in namespace @net:
 * the identifier is added to net_hash_mix(net) and the sum is masked with
 * @mask.
 */
static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask)
{
	int bucket;

	bucket = (num + net_hash_mix(net)) & mask;
	pr_debug("hash(%d) = %d\n", num, bucket);
	return bucket;
}
62
63static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
64 struct net *net, unsigned num)
65{
66 return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
67}
68
69static int ping_v4_get_port(struct sock *sk, unsigned short ident)
70{
71 struct hlist_nulls_node *node;
72 struct hlist_nulls_head *hlist;
73 struct inet_sock *isk, *isk2;
74 struct sock *sk2 = NULL;
75
76 isk = inet_sk(sk);
77 write_lock_bh(&ping_table.lock);
78 if (ident == 0) {
79 u32 i;
80 u16 result = ping_port_rover + 1;
81
82 for (i = 0; i < (1L << 16); i++, result++) {
83 if (!result)
84 result++; /* avoid zero */
85 hlist = ping_hashslot(&ping_table, sock_net(sk),
86 result);
87 ping_portaddr_for_each_entry(sk2, node, hlist) {
88 isk2 = inet_sk(sk2);
89
90 if (isk2->inet_num == result)
91 goto next_port;
92 }
93
94 /* found */
95 ping_port_rover = ident = result;
96 break;
97next_port:
98 ;
99 }
100 if (i >= (1L << 16))
101 goto fail;
102 } else {
103 hlist = ping_hashslot(&ping_table, sock_net(sk), ident);
104 ping_portaddr_for_each_entry(sk2, node, hlist) {
105 isk2 = inet_sk(sk2);
106
107 if ((isk2->inet_num == ident) &&
108 (sk2 != sk) &&
109 (!sk2->sk_reuse || !sk->sk_reuse))
110 goto fail;
111 }
112 }
113
114 pr_debug("found port/ident = %d\n", ident);
115 isk->inet_num = ident;
116 if (sk_unhashed(sk)) {
117 pr_debug("was not hashed\n");
118 sock_hold(sk);
119 hlist_nulls_add_head(&sk->sk_nulls_node, hlist);
120 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
121 }
122 write_unlock_bh(&ping_table.lock);
123 return 0;
124
125fail:
126 write_unlock_bh(&ping_table.lock);
127 return 1;
128}
129
130static void ping_v4_hash(struct sock *sk)
131{
132 pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
133 BUG(); /* "Please do not press this button again." */
134}
135
136static void ping_v4_unhash(struct sock *sk)
137{
138 struct inet_sock *isk = inet_sk(sk);
139 pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
140 if (sk_hashed(sk)) {
141 struct hlist_nulls_head *hslot;
142
143 hslot = ping_hashslot(&ping_table, sock_net(sk), isk->inet_num);
144 write_lock_bh(&ping_table.lock);
145 hlist_nulls_del(&sk->sk_nulls_node);
146 sock_put(sk);
147 isk->inet_num = isk->inet_sport = 0;
148 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
149 write_unlock_bh(&ping_table.lock);
150 }
151}
152
153struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr,
154 u16 ident, int dif)
155{
156 struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
157 struct sock *sk = NULL;
158 struct inet_sock *isk;
159 struct hlist_nulls_node *hnode;
160
161 pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n",
162 (int)ident, (unsigned long)daddr, dif);
163 read_lock_bh(&ping_table.lock);
164
165 ping_portaddr_for_each_entry(sk, hnode, hslot) {
166 isk = inet_sk(sk);
167
168 pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk,
169 (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr,
170 sk->sk_bound_dev_if);
171
172 pr_debug("iterate\n");
173 if (isk->inet_num != ident)
174 continue;
175 if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
176 continue;
177 if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
178 continue;
179
180 sock_hold(sk);
181 goto exit;
182 }
183
184 sk = NULL;
185exit:
186 read_unlock_bh(&ping_table.lock);
187
188 return sk;
189}
190
191static int ping_init_sock(struct sock *sk)
192{
193 struct net *net = sock_net(sk);
194 gid_t group = current_egid();
195 gid_t range[2];
196 struct group_info *group_info = get_current_groups();
197 int i, j, count = group_info->ngroups;
198
199 inet_get_ping_group_range_net(net, range, range+1);
200 if (range[0] <= group && group <= range[1])
201 return 0;
202
203 for (i = 0; i < group_info->nblocks; i++) {
204 int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
205
206 for (j = 0; j < cp_count; j++) {
207 group = group_info->blocks[i][j];
208 if (range[0] <= group && group <= range[1])
209 return 0;
210 }
211
212 count -= cp_count;
213 }
214
215 return -EACCES;
216}
217
218static void ping_close(struct sock *sk, long timeout)
219{
220 pr_debug("ping_close(sk=%p,sk->num=%u)\n",
221 inet_sk(sk), inet_sk(sk)->inet_num);
222 pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
223
224 sk_common_release(sk);
225}
226
227/*
228 * We need our own bind because there are no privileged id's == local ports.
229 * Moreover, we don't allow binding to multi- and broadcast addresses.
230 */
231
232static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
233{
234 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
235 struct inet_sock *isk = inet_sk(sk);
236 unsigned short snum;
237 int chk_addr_ret;
238 int err;
239
240 if (addr_len < sizeof(struct sockaddr_in))
241 return -EINVAL;
242
243 pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
244 sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
245
246 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
247 if (addr->sin_addr.s_addr == INADDR_ANY)
248 chk_addr_ret = RTN_LOCAL;
249
250 if ((sysctl_ip_nonlocal_bind == 0 &&
251 isk->freebind == 0 && isk->transparent == 0 &&
252 chk_addr_ret != RTN_LOCAL) ||
253 chk_addr_ret == RTN_MULTICAST ||
254 chk_addr_ret == RTN_BROADCAST)
255 return -EADDRNOTAVAIL;
256
257 lock_sock(sk);
258
259 err = -EINVAL;
260 if (isk->inet_num != 0)
261 goto out;
262
263 err = -EADDRINUSE;
264 isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
265 snum = ntohs(addr->sin_port);
266 if (ping_v4_get_port(sk, snum) != 0) {
267 isk->inet_saddr = isk->inet_rcv_saddr = 0;
268 goto out;
269 }
270
271 pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n",
272 (int)isk->inet_num,
273 (unsigned long) isk->inet_rcv_saddr,
274 (int)sk->sk_bound_dev_if);
275
276 err = 0;
277 if (isk->inet_rcv_saddr)
278 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
279 if (snum)
280 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
281 isk->inet_sport = htons(isk->inet_num);
282 isk->inet_daddr = 0;
283 isk->inet_dport = 0;
284 sk_dst_reset(sk);
285out:
286 release_sock(sk);
287 pr_debug("ping_v4_bind -> %d\n", err);
288 return err;
289}
290
291/*
292 * Is this a supported type of ICMP message?
293 */
294
/*
 * Return 1 if (@type, @code) is an ICMP message ping sockets handle,
 * 0 otherwise.  Only echo requests with code zero qualify.
 */
static inline int ping_supported(int type, int code)
{
	return (type == ICMP_ECHO && code == 0) ? 1 : 0;
}
301
302/*
303 * This routine is called by the ICMP module when it gets some
304 * sort of error condition.
305 */
306
307static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
308
309void ping_err(struct sk_buff *skb, u32 info)
310{
311 struct iphdr *iph = (struct iphdr *)skb->data;
312 struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
313 struct inet_sock *inet_sock;
314 int type = icmph->type;
315 int code = icmph->code;
316 struct net *net = dev_net(skb->dev);
317 struct sock *sk;
318 int harderr;
319 int err;
320
321 /* We assume the packet has already been checked by icmp_unreach */
322
323 if (!ping_supported(icmph->type, icmph->code))
324 return;
325
326 pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
327 code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
328
329 sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
330 ntohs(icmph->un.echo.id), skb->dev->ifindex);
331 if (sk == NULL) {
332 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
333 pr_debug("no socket, dropping\n");
334 return; /* No socket for error */
335 }
336 pr_debug("err on socket %p\n", sk);
337
338 err = 0;
339 harderr = 0;
340 inet_sock = inet_sk(sk);
341
342 switch (type) {
343 default:
344 case ICMP_TIME_EXCEEDED:
345 err = EHOSTUNREACH;
346 break;
347 case ICMP_SOURCE_QUENCH:
348 /* This is not a real error but ping wants to see it.
349 * Report it with some fake errno. */
350 err = EREMOTEIO;
351 break;
352 case ICMP_PARAMETERPROB:
353 err = EPROTO;
354 harderr = 1;
355 break;
356 case ICMP_DEST_UNREACH:
357 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
358 if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
359 err = EMSGSIZE;
360 harderr = 1;
361 break;
362 }
363 goto out;
364 }
365 err = EHOSTUNREACH;
366 if (code <= NR_ICMP_UNREACH) {
367 harderr = icmp_err_convert[code].fatal;
368 err = icmp_err_convert[code].errno;
369 }
370 break;
371 case ICMP_REDIRECT:
372 /* See ICMP_SOURCE_QUENCH */
373 err = EREMOTEIO;
374 break;
375 }
376
377 /*
378 * RFC1122: OK. Passes ICMP errors back to application, as per
379 * 4.1.3.3.
380 */
381 if (!inet_sock->recverr) {
382 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
383 goto out;
384 } else {
385 ip_icmp_error(sk, skb, err, 0 /* no remote port */,
386 info, (u8 *)icmph);
387 }
388 sk->sk_err = err;
389 sk->sk_error_report(sk);
390out:
391 sock_put(sk);
392}
393
394/*
395 * Copy and checksum an ICMP Echo packet from user space into a buffer.
396 */
397
398struct pingfakehdr {
399 struct icmphdr icmph;
400 struct iovec *iov;
401 u32 wcheck;
402};
403
404static int ping_getfrag(void *from, char * to,
405 int offset, int fraglen, int odd, struct sk_buff *skb)
406{
407 struct pingfakehdr *pfh = (struct pingfakehdr *)from;
408
409 if (offset == 0) {
410 if (fraglen < sizeof(struct icmphdr))
411 BUG();
412 if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr),
413 pfh->iov, 0, fraglen - sizeof(struct icmphdr),
414 &pfh->wcheck))
415 return -EFAULT;
416
417 return 0;
418 }
419 if (offset < sizeof(struct icmphdr))
420 BUG();
421 if (csum_partial_copy_fromiovecend
422 (to, pfh->iov, offset - sizeof(struct icmphdr),
423 fraglen, &pfh->wcheck))
424 return -EFAULT;
425 return 0;
426}
427
428static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, struct flowi4 *fl4)
429{
430 struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
431
432 pfh->wcheck = csum_partial((char *)&pfh->icmph,
433 sizeof(struct icmphdr), pfh->wcheck);
434 pfh->icmph.checksum = csum_fold(pfh->wcheck);
435 memcpy(icmp_hdr(skb), &pfh->icmph, sizeof(struct icmphdr));
436 skb->ip_summed = CHECKSUM_NONE;
437 return ip_push_pending_frames(sk, fl4);
438}
439
440int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
441 size_t len)
442{
443 struct net *net = sock_net(sk);
444 struct flowi4 fl4;
445 struct inet_sock *inet = inet_sk(sk);
446 struct ipcm_cookie ipc;
447 struct icmphdr user_icmph;
448 struct pingfakehdr pfh;
449 struct rtable *rt = NULL;
450 struct ip_options_data opt_copy;
451 int free = 0;
452 u32 saddr, daddr, faddr;
453 u8 tos;
454 int err;
455
456 pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
457
458
459 if (len > 0xFFFF)
460 return -EMSGSIZE;
461
462 /*
463 * Check the flags.
464 */
465
466 /* Mirror BSD error message compatibility */
467 if (msg->msg_flags & MSG_OOB)
468 return -EOPNOTSUPP;
469
470 /*
471 * Fetch the ICMP header provided by the userland.
472 * iovec is modified!
473 */
474
475 if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
476 sizeof(struct icmphdr)))
477 return -EFAULT;
478 if (!ping_supported(user_icmph.type, user_icmph.code))
479 return -EINVAL;
480
481 /*
482 * Get and verify the address.
483 */
484
485 if (msg->msg_name) {
486 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
487 if (msg->msg_namelen < sizeof(*usin))
488 return -EINVAL;
489 if (usin->sin_family != AF_INET)
490 return -EINVAL;
491 daddr = usin->sin_addr.s_addr;
492 /* no remote port */
493 } else {
494 if (sk->sk_state != TCP_ESTABLISHED)
495 return -EDESTADDRREQ;
496 daddr = inet->inet_daddr;
497 /* no remote port */
498 }
499
500 ipc.addr = inet->inet_saddr;
501 ipc.opt = NULL;
502 ipc.oif = sk->sk_bound_dev_if;
503 ipc.tx_flags = 0;
504 err = sock_tx_timestamp(sk, &ipc.tx_flags);
505 if (err)
506 return err;
507
508 if (msg->msg_controllen) {
509 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
510 if (err)
511 return err;
512 if (ipc.opt)
513 free = 1;
514 }
515 if (!ipc.opt) {
516 struct ip_options_rcu *inet_opt;
517
518 rcu_read_lock();
519 inet_opt = rcu_dereference(inet->inet_opt);
520 if (inet_opt) {
521 memcpy(&opt_copy, inet_opt,
522 sizeof(*inet_opt) + inet_opt->opt.optlen);
523 ipc.opt = &opt_copy.opt;
524 }
525 rcu_read_unlock();
526 }
527
528 saddr = ipc.addr;
529 ipc.addr = faddr = daddr;
530
531 if (ipc.opt && ipc.opt->opt.srr) {
532 if (!daddr)
533 return -EINVAL;
534 faddr = ipc.opt->opt.faddr;
535 }
536 tos = RT_TOS(inet->tos);
537 if (sock_flag(sk, SOCK_LOCALROUTE) ||
538 (msg->msg_flags & MSG_DONTROUTE) ||
539 (ipc.opt && ipc.opt->opt.is_strictroute)) {
540 tos |= RTO_ONLINK;
541 }
542
543 if (ipv4_is_multicast(daddr)) {
544 if (!ipc.oif)
545 ipc.oif = inet->mc_index;
546 if (!saddr)
547 saddr = inet->mc_addr;
548 }
549
550 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
551 RT_SCOPE_UNIVERSE, sk->sk_protocol,
552 inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
553
554 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
555 rt = ip_route_output_flow(net, &fl4, sk);
556 if (IS_ERR(rt)) {
557 err = PTR_ERR(rt);
558 rt = NULL;
559 if (err == -ENETUNREACH)
560 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
561 goto out;
562 }
563
564 err = -EACCES;
565 if ((rt->rt_flags & RTCF_BROADCAST) &&
566 !sock_flag(sk, SOCK_BROADCAST))
567 goto out;
568
569 if (msg->msg_flags & MSG_CONFIRM)
570 goto do_confirm;
571back_from_confirm:
572
573 if (!ipc.addr)
574 ipc.addr = fl4.daddr;
575
576 lock_sock(sk);
577
578 pfh.icmph.type = user_icmph.type; /* already checked */
579 pfh.icmph.code = user_icmph.code; /* ditto */
580 pfh.icmph.checksum = 0;
581 pfh.icmph.un.echo.id = inet->inet_sport;
582 pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
583 pfh.iov = msg->msg_iov;
584 pfh.wcheck = 0;
585
586 err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
587 0, &ipc, &rt, msg->msg_flags);
588 if (err)
589 ip_flush_pending_frames(sk);
590 else
591 err = ping_push_pending_frames(sk, &pfh, &fl4);
592 release_sock(sk);
593
594out:
595 ip_rt_put(rt);
596 if (free)
597 kfree(ipc.opt);
598 if (!err) {
599 icmp_out_count(sock_net(sk), user_icmph.type);
600 return len;
601 }
602 return err;
603
604do_confirm:
605 dst_confirm(&rt->dst);
606 if (!(msg->msg_flags & MSG_PROBE) || len)
607 goto back_from_confirm;
608 err = 0;
609 goto out;
610}
611
612/*
613 * IOCTL requests applicable to the UDP^H^H^HICMP protocol
614 */
615
616int ping_ioctl(struct sock *sk, int cmd, unsigned long arg)
617{
618 pr_debug("ping_ioctl(sk=%p,sk->num=%u,cmd=%d,arg=%lu)\n",
619 inet_sk(sk), inet_sk(sk)->inet_num, cmd, arg);
620 switch (cmd) {
621 case SIOCOUTQ:
622 case SIOCINQ:
623 return udp_ioctl(sk, cmd, arg);
624 default:
625 return -ENOIOCTLCMD;
626 }
627}
628
629int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
630 size_t len, int noblock, int flags, int *addr_len)
631{
632 struct inet_sock *isk = inet_sk(sk);
633 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
634 struct sk_buff *skb;
635 int copied, err;
636
637 pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num);
638
639 if (flags & MSG_OOB)
640 goto out;
641
642 if (addr_len)
643 *addr_len = sizeof(*sin);
644
645 if (flags & MSG_ERRQUEUE)
646 return ip_recv_error(sk, msg, len);
647
648 skb = skb_recv_datagram(sk, flags, noblock, &err);
649 if (!skb)
650 goto out;
651
652 copied = skb->len;
653 if (copied > len) {
654 msg->msg_flags |= MSG_TRUNC;
655 copied = len;
656 }
657
658 /* Don't bother checking the checksum */
659 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
660 if (err)
661 goto done;
662
663 sock_recv_timestamp(msg, sk, skb);
664
665 /* Copy the address. */
666 if (sin) {
667 sin->sin_family = AF_INET;
668 sin->sin_port = 0 /* skb->h.uh->source */;
669 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
670 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
671 }
672 if (isk->cmsg_flags)
673 ip_cmsg_recv(msg, skb);
674 err = copied;
675
676done:
677 skb_free_datagram(sk, skb);
678out:
679 pr_debug("ping_recvmsg -> %d\n", err);
680 return err;
681}
682
683static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
684{
685 pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
686 inet_sk(sk), inet_sk(sk)->inet_num, skb);
687 if (sock_queue_rcv_skb(sk, skb) < 0) {
688 ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_INERRORS);
689 kfree_skb(skb);
690 pr_debug("ping_queue_rcv_skb -> failed\n");
691 return -1;
692 }
693 return 0;
694}
695
696
697/*
698 * All we need to do is get the socket.
699 */
700
701void ping_rcv(struct sk_buff *skb)
702{
703 struct sock *sk;
704 struct net *net = dev_net(skb->dev);
705 struct iphdr *iph = ip_hdr(skb);
706 struct icmphdr *icmph = icmp_hdr(skb);
707 u32 saddr = iph->saddr;
708 u32 daddr = iph->daddr;
709
710 /* We assume the packet has already been checked by icmp_rcv */
711
712 pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n",
713 skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
714
715 /* Push ICMP header back */
716 skb_push(skb, skb->data - (u8 *)icmph);
717
718 sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
719 skb->dev->ifindex);
720 if (sk != NULL) {
721 pr_debug("rcv on socket %p\n", sk);
722 ping_queue_rcv_skb(sk, skb_get(skb));
723 sock_put(sk);
724 return;
725 }
726 pr_debug("no socket, dropping\n");
727
728 /* We're called from icmp_rcv(). kfree_skb() is done there. */
729}
730
731struct proto ping_prot = {
732 .name = "PING",
733 .owner = THIS_MODULE,
734 .init = ping_init_sock,
735 .close = ping_close,
736 .connect = ip4_datagram_connect,
737 .disconnect = udp_disconnect,
738 .ioctl = ping_ioctl,
739 .setsockopt = ip_setsockopt,
740 .getsockopt = ip_getsockopt,
741 .sendmsg = ping_sendmsg,
742 .recvmsg = ping_recvmsg,
743 .bind = ping_bind,
744 .backlog_rcv = ping_queue_rcv_skb,
745 .hash = ping_v4_hash,
746 .unhash = ping_v4_unhash,
747 .get_port = ping_v4_get_port,
748 .obj_size = sizeof(struct inet_sock),
749};
750EXPORT_SYMBOL(ping_prot);
751
752#ifdef CONFIG_PROC_FS
753
754static struct sock *ping_get_first(struct seq_file *seq, int start)
755{
756 struct sock *sk;
757 struct ping_iter_state *state = seq->private;
758 struct net *net = seq_file_net(seq);
759
760 for (state->bucket = start; state->bucket < PING_HTABLE_SIZE;
761 ++state->bucket) {
762 struct hlist_nulls_node *node;
763 struct hlist_nulls_head *hslot = &ping_table.hash[state->bucket];
764
765 if (hlist_nulls_empty(hslot))
766 continue;
767
768 sk_nulls_for_each(sk, node, hslot) {
769 if (net_eq(sock_net(sk), net))
770 goto found;
771 }
772 }
773 sk = NULL;
774found:
775 return sk;
776}
777
778static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk)
779{
780 struct ping_iter_state *state = seq->private;
781 struct net *net = seq_file_net(seq);
782
783 do {
784 sk = sk_nulls_next(sk);
785 } while (sk && (!net_eq(sock_net(sk), net)));
786
787 if (!sk)
788 return ping_get_first(seq, state->bucket + 1);
789 return sk;
790}
791
792static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
793{
794 struct sock *sk = ping_get_first(seq, 0);
795
796 if (sk)
797 while (pos && (sk = ping_get_next(seq, sk)) != NULL)
798 --pos;
799 return pos ? NULL : sk;
800}
801
802static void *ping_seq_start(struct seq_file *seq, loff_t *pos)
803{
804 struct ping_iter_state *state = seq->private;
805 state->bucket = 0;
806
807 read_lock_bh(&ping_table.lock);
808
809 return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
810}
811
812static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
813{
814 struct sock *sk;
815
816 if (v == SEQ_START_TOKEN)
817 sk = ping_get_idx(seq, 0);
818 else
819 sk = ping_get_next(seq, v);
820
821 ++*pos;
822 return sk;
823}
824
825static void ping_seq_stop(struct seq_file *seq, void *v)
826{
827 read_unlock_bh(&ping_table.lock);
828}
829
830static void ping_format_sock(struct sock *sp, struct seq_file *f,
831 int bucket, int *len)
832{
833 struct inet_sock *inet = inet_sk(sp);
834 __be32 dest = inet->inet_daddr;
835 __be32 src = inet->inet_rcv_saddr;
836 __u16 destp = ntohs(inet->inet_dport);
837 __u16 srcp = ntohs(inet->inet_sport);
838
839 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
840 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
841 bucket, src, srcp, dest, destp, sp->sk_state,
842 sk_wmem_alloc_get(sp),
843 sk_rmem_alloc_get(sp),
844 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
845 atomic_read(&sp->sk_refcnt), sp,
846 atomic_read(&sp->sk_drops), len);
847}
848
849static int ping_seq_show(struct seq_file *seq, void *v)
850{
851 if (v == SEQ_START_TOKEN)
852 seq_printf(seq, "%-127s\n",
853 " sl local_address rem_address st tx_queue "
854 "rx_queue tr tm->when retrnsmt uid timeout "
855 "inode ref pointer drops");
856 else {
857 struct ping_iter_state *state = seq->private;
858 int len;
859
860 ping_format_sock(v, seq, state->bucket, &len);
861 seq_printf(seq, "%*s\n", 127 - len, "");
862 }
863 return 0;
864}
865
866static const struct seq_operations ping_seq_ops = {
867 .show = ping_seq_show,
868 .start = ping_seq_start,
869 .next = ping_seq_next,
870 .stop = ping_seq_stop,
871};
872
873static int ping_seq_open(struct inode *inode, struct file *file)
874{
875 return seq_open_net(inode, file, &ping_seq_ops,
876 sizeof(struct ping_iter_state));
877}
878
879static const struct file_operations ping_seq_fops = {
880 .open = ping_seq_open,
881 .read = seq_read,
882 .llseek = seq_lseek,
883 .release = seq_release_net,
884};
885
886static int ping_proc_register(struct net *net)
887{
888 struct proc_dir_entry *p;
889 int rc = 0;
890
891 p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops);
892 if (!p)
893 rc = -ENOMEM;
894 return rc;
895}
896
/* Remove the "icmp" proc entry of @net. */
static void ping_proc_unregister(struct net *net)
{
	proc_net_remove(net, "icmp");
}
901
902
903static int __net_init ping_proc_init_net(struct net *net)
904{
905 return ping_proc_register(net);
906}
907
908static void __net_exit ping_proc_exit_net(struct net *net)
909{
910 ping_proc_unregister(net);
911}
912
913static struct pernet_operations ping_net_ops = {
914 .init = ping_proc_init_net,
915 .exit = ping_proc_exit_net,
916};
917
918int __init ping_proc_init(void)
919{
920 return register_pernet_subsys(&ping_net_ops);
921}
922
923void ping_proc_exit(void)
924{
925 unregister_pernet_subsys(&ping_net_ops);
926}
927
928#endif
929
930void __init ping_init(void)
931{
932 int i;
933
934 for (i = 0; i < PING_HTABLE_SIZE; i++)
935 INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
936 rwlock_init(&ping_table.lock);
937}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 321e6e84dbcc..28e8273bbef8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -13,6 +13,7 @@
13#include <linux/seqlock.h> 13#include <linux/seqlock.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/nsproxy.h>
16#include <net/snmp.h> 17#include <net/snmp.h>
17#include <net/icmp.h> 18#include <net/icmp.h>
18#include <net/ip.h> 19#include <net/ip.h>
@@ -21,6 +22,7 @@
21#include <net/udp.h> 22#include <net/udp.h>
22#include <net/cipso_ipv4.h> 23#include <net/cipso_ipv4.h>
23#include <net/inet_frag.h> 24#include <net/inet_frag.h>
25#include <net/ping.h>
24 26
25static int zero; 27static int zero;
26static int tcp_retr1_max = 255; 28static int tcp_retr1_max = 255;
@@ -30,6 +32,8 @@ static int tcp_adv_win_scale_min = -31;
30static int tcp_adv_win_scale_max = 31; 32static int tcp_adv_win_scale_max = 31;
31static int ip_ttl_min = 1; 33static int ip_ttl_min = 1;
32static int ip_ttl_max = 255; 34static int ip_ttl_max = 255;
/*
 * Per-element lower and upper bounds for the two-value ping_group_range
 * sysctl; ipv4_ping_group_range() passes them to proc_dointvec_minmax()
 * as extra1/extra2.
 */
35static int ip_ping_group_range_min[] = { 0, 0 };
36static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
33 37
34/* Update system visible IP port range */ 38/* Update system visible IP port range */
35static void set_local_port_range(int range[2]) 39static void set_local_port_range(int range[2])
@@ -68,6 +72,65 @@ static int ipv4_local_port_range(ctl_table *table, int write,
68 return ret; 72 return ret;
69} 73}
70 74
75
76void inet_get_ping_group_range_net(struct net *net, gid_t *low, gid_t *high)
77{
78 gid_t *data = net->ipv4.sysctl_ping_group_range;
79 unsigned seq;
80 do {
81 seq = read_seqbegin(&sysctl_local_ports.lock);
82
83 *low = data[0];
84 *high = data[1];
85 } while (read_seqretry(&sysctl_local_ports.lock, seq));
86}
87
88void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
89{
90 gid_t *data = table->data;
91 unsigned seq;
92 do {
93 seq = read_seqbegin(&sysctl_local_ports.lock);
94
95 *low = data[0];
96 *high = data[1];
97 } while (read_seqretry(&sysctl_local_ports.lock, seq));
98}
99
100/* Update system visible IP port range */
101static void set_ping_group_range(struct ctl_table *table, int range[2])
102{
103 gid_t *data = table->data;
104 write_seqlock(&sysctl_local_ports.lock);
105 data[0] = range[0];
106 data[1] = range[1];
107 write_sequnlock(&sysctl_local_ports.lock);
108}
109
110/* Validate changes from /proc interface. */
111static int ipv4_ping_group_range(ctl_table *table, int write,
112 void __user *buffer,
113 size_t *lenp, loff_t *ppos)
114{
115 int ret;
116 gid_t range[2];
117 ctl_table tmp = {
118 .data = &range,
119 .maxlen = sizeof(range),
120 .mode = table->mode,
121 .extra1 = &ip_ping_group_range_min,
122 .extra2 = &ip_ping_group_range_max,
123 };
124
125 inet_get_ping_group_range_table(table, range, range + 1);
126 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
127
128 if (write && ret == 0)
129 set_ping_group_range(table, range);
130
131 return ret;
132}
133
71static int proc_tcp_congestion_control(ctl_table *ctl, int write, 134static int proc_tcp_congestion_control(ctl_table *ctl, int write,
72 void __user *buffer, size_t *lenp, loff_t *ppos) 135 void __user *buffer, size_t *lenp, loff_t *ppos)
73{ 136{
@@ -677,6 +740,13 @@ static struct ctl_table ipv4_net_table[] = {
677 .mode = 0644, 740 .mode = 0644,
678 .proc_handler = proc_dointvec 741 .proc_handler = proc_dointvec
679 }, 742 },
743 {
744 .procname = "ping_group_range",
745 .data = &init_net.ipv4.sysctl_ping_group_range,
746 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
747 .mode = 0644,
748 .proc_handler = ipv4_ping_group_range,
749 },
680 { } 750 { }
681}; 751};
682 752
@@ -711,8 +781,18 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
711 &net->ipv4.sysctl_icmp_ratemask; 781 &net->ipv4.sysctl_icmp_ratemask;
712 table[6].data = 782 table[6].data =
713 &net->ipv4.sysctl_rt_cache_rebuild_count; 783 &net->ipv4.sysctl_rt_cache_rebuild_count;
784 table[7].data =
785 &net->ipv4.sysctl_ping_group_range;
786
714 } 787 }
715 788
789 /*
790 * Sane defaults - nobody may create ping sockets.
791 * Boot scripts should set this to distro-specific group.
792 */
793 net->ipv4.sysctl_ping_group_range[0] = 1;
794 net->ipv4.sysctl_ping_group_range[1] = 0;
795
716 net->ipv4.sysctl_rt_cache_rebuild_count = 4; 796 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
717 797
718 net->ipv4.ipv4_hdr = register_net_sysctl_table(net, 798 net->ipv4.ipv4_hdr = register_net_sysctl_table(net,