aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/af_inet.c
diff options
context:
space:
mode:
authorVasiliy Kulikov <segoon@openwall.com>2011-05-13 06:01:00 -0400
committerDavid S. Miller <davem@davemloft.net>2011-05-13 16:08:13 -0400
commitc319b4d76b9e583a5d88d6bf190e079c4e43213d (patch)
tree22fcc6f1c671908d640145c1f82e5290cd40f715 /net/ipv4/af_inet.c
parentf20190302e3e697a166cc28ebef43058749dedda (diff)
net: ipv4: add IPPROTO_ICMP socket kind
This patch adds the IPPROTO_ICMP socket kind. It makes it possible to send ICMP_ECHO messages and receive the corresponding ICMP_ECHOREPLY messages without any special privileges. In other words, the patch makes it possible to implement setuid-less and CAP_NET_RAW-less /bin/ping. In order not to increase the kernel's attack surface, the new functionality is disabled by default, but is enabled at bootup by supporting Linux distributions, optionally with restriction to a group or a group range (see below). Similar functionality is implemented in Mac OS X: http://www.manpagez.com/man/4/icmp/ A new ping socket is created with socket(PF_INET, SOCK_DGRAM, IPPROTO_ICMP). Message identifiers (octets 4-5 of the ICMP header) are interpreted as local ports. Addresses are stored in struct sockaddr_in. No port numbers are reserved for privileged processes; port 0 is reserved for API ("let the kernel pick a free number"). There is no notion of remote ports; remote port numbers provided by the user (e.g. in connect()) are ignored. Data sent and received include ICMP headers. This is deliberate to: 1) Avoid the need to transport header values like sequence numbers by other means. 2) Make it easier to port existing programs using raw sockets. ICMP headers given to send() are checked and sanitized. The type must be ICMP_ECHO and the code must be zero (future extensions might relax this, see below). The id is set to the number (local port) of the socket, and the checksum is always recomputed. ICMP reply packets received from the network are demultiplexed according to their ids, and are returned by recv() without any modifications. IP header information and ICMP errors of those packets may be obtained via ancillary data (IP_RECVTTL, IP_RETOPTS, and IP_RECVERR). ICMP source quenches and redirects are reported as fake errors via the error queue (IP_RECVERR); the next hop address for redirects is saved to ee_info (in network order). 
socket(2) is restricted to the group range specified in "/proc/sys/net/ipv4/ping_group_range". It is "1 0" by default, meaning that nobody (not even root) may create ping sockets. Setting it to "100 100" would grant permissions to the single group (to either make /sbin/ping g+s and owned by this group or to grant permissions to the "netadmins" group), "0 4294967295" would enable it for the world, "100 4294967295" would enable it for the users, but not daemons. The existing code might be (in the unlikely case anyone needs it) extended rather easily to handle other similar pairs of ICMP messages (Timestamp/Reply, Information Request/Reply, Address Mask Request/Reply etc.). Userspace ping util & patch for it: http://openwall.info/wiki/people/segoon/ping For Openwall GNU/*/Linux it was the last step on the road to the setuid-less distro. A revision of this patch (for RHEL5/OpenVZ kernels) is in use in Owl-current, such as in the 2011/03/12 LiveCD ISOs: http://mirrors.kernel.org/openwall/Owl/current/iso/ Initially this functionality was written by Pavel Kankovsky for Linux 2.4.32, but unfortunately it was never made public. All ping options (-b, -p, -Q, -R, -s, -t, -T, -M, -I), are tested with the patch. PATCH v3: - switched to flowi4. - minor changes to be consistent with raw sockets code. PATCH v2: - changed ping_debug() to pr_debug(). - removed CONFIG_IP_PING. - removed ping_seq_fops.owner field (unused for procfs). - switched to proc_net_fops_create(). - switched to %pK in seq_printf(). PATCH v1: - fixed checksumming bug. - CAP_NET_RAW may not create icmp sockets anymore. RFC v2: - minor cleanups. - introduced sysctl'able group range to restrict socket(2). Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/af_inet.c')
-rw-r--r--net/ipv4/af_inet.c22
1 files changed, 22 insertions, 0 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 851aa056854b..cc1463156cd0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -105,6 +105,7 @@
105#include <net/tcp.h> 105#include <net/tcp.h>
106#include <net/udp.h> 106#include <net/udp.h>
107#include <net/udplite.h> 107#include <net/udplite.h>
108#include <net/ping.h>
108#include <linux/skbuff.h> 109#include <linux/skbuff.h>
109#include <net/sock.h> 110#include <net/sock.h>
110#include <net/raw.h> 111#include <net/raw.h>
@@ -1008,6 +1009,14 @@ static struct inet_protosw inetsw_array[] =
1008 .flags = INET_PROTOSW_PERMANENT, 1009 .flags = INET_PROTOSW_PERMANENT,
1009 }, 1010 },
1010 1011
1012 {
1013 .type = SOCK_DGRAM,
1014 .protocol = IPPROTO_ICMP,
1015 .prot = &ping_prot,
1016 .ops = &inet_dgram_ops,
1017 .no_check = UDP_CSUM_DEFAULT,
1018 .flags = INET_PROTOSW_REUSE,
1019 },
1011 1020
1012 { 1021 {
1013 .type = SOCK_RAW, 1022 .type = SOCK_RAW,
@@ -1527,6 +1536,7 @@ static const struct net_protocol udp_protocol = {
1527 1536
1528static const struct net_protocol icmp_protocol = { 1537static const struct net_protocol icmp_protocol = {
1529 .handler = icmp_rcv, 1538 .handler = icmp_rcv,
1539 .err_handler = ping_err,
1530 .no_policy = 1, 1540 .no_policy = 1,
1531 .netns_ok = 1, 1541 .netns_ok = 1,
1532}; 1542};
@@ -1642,6 +1652,10 @@ static int __init inet_init(void)
1642 if (rc) 1652 if (rc)
1643 goto out_unregister_udp_proto; 1653 goto out_unregister_udp_proto;
1644 1654
1655 rc = proto_register(&ping_prot, 1);
1656 if (rc)
1657 goto out_unregister_raw_proto;
1658
1645 /* 1659 /*
1646 * Tell SOCKET that we are alive... 1660 * Tell SOCKET that we are alive...
1647 */ 1661 */
@@ -1697,6 +1711,8 @@ static int __init inet_init(void)
1697 /* Add UDP-Lite (RFC 3828) */ 1711 /* Add UDP-Lite (RFC 3828) */
1698 udplite4_register(); 1712 udplite4_register();
1699 1713
1714 ping_init();
1715
1700 /* 1716 /*
1701 * Set the ICMP layer up 1717 * Set the ICMP layer up
1702 */ 1718 */
@@ -1727,6 +1743,8 @@ static int __init inet_init(void)
1727 rc = 0; 1743 rc = 0;
1728out: 1744out:
1729 return rc; 1745 return rc;
1746out_unregister_raw_proto:
1747 proto_unregister(&raw_prot);
1730out_unregister_udp_proto: 1748out_unregister_udp_proto:
1731 proto_unregister(&udp_prot); 1749 proto_unregister(&udp_prot);
1732out_unregister_tcp_proto: 1750out_unregister_tcp_proto:
@@ -1751,11 +1769,15 @@ static int __init ipv4_proc_init(void)
1751 goto out_tcp; 1769 goto out_tcp;
1752 if (udp4_proc_init()) 1770 if (udp4_proc_init())
1753 goto out_udp; 1771 goto out_udp;
1772 if (ping_proc_init())
1773 goto out_ping;
1754 if (ip_misc_proc_init()) 1774 if (ip_misc_proc_init())
1755 goto out_misc; 1775 goto out_misc;
1756out: 1776out:
1757 return rc; 1777 return rc;
1758out_misc: 1778out_misc:
1779 ping_proc_exit();
1780out_ping:
1759 udp4_proc_exit(); 1781 udp4_proc_exit();
1760out_udp: 1782out_udp:
1761 tcp4_proc_exit(); 1783 tcp4_proc_exit();