author    Daniel Borkmann <daniel@iogearbox.net>    2015-09-09 20:10:57 -0400
committer David S. Miller <davem@davemloft.net>    2015-09-10 00:43:22 -0400
commit    6bb0fef489f667cf701853054f44579754f00a06 (patch)
tree      65da22b6b07b2883c3171c256ef0e28482a49526 /net
parent    a66e36568e30ed3714c0e3a12bd3b64696343ff5 (diff)
netlink, mmap: fix edge-case leakages in nf queue zero-copy
When netlink mmap on the receive side is the consumer of nf queue data, it can happen that in some edge cases we write skb shared info into the user space mmap buffer: Assume a possible rx ring frame size of only 4096, and the network skb, which is being zero-copied into the netlink skb, contains page frags with an overall skb->len larger than the linear part of the netlink skb.

skb_zerocopy(), which is generic and thus not aware of the fact that shared info cannot be accessed for such skbs, then tries to write and fill frags, thus leaking kernel data/pointers and in some corner cases possibly writing out of bounds of the mmap area (when filling the last slot in the ring buffer this way).

I.e. the ring buffer slot is then of status NL_MMAP_STATUS_VALID, has an advertised length larger than 4096, where the linear part is visible at the slot beginning, and the leaked sizeof(struct skb_shared_info) has been written to the beginning of the next slot (also corrupting the struct nl_mmap_hdr slot header incl. status etc), since skb->end points to skb->data + ring->frame_size - NL_MMAP_HDRLEN.

The fix adds and lets __netlink_alloc_skb() take the actual needed linear room for the network skb + meta data into account. It's completely irrelevant for non-mmapped netlink sockets, but in case mmap sockets are used, it can be decided whether the available skb_tailroom() is really large enough for the buffer, or whether it needs to internally fall back to a normal alloc_skb().

From the nf queue side, the information whether the destination port is an mmap RX ring is not really available without an extra port-to-socket lookup, thus it can only be determined in lower layers, i.e. when __netlink_alloc_skb() is called, which checks internally for this. I chose to add the extra ldiff parameter as mmap will then still work: we have data_len and hlen in nfqnl_build_packet_message(), where data_len is the full length (capped at queue->copy_range) for skb_zerocopy() and hlen some possible part of data_len that needs to be copied; the rem_len variable indicates the needed remaining linear mmap space.

The only other workaround in nf queue internally would be after allocation time, e.g. by capping the data_len to the skb_tailroom() iff we deal with an mmap skb, but that would 1) expose the fact that we use an mmap skb to upper layers, and 2) trim the skb where we otherwise could just have moved the full skb into the normal receive queue.

After the patch, in my test case the ring slot doesn't fit and therefore shows NL_MMAP_STATUS_COPY, where a full skb carries all the data and thus needs to be picked up via recv().

Fixes: 3ab1f683bf8b ("nfnetlink: add support for memory mapped netlink")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
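Note that the diff below is filtered to net/, so the accompanying header change is not shown here; presumably the existing netlink_alloc_skb() entry point is kept as a thin wrapper so that existing callers are unaffected. A sketch of what that wrapper would look like (an assumption for illustration, not the verbatim header change):

/* Sketch of the assumed include/linux/netlink.h wrapper: existing
 * callers need no extra linear room, so they pass ldiff = 0 through
 * to the new __netlink_alloc_skb().
 */
static inline struct sk_buff *
netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid,
		  gfp_t gfp_mask)
{
	return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask);
}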
Diffstat (limited to 'net')
 net/netfilter/nfnetlink_queue_core.c |  5 +++--
 net/netlink/af_netlink.c             | 18 ++++++++++++------
 2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 685cc6a17163..a5cd6d90b78b 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 			   __be32 **packet_id_ptr)
 {
 	size_t size;
-	size_t data_len = 0, cap_len = 0;
+	size_t data_len = 0, cap_len = 0, rem_len = 0;
 	unsigned int hlen = 0;
 	struct sk_buff *skb;
 	struct nlattr *nla;
@@ -360,6 +360,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 		hlen = min_t(unsigned int, hlen, data_len);
 		size += sizeof(struct nlattr) + hlen;
 		cap_len = entskb->len;
+		rem_len = data_len - hlen;
 		break;
 	}
 
@@ -377,7 +378,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 		size += nla_total_size(seclen);
 	}
 
-	skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
+	skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid,
 				  GFP_ATOMIC);
 	if (!skb) {
 		skb_tx_error(entskb);
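To make the rem_len arithmetic concrete, here is a small standalone sketch of the fit check that the new ldiff argument enables; the frame size, header length, and packet sizes below are illustrative assumptions, not values taken from the patch:

#include <stdio.h>

/* Illustrative constants only; the real NL_MMAP_HDRLEN comes from the
 * netlink UAPI header, and the frame size is chosen per ring setup.
 */
#define EX_FRAME_SIZE	4096u
#define EX_HDRLEN	  24u

int main(void)
{
	unsigned int size = 1200;	/* netlink msg: attrs + hlen bytes */
	unsigned int data_len = 6000;	/* full len, capped at copy_range */
	unsigned int hlen = 1000;	/* part of data_len copied linearly */
	unsigned int ldiff = data_len - hlen;	/* rem_len in the patch */
	unsigned int linear_size = size + ldiff;

	/* mirrors: if (ring->frame_size - NL_MMAP_HDRLEN < linear_size) */
	if (EX_FRAME_SIZE - EX_HDRLEN < linear_size)
		printf("no fit: fall back to alloc_skb(), deliver as NL_MMAP_STATUS_COPY\n");
	else
		printf("fits into a single mmap ring slot\n");
	return 0;
}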
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 173817a5dfad..7f86d3b55060 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1844,15 +1844,16 @@ retry:
 }
 EXPORT_SYMBOL(netlink_unicast);
 
-struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
-				  u32 dst_portid, gfp_t gfp_mask)
+struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
+				    unsigned int ldiff, u32 dst_portid,
+				    gfp_t gfp_mask)
 {
 #ifdef CONFIG_NETLINK_MMAP
+	unsigned int maxlen, linear_size;
 	struct sock *sk = NULL;
 	struct sk_buff *skb;
 	struct netlink_ring *ring;
 	struct nl_mmap_hdr *hdr;
-	unsigned int maxlen;
 
 	sk = netlink_getsockbyportid(ssk, dst_portid);
 	if (IS_ERR(sk))
@@ -1863,7 +1864,11 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
 	if (ring->pg_vec == NULL)
 		goto out_put;
 
-	if (ring->frame_size - NL_MMAP_HDRLEN < size)
+	/* We need to account the full linear size needed as a ring
+	 * slot cannot have non-linear parts.
+	 */
+	linear_size = size + ldiff;
+	if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
 		goto out_put;
 
 	skb = alloc_skb_head(gfp_mask);
@@ -1877,13 +1882,14 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
 
 	/* check again under lock */
 	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
-	if (maxlen < size)
+	if (maxlen < linear_size)
 		goto out_free;
 
 	netlink_forward_ring(ring);
 	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
 	if (hdr == NULL)
 		goto err2;
+
 	netlink_ring_setup_skb(skb, sk, ring, hdr);
 	netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
 	atomic_inc(&ring->pending);
@@ -1909,7 +1915,7 @@ out:
 #endif
 	return alloc_skb(size, gfp_mask);
 }
-EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
 
 int netlink_has_listeners(struct sock *sk, unsigned int group)
 {
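As a usage note on the receive side: after this fix, a userspace ring consumer (per Documentation/networking/netlink_mmap.txt) sees such oversized packets as NL_MMAP_STATUS_COPY frames and must fetch them via recv(). A minimal sketch, assuming the ring is already mapped and handle_msg() is a hypothetical application-supplied callback:

#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>

/* Minimal RX sketch; handle_msg() is hypothetical, and the ring
 * setup and poll loop are omitted.
 */
static void rx_one_frame(int fd, struct nl_mmap_hdr *hdr,
			 void (*handle_msg)(const void *buf, size_t len))
{
	char buf[16384];
	ssize_t len;

	switch (hdr->nm_status) {
	case NL_MMAP_STATUS_VALID:
		/* payload sits in the mapped frame itself */
		handle_msg((char *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
		break;
	case NL_MMAP_STATUS_COPY:
		/* frame didn't fit a ring slot (the case this patch now
		 * detects up front); pick up the full skb via recv()
		 */
		len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
		if (len > 0)
			handle_msg(buf, (size_t)len);
		break;
	}
	/* hand the frame back to the kernel */
	hdr->nm_status = NL_MMAP_STATUS_UNUSED;
}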