aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/Kconfig 6
-rw-r--r-- net/ipv4/arp.c 52
-rw-r--r-- net/ipv4/devinet.c 1
-rw-r--r-- net/ipv4/ipconfig.c 2
-rw-r--r-- net/ipv4/route.c 7
-rw-r--r-- net/ipv4/syncookies.c 3
-rw-r--r-- net/ipv4/tcp.c 30
-rw-r--r-- net/ipv4/tcp_output.c 22
8 files changed, 89 insertions, 34 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 70491d9035eb..0c94a1ac2946 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -166,7 +166,7 @@ config IP_PNP_DHCP
166 166
167 If unsure, say Y. Note that if you want to use DHCP, a DHCP server 167 If unsure, say Y. Note that if you want to use DHCP, a DHCP server
168 must be operating on your network. Read 168 must be operating on your network. Read
169 <file:Documentation/filesystems/nfsroot.txt> for details. 169 <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
170 170
171config IP_PNP_BOOTP 171config IP_PNP_BOOTP
172 bool "IP: BOOTP support" 172 bool "IP: BOOTP support"
@@ -181,7 +181,7 @@ config IP_PNP_BOOTP
181 does BOOTP itself, providing all necessary information on the kernel 181 does BOOTP itself, providing all necessary information on the kernel
182 command line, you can say N here. If unsure, say Y. Note that if you 182 command line, you can say N here. If unsure, say Y. Note that if you
183 want to use BOOTP, a BOOTP server must be operating on your network. 183 want to use BOOTP, a BOOTP server must be operating on your network.
184 Read <file:Documentation/filesystems/nfsroot.txt> for details. 184 Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
185 185
186config IP_PNP_RARP 186config IP_PNP_RARP
187 bool "IP: RARP support" 187 bool "IP: RARP support"
@@ -194,7 +194,7 @@ config IP_PNP_RARP
194 older protocol which is being obsoleted by BOOTP and DHCP), say Y 194 older protocol which is being obsoleted by BOOTP and DHCP), say Y
195 here. Note that if you want to use RARP, a RARP server must be 195 here. Note that if you want to use RARP, a RARP server must be
196 operating on your network. Read 196 operating on your network. Read
197 <file:Documentation/filesystems/nfsroot.txt> for details. 197 <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
198 198
199# not yet ready.. 199# not yet ready..
200# bool ' IP: ARP support' CONFIG_IP_PNP_ARP 200# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c95cd93acf29..078709233bc4 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -70,6 +70,7 @@
70 * bonding can change the skb before 70 * bonding can change the skb before
71 * sending (e.g. insert 8021q tag). 71 * sending (e.g. insert 8021q tag).
72 * Harald Welte : convert to make use of jenkins hash 72 * Harald Welte : convert to make use of jenkins hash
73 * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support.
73 */ 74 */
74 75
75#include <linux/module.h> 76#include <linux/module.h>
@@ -524,12 +525,15 @@ int arp_bind_neighbour(struct dst_entry *dst)
524/* 525/*
525 * Check if we can use proxy ARP for this path 526 * Check if we can use proxy ARP for this path
526 */ 527 */
527 528static inline int arp_fwd_proxy(struct in_device *in_dev,
528static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) 529 struct net_device *dev, struct rtable *rt)
529{ 530{
530 struct in_device *out_dev; 531 struct in_device *out_dev;
531 int imi, omi = -1; 532 int imi, omi = -1;
532 533
534 if (rt->u.dst.dev == dev)
535 return 0;
536
533 if (!IN_DEV_PROXY_ARP(in_dev)) 537 if (!IN_DEV_PROXY_ARP(in_dev))
534 return 0; 538 return 0;
535 539
@@ -548,6 +552,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
548} 552}
549 553
550/* 554/*
555 * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev)
556 *
557 * RFC3069 supports proxy arp replies back to the same interface. This
558 * is done to support (ethernet) switch features, like RFC 3069, where
559 * the individual ports are not allowed to communicate with each
560 * other, BUT they are allowed to talk to the upstream router. As
561 * described in RFC 3069, it is possible to allow these hosts to
562 * communicate through the upstream router, by proxy_arp'ing.
563 *
564 * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation"
565 *
566 * This technology is known by different names:
567 * In RFC 3069 it is called VLAN Aggregation.
568 * Cisco and Allied Telesyn call it Private VLAN.
569 * Hewlett-Packard call it Source-Port filtering or port-isolation.
570 * Ericsson call it MAC-Forced Forwarding (RFC Draft).
571 *
572 */
573static inline int arp_fwd_pvlan(struct in_device *in_dev,
574 struct net_device *dev, struct rtable *rt,
575 __be32 sip, __be32 tip)
576{
577 /* Private VLAN is only concerned about the same ethernet segment */
578 if (rt->u.dst.dev != dev)
579 return 0;
580
581 /* Don't reply on self probes (often done by windowz boxes)*/
582 if (sip == tip)
583 return 0;
584
585 if (IN_DEV_PROXY_ARP_PVLAN(in_dev))
586 return 1;
587 else
588 return 0;
589}
590
591/*
551 * Interface to link layer: send routine and receive handler. 592 * Interface to link layer: send routine and receive handler.
552 */ 593 */
553 594
@@ -833,8 +874,11 @@ static int arp_process(struct sk_buff *skb)
833 } 874 }
834 goto out; 875 goto out;
835 } else if (IN_DEV_FORWARD(in_dev)) { 876 } else if (IN_DEV_FORWARD(in_dev)) {
836 if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && 877 if (addr_type == RTN_UNICAST &&
837 (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { 878 (arp_fwd_proxy(in_dev, dev, rt) ||
879 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
880 pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))
881 {
838 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 882 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
839 if (n) 883 if (n)
840 neigh_release(n); 884 neigh_release(n);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 040c4f05b653..cd71a3908391 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1408,6 +1408,7 @@ static struct devinet_sysctl_table {
1408 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 1408 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1409 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 1409 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1410 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 1410 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1411 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1411 1412
1412 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 1413 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1413 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 1414 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4e08b7f2331c..10a6a604bf32 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1446,7 +1446,7 @@ late_initcall(ip_auto_config);
1446 1446
1447/* 1447/*
1448 * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel 1448 * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
1449 * command line parameter. See Documentation/filesystems/nfsroot.txt. 1449 * command line parameter. See Documentation/filesystems/nfs/nfsroot.txt.
1450 */ 1450 */
1451static int __init ic_proto_name(char *name) 1451static int __init ic_proto_name(char *name)
1452{ 1452{
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e446496f564f..1cc339441e7d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1988,8 +1988,13 @@ static int __mkroute_input(struct sk_buff *skb,
1988 if (skb->protocol != htons(ETH_P_IP)) { 1988 if (skb->protocol != htons(ETH_P_IP)) {
1989 /* Not IP (i.e. ARP). Do not create route, if it is 1989 /* Not IP (i.e. ARP). Do not create route, if it is
1990 * invalid for proxy arp. DNAT routes are always valid. 1990 * invalid for proxy arp. DNAT routes are always valid.
1991 *
1992 * Proxy arp feature have been extended to allow, ARP
1993 * replies back to the same interface, to support
1994 * Private VLAN switch technologies. See arp.c.
1991 */ 1995 */
1992 if (out_dev == in_dev) { 1996 if (out_dev == in_dev &&
1997 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
1993 err = -EINVAL; 1998 err = -EINVAL;
1994 goto cleanup; 1999 goto cleanup;
1995 } 2000 }
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 66fd80ef2473..5c24db4a3c91 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -358,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
358 358
359 tcp_select_initial_window(tcp_full_space(sk), req->mss, 359 tcp_select_initial_window(tcp_full_space(sk), req->mss,
360 &req->rcv_wnd, &req->window_clamp, 360 &req->rcv_wnd, &req->window_clamp,
361 ireq->wscale_ok, &rcv_wscale); 361 ireq->wscale_ok, &rcv_wscale,
362 dst_metric(&rt->u.dst, RTAX_INITRWND));
362 363
363 ireq->rcv_wscale = rcv_wscale; 364 ireq->rcv_wscale = rcv_wscale;
364 365
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b0a26bb25e2e..d5d69ea8f249 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -536,8 +536,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
536 tp->nonagle &= ~TCP_NAGLE_PUSH; 536 tp->nonagle &= ~TCP_NAGLE_PUSH;
537} 537}
538 538
539static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, 539static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
540 struct sk_buff *skb)
541{ 540{
542 if (flags & MSG_OOB) 541 if (flags & MSG_OOB)
543 tp->snd_up = tp->write_seq; 542 tp->snd_up = tp->write_seq;
@@ -546,13 +545,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
546static inline void tcp_push(struct sock *sk, int flags, int mss_now, 545static inline void tcp_push(struct sock *sk, int flags, int mss_now,
547 int nonagle) 546 int nonagle)
548{ 547{
549 struct tcp_sock *tp = tcp_sk(sk);
550
551 if (tcp_send_head(sk)) { 548 if (tcp_send_head(sk)) {
552 struct sk_buff *skb = tcp_write_queue_tail(sk); 549 struct tcp_sock *tp = tcp_sk(sk);
550
553 if (!(flags & MSG_MORE) || forced_push(tp)) 551 if (!(flags & MSG_MORE) || forced_push(tp))
554 tcp_mark_push(tp, skb); 552 tcp_mark_push(tp, tcp_write_queue_tail(sk));
555 tcp_mark_urg(tp, flags, skb); 553
554 tcp_mark_urg(tp, flags);
556 __tcp_push_pending_frames(sk, mss_now, 555 __tcp_push_pending_frames(sk, mss_now,
557 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); 556 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
558 } 557 }
@@ -877,12 +876,12 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
877#define TCP_PAGE(sk) (sk->sk_sndmsg_page) 876#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
878#define TCP_OFF(sk) (sk->sk_sndmsg_off) 877#define TCP_OFF(sk) (sk->sk_sndmsg_off)
879 878
880static inline int select_size(struct sock *sk) 879static inline int select_size(struct sock *sk, int sg)
881{ 880{
882 struct tcp_sock *tp = tcp_sk(sk); 881 struct tcp_sock *tp = tcp_sk(sk);
883 int tmp = tp->mss_cache; 882 int tmp = tp->mss_cache;
884 883
885 if (sk->sk_route_caps & NETIF_F_SG) { 884 if (sg) {
886 if (sk_can_gso(sk)) 885 if (sk_can_gso(sk))
887 tmp = 0; 886 tmp = 0;
888 else { 887 else {
@@ -906,7 +905,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
906 struct sk_buff *skb; 905 struct sk_buff *skb;
907 int iovlen, flags; 906 int iovlen, flags;
908 int mss_now, size_goal; 907 int mss_now, size_goal;
909 int err, copied; 908 int sg, err, copied;
910 long timeo; 909 long timeo;
911 910
912 lock_sock(sk); 911 lock_sock(sk);
@@ -934,6 +933,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
934 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 933 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
935 goto out_err; 934 goto out_err;
936 935
936 sg = sk->sk_route_caps & NETIF_F_SG;
937
937 while (--iovlen >= 0) { 938 while (--iovlen >= 0) {
938 int seglen = iov->iov_len; 939 int seglen = iov->iov_len;
939 unsigned char __user *from = iov->iov_base; 940 unsigned char __user *from = iov->iov_base;
@@ -959,8 +960,9 @@ new_segment:
959 if (!sk_stream_memory_free(sk)) 960 if (!sk_stream_memory_free(sk))
960 goto wait_for_sndbuf; 961 goto wait_for_sndbuf;
961 962
962 skb = sk_stream_alloc_skb(sk, select_size(sk), 963 skb = sk_stream_alloc_skb(sk,
963 sk->sk_allocation); 964 select_size(sk, sg),
965 sk->sk_allocation);
964 if (!skb) 966 if (!skb)
965 goto wait_for_memory; 967 goto wait_for_memory;
966 968
@@ -997,9 +999,7 @@ new_segment:
997 /* We can extend the last page 999 /* We can extend the last page
998 * fragment. */ 1000 * fragment. */
999 merge = 1; 1001 merge = 1;
1000 } else if (i == MAX_SKB_FRAGS || 1002 } else if (i == MAX_SKB_FRAGS || !sg) {
1001 (!i &&
1002 !(sk->sk_route_caps & NETIF_F_SG))) {
1003 /* Need to add new fragment and cannot 1003 /* Need to add new fragment and cannot
1004 * do this because interface is non-SG, 1004 * do this because interface is non-SG,
1005 * or because all the page slots are 1005 * or because all the page slots are
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 383ce237640f..4a1605d3f909 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -183,7 +183,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
183 */ 183 */
184void tcp_select_initial_window(int __space, __u32 mss, 184void tcp_select_initial_window(int __space, __u32 mss,
185 __u32 *rcv_wnd, __u32 *window_clamp, 185 __u32 *rcv_wnd, __u32 *window_clamp,
186 int wscale_ok, __u8 *rcv_wscale) 186 int wscale_ok, __u8 *rcv_wscale,
187 __u32 init_rcv_wnd)
187{ 188{
188 unsigned int space = (__space < 0 ? 0 : __space); 189 unsigned int space = (__space < 0 ? 0 : __space);
189 190
@@ -232,7 +233,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
232 init_cwnd = 2; 233 init_cwnd = 2;
233 else if (mss > 1460) 234 else if (mss > 1460)
234 init_cwnd = 3; 235 init_cwnd = 3;
235 if (*rcv_wnd > init_cwnd * mss) 236 /* when initializing use the value from init_rcv_wnd
237 * rather than the default from above
238 */
239 if (init_rcv_wnd &&
240 (*rcv_wnd > init_rcv_wnd * mss))
241 *rcv_wnd = init_rcv_wnd * mss;
242 else if (*rcv_wnd > init_cwnd * mss)
236 *rcv_wnd = init_cwnd * mss; 243 *rcv_wnd = init_cwnd * mss;
237 } 244 }
238 245
@@ -1794,11 +1801,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1794void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, 1801void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
1795 int nonagle) 1802 int nonagle)
1796{ 1803{
1797 struct sk_buff *skb = tcp_send_head(sk);
1798
1799 if (!skb)
1800 return;
1801
1802 /* If we are closed, the bytes will have to remain here. 1804 /* If we are closed, the bytes will have to remain here.
1803 * In time closedown will finish, we empty the write queue and 1805 * In time closedown will finish, we empty the write queue and
1804 * all will be happy. 1806 * all will be happy.
@@ -2422,7 +2424,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2422 &req->rcv_wnd, 2424 &req->rcv_wnd,
2423 &req->window_clamp, 2425 &req->window_clamp,
2424 ireq->wscale_ok, 2426 ireq->wscale_ok,
2425 &rcv_wscale); 2427 &rcv_wscale,
2428 dst_metric(dst, RTAX_INITRWND));
2426 ireq->rcv_wscale = rcv_wscale; 2429 ireq->rcv_wscale = rcv_wscale;
2427 } 2430 }
2428 2431
@@ -2549,7 +2552,8 @@ static void tcp_connect_init(struct sock *sk)
2549 &tp->rcv_wnd, 2552 &tp->rcv_wnd,
2550 &tp->window_clamp, 2553 &tp->window_clamp,
2551 sysctl_tcp_window_scaling, 2554 sysctl_tcp_window_scaling,
2552 &rcv_wscale); 2555 &rcv_wscale,
2556 dst_metric(dst, RTAX_INITRWND));
2553 2557
2554 tp->rx_opt.rcv_wscale = rcv_wscale; 2558 tp->rx_opt.rcv_wscale = rcv_wscale;
2555 tp->rcv_ssthresh = tp->rcv_wnd; 2559 tp->rcv_ssthresh = tp->rcv_wnd;