aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <dada1@cosmosbay.com>2008-11-24 18:52:46 -0500
committerDavid S. Miller <davem@davemloft.net>2008-11-24 18:52:46 -0500
commit2e77d89b2fa8e3f8325b8ce7893ec3645f41aff5 (patch)
treeae40aa75449f705bd166630f9bcb5f41373d8248 /net/ipv4
parent4db0acf3c0afbbbb2ae35a65f8896ca6655a47ec (diff)
net: avoid a pair of dst_hold()/dst_release() in ip_append_data()
We can reduce pressure on the dst entry refcount that slows down the UDP transmit path on SMP machines. This pressure is visible on RTP servers when delivering content to media gateways, especially big ones handling thousands of streams. Several CPUs send UDP frames to the same destination, and hence use the same dst entry. This patch makes ip_append_data() eventually steal the refcount its callers had to take on the dst entry. This doesn't avoid all refcounting, but still gives speedups on SMP, on the UDP/RAW transmit path. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/icmp.c8
-rw-r--r--net/ipv4/ip_output.c11
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/udp.c2
4 files changed, 14 insertions, 9 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 21e497efbd7f..7b88be9803b1 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -321,12 +321,12 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
321} 321}
322 322
323static void icmp_push_reply(struct icmp_bxm *icmp_param, 323static void icmp_push_reply(struct icmp_bxm *icmp_param,
324 struct ipcm_cookie *ipc, struct rtable *rt) 324 struct ipcm_cookie *ipc, struct rtable **rt)
325{ 325{
326 struct sock *sk; 326 struct sock *sk;
327 struct sk_buff *skb; 327 struct sk_buff *skb;
328 328
329 sk = icmp_sk(dev_net(rt->u.dst.dev)); 329 sk = icmp_sk(dev_net((*rt)->u.dst.dev));
330 if (ip_append_data(sk, icmp_glue_bits, icmp_param, 330 if (ip_append_data(sk, icmp_glue_bits, icmp_param,
331 icmp_param->data_len+icmp_param->head_len, 331 icmp_param->data_len+icmp_param->head_len,
332 icmp_param->head_len, 332 icmp_param->head_len,
@@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
392 } 392 }
393 if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, 393 if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
394 icmp_param->data.icmph.code)) 394 icmp_param->data.icmph.code))
395 icmp_push_reply(icmp_param, &ipc, rt); 395 icmp_push_reply(icmp_param, &ipc, &rt);
396 ip_rt_put(rt); 396 ip_rt_put(rt);
397out_unlock: 397out_unlock:
398 icmp_xmit_unlock(sk); 398 icmp_xmit_unlock(sk);
@@ -635,7 +635,7 @@ route_done:
635 icmp_param.data_len = room; 635 icmp_param.data_len = room;
636 icmp_param.head_len = sizeof(struct icmphdr); 636 icmp_param.head_len = sizeof(struct icmphdr);
637 637
638 icmp_push_reply(&icmp_param, &ipc, rt); 638 icmp_push_reply(&icmp_param, &ipc, &rt);
639ende: 639ende:
640 ip_rt_put(rt); 640 ip_rt_put(rt);
641out_unlock: 641out_unlock:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 46d7be233eac..5516825a0751 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -778,7 +778,7 @@ int ip_append_data(struct sock *sk,
778 int getfrag(void *from, char *to, int offset, int len, 778 int getfrag(void *from, char *to, int offset, int len,
779 int odd, struct sk_buff *skb), 779 int odd, struct sk_buff *skb),
780 void *from, int length, int transhdrlen, 780 void *from, int length, int transhdrlen,
781 struct ipcm_cookie *ipc, struct rtable *rt, 781 struct ipcm_cookie *ipc, struct rtable **rtp,
782 unsigned int flags) 782 unsigned int flags)
783{ 783{
784 struct inet_sock *inet = inet_sk(sk); 784 struct inet_sock *inet = inet_sk(sk);
@@ -793,6 +793,7 @@ int ip_append_data(struct sock *sk,
793 int offset = 0; 793 int offset = 0;
794 unsigned int maxfraglen, fragheaderlen; 794 unsigned int maxfraglen, fragheaderlen;
795 int csummode = CHECKSUM_NONE; 795 int csummode = CHECKSUM_NONE;
796 struct rtable *rt;
796 797
797 if (flags&MSG_PROBE) 798 if (flags&MSG_PROBE)
798 return 0; 799 return 0;
@@ -812,7 +813,11 @@ int ip_append_data(struct sock *sk,
812 inet->cork.flags |= IPCORK_OPT; 813 inet->cork.flags |= IPCORK_OPT;
813 inet->cork.addr = ipc->addr; 814 inet->cork.addr = ipc->addr;
814 } 815 }
815 dst_hold(&rt->u.dst); 816 rt = *rtp;
817 /*
818 * We steal reference to this route, caller should not release it
819 */
820 *rtp = NULL;
816 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? 821 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
817 rt->u.dst.dev->mtu : 822 rt->u.dst.dev->mtu :
818 dst_mtu(rt->u.dst.path); 823 dst_mtu(rt->u.dst.path);
@@ -1391,7 +1396,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1391 sk->sk_protocol = ip_hdr(skb)->protocol; 1396 sk->sk_protocol = ip_hdr(skb)->protocol;
1392 sk->sk_bound_dev_if = arg->bound_dev_if; 1397 sk->sk_bound_dev_if = arg->bound_dev_if;
1393 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, 1398 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1394 &ipc, rt, MSG_DONTWAIT); 1399 &ipc, &rt, MSG_DONTWAIT);
1395 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 1400 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1396 if (arg->csumoffset >= 0) 1401 if (arg->csumoffset >= 0)
1397 *((__sum16 *)skb_transport_header(skb) + 1402 *((__sum16 *)skb_transport_header(skb) +
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 998fcffc9e15..dff8bc4e0fac 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -572,7 +572,7 @@ back_from_confirm:
572 ipc.addr = rt->rt_dst; 572 ipc.addr = rt->rt_dst;
573 lock_sock(sk); 573 lock_sock(sk);
574 err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, 574 err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
575 &ipc, rt, msg->msg_flags); 575 &ipc, &rt, msg->msg_flags);
576 if (err) 576 if (err)
577 ip_flush_pending_frames(sk); 577 ip_flush_pending_frames(sk);
578 else if (!(msg->msg_flags & MSG_MORE)) 578 else if (!(msg->msg_flags & MSG_MORE))
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index da869ce041d9..549114472db3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -719,7 +719,7 @@ do_append_data:
719 up->len += ulen; 719 up->len += ulen;
720 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 720 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
721 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, 721 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
722 sizeof(struct udphdr), &ipc, rt, 722 sizeof(struct udphdr), &ipc, &rt,
723 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 723 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
724 if (err) 724 if (err)
725 udp_flush_pending_frames(sk); 725 udp_flush_pending_frames(sk);