diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 4 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 6 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 12 | ||||
-rw-r--r-- | net/ipv4/route.c | 60 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_vegas.c | 11 |
6 files changed, 52 insertions, 46 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index b2cf91e4cca..5b919f7b45d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -407,8 +407,8 @@ config INET_XFRM_MODE_BEET | |||
407 | If unsure, say Y. | 407 | If unsure, say Y. |
408 | 408 | ||
409 | config INET_LRO | 409 | config INET_LRO |
410 | tristate "Large Receive Offload (ipv4/tcp)" | 410 | bool "Large Receive Offload (ipv4/tcp)" |
411 | 411 | default y | |
412 | ---help--- | 412 | ---help--- |
413 | Support for Large Receive Offload (ipv4/tcp). | 413 | Support for Large Receive Offload (ipv4/tcp). |
414 | 414 | ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec0ae490f0b..33c7c85dfe4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key) | |||
986 | static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | 986 | static struct node *trie_rebalance(struct trie *t, struct tnode *tn) |
987 | { | 987 | { |
988 | int wasfull; | 988 | int wasfull; |
989 | t_key cindex, key = tn->key; | 989 | t_key cindex, key; |
990 | struct tnode *tp; | 990 | struct tnode *tp; |
991 | 991 | ||
992 | preempt_disable(); | ||
993 | key = tn->key; | ||
994 | |||
992 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { | 995 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { |
993 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 996 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
994 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 997 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
@@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | |||
1007 | if (IS_TNODE(tn)) | 1010 | if (IS_TNODE(tn)) |
1008 | tn = (struct tnode *)resize(t, (struct tnode *)tn); | 1011 | tn = (struct tnode *)resize(t, (struct tnode *)tn); |
1009 | 1012 | ||
1013 | preempt_enable(); | ||
1010 | return (struct node *)tn; | 1014 | return (struct node *)tn; |
1011 | } | 1015 | } |
1012 | 1016 | ||
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 90d22ae0a41..88bf051d0cb 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ | |||
139 | __be32 root_server_addr = NONE; /* Address of NFS server */ | 139 | __be32 root_server_addr = NONE; /* Address of NFS server */ |
140 | u8 root_server_path[256] = { 0, }; /* Path to mount as root */ | 140 | u8 root_server_path[256] = { 0, }; /* Path to mount as root */ |
141 | 141 | ||
142 | u32 ic_dev_xid; /* Device under configuration */ | ||
143 | |||
142 | /* vendor class identifier */ | 144 | /* vendor class identifier */ |
143 | static char vendor_class_identifier[253] __initdata; | 145 | static char vendor_class_identifier[253] __initdata; |
144 | 146 | ||
@@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
932 | goto drop_unlock; | 934 | goto drop_unlock; |
933 | } | 935 | } |
934 | 936 | ||
937 | /* Is it a reply for the device we are configuring? */ | ||
938 | if (b->xid != ic_dev_xid) { | ||
939 | if (net_ratelimit()) | ||
940 | printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); | ||
941 | goto drop_unlock; | ||
942 | } | ||
943 | |||
935 | /* Parse extensions */ | 944 | /* Parse extensions */ |
936 | if (ext_len >= 4 && | 945 | if (ext_len >= 4 && |
937 | !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ | 946 | !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ |
@@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void) | |||
1115 | get_random_bytes(&timeout, sizeof(timeout)); | 1124 | get_random_bytes(&timeout, sizeof(timeout)); |
1116 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); | 1125 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); |
1117 | for (;;) { | 1126 | for (;;) { |
1127 | /* Track the device we are configuring */ | ||
1128 | ic_dev_xid = d->xid; | ||
1129 | |||
1118 | #ifdef IPCONFIG_BOOTP | 1130 | #ifdef IPCONFIG_BOOTP |
1119 | if (do_bootp && (d->able & IC_BOOTP)) | 1131 | if (do_bootp && (d->able & IC_BOOTP)) |
1120 | ic_bootp_send_if(d, jiffies - start_jiffies); | 1132 | ic_bootp_send_if(d, jiffies - start_jiffies); |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c4c60e9f068..28205e5bfa9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -784,8 +784,8 @@ static void rt_check_expire(void) | |||
784 | { | 784 | { |
785 | static unsigned int rover; | 785 | static unsigned int rover; |
786 | unsigned int i = rover, goal; | 786 | unsigned int i = rover, goal; |
787 | struct rtable *rth, **rthp; | 787 | struct rtable *rth, *aux, **rthp; |
788 | unsigned long length = 0, samples = 0; | 788 | unsigned long samples = 0; |
789 | unsigned long sum = 0, sum2 = 0; | 789 | unsigned long sum = 0, sum2 = 0; |
790 | u64 mult; | 790 | u64 mult; |
791 | 791 | ||
@@ -795,9 +795,9 @@ static void rt_check_expire(void) | |||
795 | goal = (unsigned int)mult; | 795 | goal = (unsigned int)mult; |
796 | if (goal > rt_hash_mask) | 796 | if (goal > rt_hash_mask) |
797 | goal = rt_hash_mask + 1; | 797 | goal = rt_hash_mask + 1; |
798 | length = 0; | ||
799 | for (; goal > 0; goal--) { | 798 | for (; goal > 0; goal--) { |
800 | unsigned long tmo = ip_rt_gc_timeout; | 799 | unsigned long tmo = ip_rt_gc_timeout; |
800 | unsigned long length; | ||
801 | 801 | ||
802 | i = (i + 1) & rt_hash_mask; | 802 | i = (i + 1) & rt_hash_mask; |
803 | rthp = &rt_hash_table[i].chain; | 803 | rthp = &rt_hash_table[i].chain; |
@@ -809,8 +809,10 @@ static void rt_check_expire(void) | |||
809 | 809 | ||
810 | if (*rthp == NULL) | 810 | if (*rthp == NULL) |
811 | continue; | 811 | continue; |
812 | length = 0; | ||
812 | spin_lock_bh(rt_hash_lock_addr(i)); | 813 | spin_lock_bh(rt_hash_lock_addr(i)); |
813 | while ((rth = *rthp) != NULL) { | 814 | while ((rth = *rthp) != NULL) { |
815 | prefetch(rth->u.dst.rt_next); | ||
814 | if (rt_is_expired(rth)) { | 816 | if (rt_is_expired(rth)) { |
815 | *rthp = rth->u.dst.rt_next; | 817 | *rthp = rth->u.dst.rt_next; |
816 | rt_free(rth); | 818 | rt_free(rth); |
@@ -819,33 +821,30 @@ static void rt_check_expire(void) | |||
819 | if (rth->u.dst.expires) { | 821 | if (rth->u.dst.expires) { |
820 | /* Entry is expired even if it is in use */ | 822 | /* Entry is expired even if it is in use */ |
821 | if (time_before_eq(jiffies, rth->u.dst.expires)) { | 823 | if (time_before_eq(jiffies, rth->u.dst.expires)) { |
824 | nofree: | ||
822 | tmo >>= 1; | 825 | tmo >>= 1; |
823 | rthp = &rth->u.dst.rt_next; | 826 | rthp = &rth->u.dst.rt_next; |
824 | /* | 827 | /* |
825 | * Only bump our length if the hash | 828 | * We only count entries on |
826 | * inputs on entries n and n+1 are not | ||
827 | * the same, we only count entries on | ||
828 | * a chain with equal hash inputs once | 829 | * a chain with equal hash inputs once |
829 | * so that entries for different QOS | 830 | * so that entries for different QOS |
830 | * levels, and other non-hash input | 831 | * levels, and other non-hash input |
831 | * attributes don't unfairly skew | 832 | * attributes don't unfairly skew |
832 | * the length computation | 833 | * the length computation |
833 | */ | 834 | */ |
834 | if ((*rthp == NULL) || | 835 | for (aux = rt_hash_table[i].chain;;) { |
835 | !compare_hash_inputs(&(*rthp)->fl, | 836 | if (aux == rth) { |
836 | &rth->fl)) | 837 | length += ONE; |
837 | length += ONE; | 838 | break; |
839 | } | ||
840 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | ||
841 | break; | ||
842 | aux = aux->u.dst.rt_next; | ||
843 | } | ||
838 | continue; | 844 | continue; |
839 | } | 845 | } |
840 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { | 846 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) |
841 | tmo >>= 1; | 847 | goto nofree; |
842 | rthp = &rth->u.dst.rt_next; | ||
843 | if ((*rthp == NULL) || | ||
844 | !compare_hash_inputs(&(*rthp)->fl, | ||
845 | &rth->fl)) | ||
846 | length += ONE; | ||
847 | continue; | ||
848 | } | ||
849 | 848 | ||
850 | /* Cleanup aged off entries. */ | 849 | /* Cleanup aged off entries. */ |
851 | *rthp = rth->u.dst.rt_next; | 850 | *rthp = rth->u.dst.rt_next; |
@@ -1068,7 +1067,6 @@ out: return 0; | |||
1068 | static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) | 1067 | static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) |
1069 | { | 1068 | { |
1070 | struct rtable *rth, **rthp; | 1069 | struct rtable *rth, **rthp; |
1071 | struct rtable *rthi; | ||
1072 | unsigned long now; | 1070 | unsigned long now; |
1073 | struct rtable *cand, **candp; | 1071 | struct rtable *cand, **candp; |
1074 | u32 min_score; | 1072 | u32 min_score; |
@@ -1088,7 +1086,6 @@ restart: | |||
1088 | } | 1086 | } |
1089 | 1087 | ||
1090 | rthp = &rt_hash_table[hash].chain; | 1088 | rthp = &rt_hash_table[hash].chain; |
1091 | rthi = NULL; | ||
1092 | 1089 | ||
1093 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1090 | spin_lock_bh(rt_hash_lock_addr(hash)); |
1094 | while ((rth = *rthp) != NULL) { | 1091 | while ((rth = *rthp) != NULL) { |
@@ -1134,17 +1131,6 @@ restart: | |||
1134 | chain_length++; | 1131 | chain_length++; |
1135 | 1132 | ||
1136 | rthp = &rth->u.dst.rt_next; | 1133 | rthp = &rth->u.dst.rt_next; |
1137 | |||
1138 | /* | ||
1139 | * check to see if the next entry in the chain | ||
1140 | * contains the same hash input values as rt. If it does | ||
1141 | * This is where we will insert into the list, instead of | ||
1142 | * at the head. This groups entries that differ by aspects not | ||
1143 | * relvant to the hash function together, which we use to adjust | ||
1144 | * our chain length | ||
1145 | */ | ||
1146 | if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) | ||
1147 | rthi = rth; | ||
1148 | } | 1134 | } |
1149 | 1135 | ||
1150 | if (cand) { | 1136 | if (cand) { |
@@ -1205,10 +1191,7 @@ restart: | |||
1205 | } | 1191 | } |
1206 | } | 1192 | } |
1207 | 1193 | ||
1208 | if (rthi) | 1194 | rt->u.dst.rt_next = rt_hash_table[hash].chain; |
1209 | rt->u.dst.rt_next = rthi->u.dst.rt_next; | ||
1210 | else | ||
1211 | rt->u.dst.rt_next = rt_hash_table[hash].chain; | ||
1212 | 1195 | ||
1213 | #if RT_CACHE_DEBUG >= 2 | 1196 | #if RT_CACHE_DEBUG >= 2 |
1214 | if (rt->u.dst.rt_next) { | 1197 | if (rt->u.dst.rt_next) { |
@@ -1224,10 +1207,7 @@ restart: | |||
1224 | * previous writes to rt are comitted to memory | 1207 | * previous writes to rt are comitted to memory |
1225 | * before making rt visible to other CPUS. | 1208 | * before making rt visible to other CPUS. |
1226 | */ | 1209 | */ |
1227 | if (rthi) | 1210 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); |
1228 | rcu_assign_pointer(rthi->u.dst.rt_next, rt); | ||
1229 | else | ||
1230 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); | ||
1231 | 1211 | ||
1232 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1212 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1233 | *rp = rt; | 1213 | *rp = rt; |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1d7f49c6f0c..7a0f0b27bf1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1321,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1321 | struct task_struct *user_recv = NULL; | 1321 | struct task_struct *user_recv = NULL; |
1322 | int copied_early = 0; | 1322 | int copied_early = 0; |
1323 | struct sk_buff *skb; | 1323 | struct sk_buff *skb; |
1324 | u32 urg_hole = 0; | ||
1324 | 1325 | ||
1325 | lock_sock(sk); | 1326 | lock_sock(sk); |
1326 | 1327 | ||
@@ -1532,7 +1533,8 @@ do_prequeue: | |||
1532 | } | 1533 | } |
1533 | } | 1534 | } |
1534 | } | 1535 | } |
1535 | if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { | 1536 | if ((flags & MSG_PEEK) && |
1537 | (peek_seq - copied - urg_hole != tp->copied_seq)) { | ||
1536 | if (net_ratelimit()) | 1538 | if (net_ratelimit()) |
1537 | printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", | 1539 | printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", |
1538 | current->comm, task_pid_nr(current)); | 1540 | current->comm, task_pid_nr(current)); |
@@ -1553,6 +1555,7 @@ do_prequeue: | |||
1553 | if (!urg_offset) { | 1555 | if (!urg_offset) { |
1554 | if (!sock_flag(sk, SOCK_URGINLINE)) { | 1556 | if (!sock_flag(sk, SOCK_URGINLINE)) { |
1555 | ++*seq; | 1557 | ++*seq; |
1558 | urg_hole++; | ||
1556 | offset++; | 1559 | offset++; |
1557 | used--; | 1560 | used--; |
1558 | if (!used) | 1561 | if (!used) |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a453aac91bd..c6743eec9b7 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) | |||
158 | } | 158 | } |
159 | EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); | 159 | EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); |
160 | 160 | ||
161 | static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) | ||
162 | { | ||
163 | return min(tp->snd_ssthresh, tp->snd_cwnd-1); | ||
164 | } | ||
165 | |||
161 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | 166 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) |
162 | { | 167 | { |
163 | struct tcp_sock *tp = tcp_sk(sk); | 168 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
221 | */ | 226 | */ |
222 | diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; | 227 | diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; |
223 | 228 | ||
224 | if (diff > gamma && tp->snd_ssthresh > 2 ) { | 229 | if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { |
225 | /* Going too fast. Time to slow down | 230 | /* Going too fast. Time to slow down |
226 | * and switch to congestion avoidance. | 231 | * and switch to congestion avoidance. |
227 | */ | 232 | */ |
228 | tp->snd_ssthresh = 2; | ||
229 | 233 | ||
230 | /* Set cwnd to match the actual rate | 234 | /* Set cwnd to match the actual rate |
231 | * exactly: | 235 | * exactly: |
@@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
235 | * utilization. | 239 | * utilization. |
236 | */ | 240 | */ |
237 | tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); | 241 | tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); |
242 | tp->snd_ssthresh = tcp_vegas_ssthresh(tp); | ||
238 | 243 | ||
239 | } else if (tp->snd_cwnd <= tp->snd_ssthresh) { | 244 | } else if (tp->snd_cwnd <= tp->snd_ssthresh) { |
240 | /* Slow start. */ | 245 | /* Slow start. */ |
@@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
250 | * we slow down. | 255 | * we slow down. |
251 | */ | 256 | */ |
252 | tp->snd_cwnd--; | 257 | tp->snd_cwnd--; |
258 | tp->snd_ssthresh | ||
259 | = tcp_vegas_ssthresh(tp); | ||
253 | } else if (diff < alpha) { | 260 | } else if (diff < alpha) { |
254 | /* We don't have enough extra packets | 261 | /* We don't have enough extra packets |
255 | * in the network, so speed up. | 262 | * in the network, so speed up. |