diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 4 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 6 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 12 | ||||
-rw-r--r-- | net/ipv4/netfilter/arp_tables.c | 125 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_tables.c | 126 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_core.c | 3 | ||||
-rw-r--r-- | net/ipv4/route.c | 62 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_vegas.c | 11 |
11 files changed, 131 insertions, 241 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index b2cf91e4ccaa..5b919f7b45db 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -407,8 +407,8 @@ config INET_XFRM_MODE_BEET | |||
407 | If unsure, say Y. | 407 | If unsure, say Y. |
408 | 408 | ||
409 | config INET_LRO | 409 | config INET_LRO |
410 | tristate "Large Receive Offload (ipv4/tcp)" | 410 | bool "Large Receive Offload (ipv4/tcp)" |
411 | 411 | default y | |
412 | ---help--- | 412 | ---help--- |
413 | Support for Large Receive Offload (ipv4/tcp). | 413 | Support for Large Receive Offload (ipv4/tcp). |
414 | 414 | ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec0ae490f0b6..33c7c85dfe40 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key) | |||
986 | static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | 986 | static struct node *trie_rebalance(struct trie *t, struct tnode *tn) |
987 | { | 987 | { |
988 | int wasfull; | 988 | int wasfull; |
989 | t_key cindex, key = tn->key; | 989 | t_key cindex, key; |
990 | struct tnode *tp; | 990 | struct tnode *tp; |
991 | 991 | ||
992 | preempt_disable(); | ||
993 | key = tn->key; | ||
994 | |||
992 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { | 995 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { |
993 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 996 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
994 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 997 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
@@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | |||
1007 | if (IS_TNODE(tn)) | 1010 | if (IS_TNODE(tn)) |
1008 | tn = (struct tnode *)resize(t, (struct tnode *)tn); | 1011 | tn = (struct tnode *)resize(t, (struct tnode *)tn); |
1009 | 1012 | ||
1013 | preempt_enable(); | ||
1010 | return (struct node *)tn; | 1014 | return (struct node *)tn; |
1011 | } | 1015 | } |
1012 | 1016 | ||
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 90d22ae0a419..88bf051d0cbb 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ | |||
139 | __be32 root_server_addr = NONE; /* Address of NFS server */ | 139 | __be32 root_server_addr = NONE; /* Address of NFS server */ |
140 | u8 root_server_path[256] = { 0, }; /* Path to mount as root */ | 140 | u8 root_server_path[256] = { 0, }; /* Path to mount as root */ |
141 | 141 | ||
142 | u32 ic_dev_xid; /* Device under configuration */ | ||
143 | |||
142 | /* vendor class identifier */ | 144 | /* vendor class identifier */ |
143 | static char vendor_class_identifier[253] __initdata; | 145 | static char vendor_class_identifier[253] __initdata; |
144 | 146 | ||
@@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
932 | goto drop_unlock; | 934 | goto drop_unlock; |
933 | } | 935 | } |
934 | 936 | ||
937 | /* Is it a reply for the device we are configuring? */ | ||
938 | if (b->xid != ic_dev_xid) { | ||
939 | if (net_ratelimit()) | ||
940 | printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); | ||
941 | goto drop_unlock; | ||
942 | } | ||
943 | |||
935 | /* Parse extensions */ | 944 | /* Parse extensions */ |
936 | if (ext_len >= 4 && | 945 | if (ext_len >= 4 && |
937 | !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ | 946 | !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ |
@@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void) | |||
1115 | get_random_bytes(&timeout, sizeof(timeout)); | 1124 | get_random_bytes(&timeout, sizeof(timeout)); |
1116 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); | 1125 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); |
1117 | for (;;) { | 1126 | for (;;) { |
1127 | /* Track the device we are configuring */ | ||
1128 | ic_dev_xid = d->xid; | ||
1129 | |||
1118 | #ifdef IPCONFIG_BOOTP | 1130 | #ifdef IPCONFIG_BOOTP |
1119 | if (do_bootp && (d->able & IC_BOOTP)) | 1131 | if (do_bootp && (d->able & IC_BOOTP)) |
1120 | ic_bootp_send_if(d, jiffies - start_jiffies); | 1132 | ic_bootp_send_if(d, jiffies - start_jiffies); |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 5ba533d234db..831fe1879dc0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
253 | indev = in ? in->name : nulldevname; | 253 | indev = in ? in->name : nulldevname; |
254 | outdev = out ? out->name : nulldevname; | 254 | outdev = out ? out->name : nulldevname; |
255 | 255 | ||
256 | rcu_read_lock_bh(); | 256 | xt_info_rdlock_bh(); |
257 | private = rcu_dereference(table->private); | 257 | private = table->private; |
258 | table_base = rcu_dereference(private->entries[smp_processor_id()]); | 258 | table_base = private->entries[smp_processor_id()]; |
259 | 259 | ||
260 | e = get_entry(table_base, private->hook_entry[hook]); | 260 | e = get_entry(table_base, private->hook_entry[hook]); |
261 | back = get_entry(table_base, private->underflow[hook]); | 261 | back = get_entry(table_base, private->underflow[hook]); |
@@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
273 | 273 | ||
274 | hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + | 274 | hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + |
275 | (2 * skb->dev->addr_len); | 275 | (2 * skb->dev->addr_len); |
276 | |||
276 | ADD_COUNTER(e->counters, hdr_len, 1); | 277 | ADD_COUNTER(e->counters, hdr_len, 1); |
277 | 278 | ||
278 | t = arpt_get_target(e); | 279 | t = arpt_get_target(e); |
@@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
328 | e = (void *)e + e->next_offset; | 329 | e = (void *)e + e->next_offset; |
329 | } | 330 | } |
330 | } while (!hotdrop); | 331 | } while (!hotdrop); |
331 | 332 | xt_info_rdunlock_bh(); | |
332 | rcu_read_unlock_bh(); | ||
333 | 333 | ||
334 | if (hotdrop) | 334 | if (hotdrop) |
335 | return NF_DROP; | 335 | return NF_DROP; |
@@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, | |||
711 | /* Instead of clearing (by a previous call to memset()) | 711 | /* Instead of clearing (by a previous call to memset()) |
712 | * the counters and using adds, we set the counters | 712 | * the counters and using adds, we set the counters |
713 | * with data used by 'current' CPU | 713 | * with data used by 'current' CPU |
714 | * We dont care about preemption here. | 714 | * |
715 | * Bottom half has to be disabled to prevent deadlock | ||
716 | * if new softirq were to run and call ipt_do_table | ||
715 | */ | 717 | */ |
716 | curcpu = raw_smp_processor_id(); | 718 | local_bh_disable(); |
719 | curcpu = smp_processor_id(); | ||
717 | 720 | ||
718 | i = 0; | 721 | i = 0; |
719 | ARPT_ENTRY_ITERATE(t->entries[curcpu], | 722 | ARPT_ENTRY_ITERATE(t->entries[curcpu], |
@@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, | |||
726 | if (cpu == curcpu) | 729 | if (cpu == curcpu) |
727 | continue; | 730 | continue; |
728 | i = 0; | 731 | i = 0; |
732 | xt_info_wrlock(cpu); | ||
729 | ARPT_ENTRY_ITERATE(t->entries[cpu], | 733 | ARPT_ENTRY_ITERATE(t->entries[cpu], |
730 | t->size, | 734 | t->size, |
731 | add_entry_to_counter, | 735 | add_entry_to_counter, |
732 | counters, | 736 | counters, |
733 | &i); | 737 | &i); |
738 | xt_info_wrunlock(cpu); | ||
734 | } | 739 | } |
735 | } | ||
736 | |||
737 | |||
738 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
739 | * and everything is OK. */ | ||
740 | static int | ||
741 | add_counter_to_entry(struct arpt_entry *e, | ||
742 | const struct xt_counters addme[], | ||
743 | unsigned int *i) | ||
744 | { | ||
745 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
746 | |||
747 | (*i)++; | ||
748 | return 0; | ||
749 | } | ||
750 | |||
751 | /* Take values from counters and add them back onto the current cpu */ | ||
752 | static void put_counters(struct xt_table_info *t, | ||
753 | const struct xt_counters counters[]) | ||
754 | { | ||
755 | unsigned int i, cpu; | ||
756 | |||
757 | local_bh_disable(); | ||
758 | cpu = smp_processor_id(); | ||
759 | i = 0; | ||
760 | ARPT_ENTRY_ITERATE(t->entries[cpu], | ||
761 | t->size, | ||
762 | add_counter_to_entry, | ||
763 | counters, | ||
764 | &i); | ||
765 | local_bh_enable(); | 740 | local_bh_enable(); |
766 | } | 741 | } |
767 | 742 | ||
768 | static inline int | ||
769 | zero_entry_counter(struct arpt_entry *e, void *arg) | ||
770 | { | ||
771 | e->counters.bcnt = 0; | ||
772 | e->counters.pcnt = 0; | ||
773 | return 0; | ||
774 | } | ||
775 | |||
776 | static void | ||
777 | clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) | ||
778 | { | ||
779 | unsigned int cpu; | ||
780 | const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; | ||
781 | |||
782 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | ||
783 | for_each_possible_cpu(cpu) { | ||
784 | memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); | ||
785 | ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, | ||
786 | zero_entry_counter, NULL); | ||
787 | } | ||
788 | } | ||
789 | |||
790 | static struct xt_counters *alloc_counters(struct xt_table *table) | 743 | static struct xt_counters *alloc_counters(struct xt_table *table) |
791 | { | 744 | { |
792 | unsigned int countersize; | 745 | unsigned int countersize; |
793 | struct xt_counters *counters; | 746 | struct xt_counters *counters; |
794 | struct xt_table_info *private = table->private; | 747 | struct xt_table_info *private = table->private; |
795 | struct xt_table_info *info; | ||
796 | 748 | ||
797 | /* We need atomic snapshot of counters: rest doesn't change | 749 | /* We need atomic snapshot of counters: rest doesn't change |
798 | * (other than comefrom, which userspace doesn't care | 750 | * (other than comefrom, which userspace doesn't care |
@@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) | |||
802 | counters = vmalloc_node(countersize, numa_node_id()); | 754 | counters = vmalloc_node(countersize, numa_node_id()); |
803 | 755 | ||
804 | if (counters == NULL) | 756 | if (counters == NULL) |
805 | goto nomem; | 757 | return ERR_PTR(-ENOMEM); |
806 | |||
807 | info = xt_alloc_table_info(private->size); | ||
808 | if (!info) | ||
809 | goto free_counters; | ||
810 | |||
811 | clone_counters(info, private); | ||
812 | |||
813 | mutex_lock(&table->lock); | ||
814 | xt_table_entry_swap_rcu(private, info); | ||
815 | synchronize_net(); /* Wait until smoke has cleared */ | ||
816 | 758 | ||
817 | get_counters(info, counters); | 759 | get_counters(private, counters); |
818 | put_counters(private, counters); | ||
819 | mutex_unlock(&table->lock); | ||
820 | |||
821 | xt_free_table_info(info); | ||
822 | 760 | ||
823 | return counters; | 761 | return counters; |
824 | |||
825 | free_counters: | ||
826 | vfree(counters); | ||
827 | nomem: | ||
828 | return ERR_PTR(-ENOMEM); | ||
829 | } | 762 | } |
830 | 763 | ||
831 | static int copy_entries_to_user(unsigned int total_size, | 764 | static int copy_entries_to_user(unsigned int total_size, |
@@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, | |||
1094 | (newinfo->number <= oldinfo->initial_entries)) | 1027 | (newinfo->number <= oldinfo->initial_entries)) |
1095 | module_put(t->me); | 1028 | module_put(t->me); |
1096 | 1029 | ||
1097 | /* Get the old counters. */ | 1030 | /* Get the old counters, and synchronize with replace */ |
1098 | get_counters(oldinfo, counters); | 1031 | get_counters(oldinfo, counters); |
1032 | |||
1099 | /* Decrease module usage counts and free resource */ | 1033 | /* Decrease module usage counts and free resource */ |
1100 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; | 1034 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
1101 | ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, | 1035 | ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, |
@@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) | |||
1165 | return ret; | 1099 | return ret; |
1166 | } | 1100 | } |
1167 | 1101 | ||
1102 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
1103 | * and everything is OK. */ | ||
1104 | static int | ||
1105 | add_counter_to_entry(struct arpt_entry *e, | ||
1106 | const struct xt_counters addme[], | ||
1107 | unsigned int *i) | ||
1108 | { | ||
1109 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
1110 | |||
1111 | (*i)++; | ||
1112 | return 0; | ||
1113 | } | ||
1114 | |||
1168 | static int do_add_counters(struct net *net, void __user *user, unsigned int len, | 1115 | static int do_add_counters(struct net *net, void __user *user, unsigned int len, |
1169 | int compat) | 1116 | int compat) |
1170 | { | 1117 | { |
1171 | unsigned int i; | 1118 | unsigned int i, curcpu; |
1172 | struct xt_counters_info tmp; | 1119 | struct xt_counters_info tmp; |
1173 | struct xt_counters *paddc; | 1120 | struct xt_counters *paddc; |
1174 | unsigned int num_counters; | 1121 | unsigned int num_counters; |
@@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
1224 | goto free; | 1171 | goto free; |
1225 | } | 1172 | } |
1226 | 1173 | ||
1227 | mutex_lock(&t->lock); | 1174 | local_bh_disable(); |
1228 | private = t->private; | 1175 | private = t->private; |
1229 | if (private->number != num_counters) { | 1176 | if (private->number != num_counters) { |
1230 | ret = -EINVAL; | 1177 | ret = -EINVAL; |
1231 | goto unlock_up_free; | 1178 | goto unlock_up_free; |
1232 | } | 1179 | } |
1233 | 1180 | ||
1234 | preempt_disable(); | ||
1235 | i = 0; | 1181 | i = 0; |
1236 | /* Choose the copy that is on our node */ | 1182 | /* Choose the copy that is on our node */ |
1237 | loc_cpu_entry = private->entries[smp_processor_id()]; | 1183 | curcpu = smp_processor_id(); |
1184 | loc_cpu_entry = private->entries[curcpu]; | ||
1185 | xt_info_wrlock(curcpu); | ||
1238 | ARPT_ENTRY_ITERATE(loc_cpu_entry, | 1186 | ARPT_ENTRY_ITERATE(loc_cpu_entry, |
1239 | private->size, | 1187 | private->size, |
1240 | add_counter_to_entry, | 1188 | add_counter_to_entry, |
1241 | paddc, | 1189 | paddc, |
1242 | &i); | 1190 | &i); |
1243 | preempt_enable(); | 1191 | xt_info_wrunlock(curcpu); |
1244 | unlock_up_free: | 1192 | unlock_up_free: |
1245 | mutex_unlock(&t->lock); | 1193 | local_bh_enable(); |
1246 | |||
1247 | xt_table_unlock(t); | 1194 | xt_table_unlock(t); |
1248 | module_put(t->me); | 1195 | module_put(t->me); |
1249 | free: | 1196 | free: |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 810c0b62c7d4..2ec8d7290c40 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, | |||
338 | tgpar.hooknum = hook; | 338 | tgpar.hooknum = hook; |
339 | 339 | ||
340 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 340 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
341 | 341 | xt_info_rdlock_bh(); | |
342 | rcu_read_lock_bh(); | 342 | private = table->private; |
343 | private = rcu_dereference(table->private); | 343 | table_base = private->entries[smp_processor_id()]; |
344 | table_base = rcu_dereference(private->entries[smp_processor_id()]); | ||
345 | 344 | ||
346 | e = get_entry(table_base, private->hook_entry[hook]); | 345 | e = get_entry(table_base, private->hook_entry[hook]); |
347 | 346 | ||
@@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, | |||
436 | e = (void *)e + e->next_offset; | 435 | e = (void *)e + e->next_offset; |
437 | } | 436 | } |
438 | } while (!hotdrop); | 437 | } while (!hotdrop); |
439 | 438 | xt_info_rdunlock_bh(); | |
440 | rcu_read_unlock_bh(); | ||
441 | 439 | ||
442 | #ifdef DEBUG_ALLOW_ALL | 440 | #ifdef DEBUG_ALLOW_ALL |
443 | return NF_ACCEPT; | 441 | return NF_ACCEPT; |
@@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, | |||
896 | 894 | ||
897 | /* Instead of clearing (by a previous call to memset()) | 895 | /* Instead of clearing (by a previous call to memset()) |
898 | * the counters and using adds, we set the counters | 896 | * the counters and using adds, we set the counters |
899 | * with data used by 'current' CPU | 897 | * with data used by 'current' CPU. |
900 | * We dont care about preemption here. | 898 | * |
899 | * Bottom half has to be disabled to prevent deadlock | ||
900 | * if new softirq were to run and call ipt_do_table | ||
901 | */ | 901 | */ |
902 | curcpu = raw_smp_processor_id(); | 902 | local_bh_disable(); |
903 | curcpu = smp_processor_id(); | ||
903 | 904 | ||
904 | i = 0; | 905 | i = 0; |
905 | IPT_ENTRY_ITERATE(t->entries[curcpu], | 906 | IPT_ENTRY_ITERATE(t->entries[curcpu], |
@@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, | |||
912 | if (cpu == curcpu) | 913 | if (cpu == curcpu) |
913 | continue; | 914 | continue; |
914 | i = 0; | 915 | i = 0; |
916 | xt_info_wrlock(cpu); | ||
915 | IPT_ENTRY_ITERATE(t->entries[cpu], | 917 | IPT_ENTRY_ITERATE(t->entries[cpu], |
916 | t->size, | 918 | t->size, |
917 | add_entry_to_counter, | 919 | add_entry_to_counter, |
918 | counters, | 920 | counters, |
919 | &i); | 921 | &i); |
922 | xt_info_wrunlock(cpu); | ||
920 | } | 923 | } |
921 | |||
922 | } | ||
923 | |||
924 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
925 | * and everything is OK. */ | ||
926 | static int | ||
927 | add_counter_to_entry(struct ipt_entry *e, | ||
928 | const struct xt_counters addme[], | ||
929 | unsigned int *i) | ||
930 | { | ||
931 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
932 | |||
933 | (*i)++; | ||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | /* Take values from counters and add them back onto the current cpu */ | ||
938 | static void put_counters(struct xt_table_info *t, | ||
939 | const struct xt_counters counters[]) | ||
940 | { | ||
941 | unsigned int i, cpu; | ||
942 | |||
943 | local_bh_disable(); | ||
944 | cpu = smp_processor_id(); | ||
945 | i = 0; | ||
946 | IPT_ENTRY_ITERATE(t->entries[cpu], | ||
947 | t->size, | ||
948 | add_counter_to_entry, | ||
949 | counters, | ||
950 | &i); | ||
951 | local_bh_enable(); | 924 | local_bh_enable(); |
952 | } | 925 | } |
953 | 926 | ||
954 | |||
955 | static inline int | ||
956 | zero_entry_counter(struct ipt_entry *e, void *arg) | ||
957 | { | ||
958 | e->counters.bcnt = 0; | ||
959 | e->counters.pcnt = 0; | ||
960 | return 0; | ||
961 | } | ||
962 | |||
963 | static void | ||
964 | clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) | ||
965 | { | ||
966 | unsigned int cpu; | ||
967 | const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; | ||
968 | |||
969 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | ||
970 | for_each_possible_cpu(cpu) { | ||
971 | memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); | ||
972 | IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, | ||
973 | zero_entry_counter, NULL); | ||
974 | } | ||
975 | } | ||
976 | |||
977 | static struct xt_counters * alloc_counters(struct xt_table *table) | 927 | static struct xt_counters * alloc_counters(struct xt_table *table) |
978 | { | 928 | { |
979 | unsigned int countersize; | 929 | unsigned int countersize; |
980 | struct xt_counters *counters; | 930 | struct xt_counters *counters; |
981 | struct xt_table_info *private = table->private; | 931 | struct xt_table_info *private = table->private; |
982 | struct xt_table_info *info; | ||
983 | 932 | ||
984 | /* We need atomic snapshot of counters: rest doesn't change | 933 | /* We need atomic snapshot of counters: rest doesn't change |
985 | (other than comefrom, which userspace doesn't care | 934 | (other than comefrom, which userspace doesn't care |
@@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) | |||
988 | counters = vmalloc_node(countersize, numa_node_id()); | 937 | counters = vmalloc_node(countersize, numa_node_id()); |
989 | 938 | ||
990 | if (counters == NULL) | 939 | if (counters == NULL) |
991 | goto nomem; | 940 | return ERR_PTR(-ENOMEM); |
992 | 941 | ||
993 | info = xt_alloc_table_info(private->size); | 942 | get_counters(private, counters); |
994 | if (!info) | ||
995 | goto free_counters; | ||
996 | |||
997 | clone_counters(info, private); | ||
998 | |||
999 | mutex_lock(&table->lock); | ||
1000 | xt_table_entry_swap_rcu(private, info); | ||
1001 | synchronize_net(); /* Wait until smoke has cleared */ | ||
1002 | |||
1003 | get_counters(info, counters); | ||
1004 | put_counters(private, counters); | ||
1005 | mutex_unlock(&table->lock); | ||
1006 | |||
1007 | xt_free_table_info(info); | ||
1008 | 943 | ||
1009 | return counters; | 944 | return counters; |
1010 | |||
1011 | free_counters: | ||
1012 | vfree(counters); | ||
1013 | nomem: | ||
1014 | return ERR_PTR(-ENOMEM); | ||
1015 | } | 945 | } |
1016 | 946 | ||
1017 | static int | 947 | static int |
@@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
1306 | (newinfo->number <= oldinfo->initial_entries)) | 1236 | (newinfo->number <= oldinfo->initial_entries)) |
1307 | module_put(t->me); | 1237 | module_put(t->me); |
1308 | 1238 | ||
1309 | /* Get the old counters. */ | 1239 | /* Get the old counters, and synchronize with replace */ |
1310 | get_counters(oldinfo, counters); | 1240 | get_counters(oldinfo, counters); |
1241 | |||
1311 | /* Decrease module usage counts and free resource */ | 1242 | /* Decrease module usage counts and free resource */ |
1312 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; | 1243 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
1313 | IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, | 1244 | IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, |
@@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) | |||
1377 | return ret; | 1308 | return ret; |
1378 | } | 1309 | } |
1379 | 1310 | ||
1311 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
1312 | * and everything is OK. */ | ||
1313 | static int | ||
1314 | add_counter_to_entry(struct ipt_entry *e, | ||
1315 | const struct xt_counters addme[], | ||
1316 | unsigned int *i) | ||
1317 | { | ||
1318 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
1319 | |||
1320 | (*i)++; | ||
1321 | return 0; | ||
1322 | } | ||
1380 | 1323 | ||
1381 | static int | 1324 | static int |
1382 | do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) | 1325 | do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) |
1383 | { | 1326 | { |
1384 | unsigned int i; | 1327 | unsigned int i, curcpu; |
1385 | struct xt_counters_info tmp; | 1328 | struct xt_counters_info tmp; |
1386 | struct xt_counters *paddc; | 1329 | struct xt_counters *paddc; |
1387 | unsigned int num_counters; | 1330 | unsigned int num_counters; |
@@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
1437 | goto free; | 1380 | goto free; |
1438 | } | 1381 | } |
1439 | 1382 | ||
1440 | mutex_lock(&t->lock); | 1383 | local_bh_disable(); |
1441 | private = t->private; | 1384 | private = t->private; |
1442 | if (private->number != num_counters) { | 1385 | if (private->number != num_counters) { |
1443 | ret = -EINVAL; | 1386 | ret = -EINVAL; |
1444 | goto unlock_up_free; | 1387 | goto unlock_up_free; |
1445 | } | 1388 | } |
1446 | 1389 | ||
1447 | preempt_disable(); | ||
1448 | i = 0; | 1390 | i = 0; |
1449 | /* Choose the copy that is on our node */ | 1391 | /* Choose the copy that is on our node */ |
1450 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1392 | curcpu = smp_processor_id(); |
1393 | loc_cpu_entry = private->entries[curcpu]; | ||
1394 | xt_info_wrlock(curcpu); | ||
1451 | IPT_ENTRY_ITERATE(loc_cpu_entry, | 1395 | IPT_ENTRY_ITERATE(loc_cpu_entry, |
1452 | private->size, | 1396 | private->size, |
1453 | add_counter_to_entry, | 1397 | add_counter_to_entry, |
1454 | paddc, | 1398 | paddc, |
1455 | &i); | 1399 | &i); |
1456 | preempt_enable(); | 1400 | xt_info_wrunlock(curcpu); |
1457 | unlock_up_free: | 1401 | unlock_up_free: |
1458 | mutex_unlock(&t->lock); | 1402 | local_bh_enable(); |
1459 | xt_table_unlock(t); | 1403 | xt_table_unlock(t); |
1460 | module_put(t->me); | 1404 | module_put(t->me); |
1461 | free: | 1405 | free: |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe65187810f0..3229e0a81ba6 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -211,7 +211,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, | |||
211 | minip = ntohl(range->min_ip); | 211 | minip = ntohl(range->min_ip); |
212 | maxip = ntohl(range->max_ip); | 212 | maxip = ntohl(range->max_ip); |
213 | j = jhash_2words((__force u32)tuple->src.u3.ip, | 213 | j = jhash_2words((__force u32)tuple->src.u3.ip, |
214 | (__force u32)tuple->dst.u3.ip, 0); | 214 | range->flags & IP_NAT_RANGE_PERSISTENT ? |
215 | (__force u32)tuple->dst.u3.ip : 0, 0); | ||
215 | j = ((u64)j * (maxip - minip + 1)) >> 32; | 216 | j = ((u64)j * (maxip - minip + 1)) >> 32; |
216 | *var_ipp = htonl(minip + j); | 217 | *var_ipp = htonl(minip + j); |
217 | } | 218 | } |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c40debe51b38..28205e5bfa9b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -784,8 +784,8 @@ static void rt_check_expire(void) | |||
784 | { | 784 | { |
785 | static unsigned int rover; | 785 | static unsigned int rover; |
786 | unsigned int i = rover, goal; | 786 | unsigned int i = rover, goal; |
787 | struct rtable *rth, **rthp; | 787 | struct rtable *rth, *aux, **rthp; |
788 | unsigned long length = 0, samples = 0; | 788 | unsigned long samples = 0; |
789 | unsigned long sum = 0, sum2 = 0; | 789 | unsigned long sum = 0, sum2 = 0; |
790 | u64 mult; | 790 | u64 mult; |
791 | 791 | ||
@@ -795,9 +795,9 @@ static void rt_check_expire(void) | |||
795 | goal = (unsigned int)mult; | 795 | goal = (unsigned int)mult; |
796 | if (goal > rt_hash_mask) | 796 | if (goal > rt_hash_mask) |
797 | goal = rt_hash_mask + 1; | 797 | goal = rt_hash_mask + 1; |
798 | length = 0; | ||
799 | for (; goal > 0; goal--) { | 798 | for (; goal > 0; goal--) { |
800 | unsigned long tmo = ip_rt_gc_timeout; | 799 | unsigned long tmo = ip_rt_gc_timeout; |
800 | unsigned long length; | ||
801 | 801 | ||
802 | i = (i + 1) & rt_hash_mask; | 802 | i = (i + 1) & rt_hash_mask; |
803 | rthp = &rt_hash_table[i].chain; | 803 | rthp = &rt_hash_table[i].chain; |
@@ -809,8 +809,10 @@ static void rt_check_expire(void) | |||
809 | 809 | ||
810 | if (*rthp == NULL) | 810 | if (*rthp == NULL) |
811 | continue; | 811 | continue; |
812 | length = 0; | ||
812 | spin_lock_bh(rt_hash_lock_addr(i)); | 813 | spin_lock_bh(rt_hash_lock_addr(i)); |
813 | while ((rth = *rthp) != NULL) { | 814 | while ((rth = *rthp) != NULL) { |
815 | prefetch(rth->u.dst.rt_next); | ||
814 | if (rt_is_expired(rth)) { | 816 | if (rt_is_expired(rth)) { |
815 | *rthp = rth->u.dst.rt_next; | 817 | *rthp = rth->u.dst.rt_next; |
816 | rt_free(rth); | 818 | rt_free(rth); |
@@ -819,33 +821,30 @@ static void rt_check_expire(void) | |||
819 | if (rth->u.dst.expires) { | 821 | if (rth->u.dst.expires) { |
820 | /* Entry is expired even if it is in use */ | 822 | /* Entry is expired even if it is in use */ |
821 | if (time_before_eq(jiffies, rth->u.dst.expires)) { | 823 | if (time_before_eq(jiffies, rth->u.dst.expires)) { |
824 | nofree: | ||
822 | tmo >>= 1; | 825 | tmo >>= 1; |
823 | rthp = &rth->u.dst.rt_next; | 826 | rthp = &rth->u.dst.rt_next; |
824 | /* | 827 | /* |
825 | * Only bump our length if the hash | 828 | * We only count entries on |
826 | * inputs on entries n and n+1 are not | ||
827 | * the same, we only count entries on | ||
828 | * a chain with equal hash inputs once | 829 | * a chain with equal hash inputs once |
829 | * so that entries for different QOS | 830 | * so that entries for different QOS |
830 | * levels, and other non-hash input | 831 | * levels, and other non-hash input |
831 | * attributes don't unfairly skew | 832 | * attributes don't unfairly skew |
832 | * the length computation | 833 | * the length computation |
833 | */ | 834 | */ |
834 | if ((*rthp == NULL) || | 835 | for (aux = rt_hash_table[i].chain;;) { |
835 | !compare_hash_inputs(&(*rthp)->fl, | 836 | if (aux == rth) { |
836 | &rth->fl)) | 837 | length += ONE; |
837 | length += ONE; | 838 | break; |
839 | } | ||
840 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | ||
841 | break; | ||
842 | aux = aux->u.dst.rt_next; | ||
843 | } | ||
838 | continue; | 844 | continue; |
839 | } | 845 | } |
840 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { | 846 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) |
841 | tmo >>= 1; | 847 | goto nofree; |
842 | rthp = &rth->u.dst.rt_next; | ||
843 | if ((*rthp == NULL) || | ||
844 | !compare_hash_inputs(&(*rthp)->fl, | ||
845 | &rth->fl)) | ||
846 | length += ONE; | ||
847 | continue; | ||
848 | } | ||
849 | 848 | ||
850 | /* Cleanup aged off entries. */ | 849 | /* Cleanup aged off entries. */ |
851 | *rthp = rth->u.dst.rt_next; | 850 | *rthp = rth->u.dst.rt_next; |
@@ -1068,7 +1067,6 @@ out: return 0; | |||
1068 | static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) | 1067 | static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) |
1069 | { | 1068 | { |
1070 | struct rtable *rth, **rthp; | 1069 | struct rtable *rth, **rthp; |
1071 | struct rtable *rthi; | ||
1072 | unsigned long now; | 1070 | unsigned long now; |
1073 | struct rtable *cand, **candp; | 1071 | struct rtable *cand, **candp; |
1074 | u32 min_score; | 1072 | u32 min_score; |
@@ -1088,7 +1086,6 @@ restart: | |||
1088 | } | 1086 | } |
1089 | 1087 | ||
1090 | rthp = &rt_hash_table[hash].chain; | 1088 | rthp = &rt_hash_table[hash].chain; |
1091 | rthi = NULL; | ||
1092 | 1089 | ||
1093 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1090 | spin_lock_bh(rt_hash_lock_addr(hash)); |
1094 | while ((rth = *rthp) != NULL) { | 1091 | while ((rth = *rthp) != NULL) { |
@@ -1134,17 +1131,6 @@ restart: | |||
1134 | chain_length++; | 1131 | chain_length++; |
1135 | 1132 | ||
1136 | rthp = &rth->u.dst.rt_next; | 1133 | rthp = &rth->u.dst.rt_next; |
1137 | |||
1138 | /* | ||
1139 | * check to see if the next entry in the chain | ||
1140 | * contains the same hash input values as rt. If it does | ||
1141 | * This is where we will insert into the list, instead of | ||
1142 | * at the head. This groups entries that differ by aspects not | ||
1143 | * relvant to the hash function together, which we use to adjust | ||
1144 | * our chain length | ||
1145 | */ | ||
1146 | if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) | ||
1147 | rthi = rth; | ||
1148 | } | 1134 | } |
1149 | 1135 | ||
1150 | if (cand) { | 1136 | if (cand) { |
@@ -1205,10 +1191,7 @@ restart: | |||
1205 | } | 1191 | } |
1206 | } | 1192 | } |
1207 | 1193 | ||
1208 | if (rthi) | 1194 | rt->u.dst.rt_next = rt_hash_table[hash].chain; |
1209 | rt->u.dst.rt_next = rthi->u.dst.rt_next; | ||
1210 | else | ||
1211 | rt->u.dst.rt_next = rt_hash_table[hash].chain; | ||
1212 | 1195 | ||
1213 | #if RT_CACHE_DEBUG >= 2 | 1196 | #if RT_CACHE_DEBUG >= 2 |
1214 | if (rt->u.dst.rt_next) { | 1197 | if (rt->u.dst.rt_next) { |
@@ -1224,10 +1207,7 @@ restart: | |||
1224 | * previous writes to rt are comitted to memory | 1207 | * previous writes to rt are comitted to memory |
1225 | * before making rt visible to other CPUS. | 1208 | * before making rt visible to other CPUS. |
1226 | */ | 1209 | */ |
1227 | if (rthi) | 1210 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); |
1228 | rcu_assign_pointer(rthi->u.dst.rt_next, rt); | ||
1229 | else | ||
1230 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); | ||
1231 | 1211 | ||
1232 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1212 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1233 | *rp = rt; | 1213 | *rp = rt; |
@@ -3397,7 +3377,7 @@ int __init ip_rt_init(void) | |||
3397 | 0, | 3377 | 0, |
3398 | &rt_hash_log, | 3378 | &rt_hash_log, |
3399 | &rt_hash_mask, | 3379 | &rt_hash_mask, |
3400 | 0); | 3380 | rhash_entries ? 0 : 512 * 1024); |
3401 | memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); | 3381 | memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); |
3402 | rt_hash_lock_init(); | 3382 | rt_hash_lock_init(); |
3403 | 3383 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fafbec8b073e..7a0f0b27bf1f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1321,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1321 | struct task_struct *user_recv = NULL; | 1321 | struct task_struct *user_recv = NULL; |
1322 | int copied_early = 0; | 1322 | int copied_early = 0; |
1323 | struct sk_buff *skb; | 1323 | struct sk_buff *skb; |
1324 | u32 urg_hole = 0; | ||
1324 | 1325 | ||
1325 | lock_sock(sk); | 1326 | lock_sock(sk); |
1326 | 1327 | ||
@@ -1532,7 +1533,8 @@ do_prequeue: | |||
1532 | } | 1533 | } |
1533 | } | 1534 | } |
1534 | } | 1535 | } |
1535 | if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { | 1536 | if ((flags & MSG_PEEK) && |
1537 | (peek_seq - copied - urg_hole != tp->copied_seq)) { | ||
1536 | if (net_ratelimit()) | 1538 | if (net_ratelimit()) |
1537 | printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", | 1539 | printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", |
1538 | current->comm, task_pid_nr(current)); | 1540 | current->comm, task_pid_nr(current)); |
@@ -1553,6 +1555,7 @@ do_prequeue: | |||
1553 | if (!urg_offset) { | 1555 | if (!urg_offset) { |
1554 | if (!sock_flag(sk, SOCK_URGINLINE)) { | 1556 | if (!sock_flag(sk, SOCK_URGINLINE)) { |
1555 | ++*seq; | 1557 | ++*seq; |
1558 | urg_hole++; | ||
1556 | offset++; | 1559 | offset++; |
1557 | used--; | 1560 | used--; |
1558 | if (!used) | 1561 | if (!used) |
@@ -2511,6 +2514,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
2511 | struct sk_buff *p; | 2514 | struct sk_buff *p; |
2512 | struct tcphdr *th; | 2515 | struct tcphdr *th; |
2513 | struct tcphdr *th2; | 2516 | struct tcphdr *th2; |
2517 | unsigned int len; | ||
2514 | unsigned int thlen; | 2518 | unsigned int thlen; |
2515 | unsigned int flags; | 2519 | unsigned int flags; |
2516 | unsigned int mss = 1; | 2520 | unsigned int mss = 1; |
@@ -2531,6 +2535,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
2531 | 2535 | ||
2532 | skb_gro_pull(skb, thlen); | 2536 | skb_gro_pull(skb, thlen); |
2533 | 2537 | ||
2538 | len = skb_gro_len(skb); | ||
2534 | flags = tcp_flag_word(th); | 2539 | flags = tcp_flag_word(th); |
2535 | 2540 | ||
2536 | for (; (p = *head); head = &p->next) { | 2541 | for (; (p = *head); head = &p->next) { |
@@ -2561,7 +2566,7 @@ found: | |||
2561 | 2566 | ||
2562 | mss = skb_shinfo(p)->gso_size; | 2567 | mss = skb_shinfo(p)->gso_size; |
2563 | 2568 | ||
2564 | flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb); | 2569 | flush |= (len > mss) | !len; |
2565 | flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); | 2570 | flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); |
2566 | 2571 | ||
2567 | if (flush || skb_gro_receive(head, skb)) { | 2572 | if (flush || skb_gro_receive(head, skb)) { |
@@ -2574,7 +2579,7 @@ found: | |||
2574 | tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); | 2579 | tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); |
2575 | 2580 | ||
2576 | out_check_final: | 2581 | out_check_final: |
2577 | flush = skb_gro_len(skb) < mss; | 2582 | flush = len < mss; |
2578 | flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | | 2583 | flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | |
2579 | TCP_FLAG_SYN | TCP_FLAG_FIN); | 2584 | TCP_FLAG_SYN | TCP_FLAG_FIN); |
2580 | 2585 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c96a6bb25430..eec3e6f9956c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) | |||
597 | tcp_grow_window(sk, skb); | 597 | tcp_grow_window(sk, skb); |
598 | } | 598 | } |
599 | 599 | ||
600 | static u32 tcp_rto_min(struct sock *sk) | ||
601 | { | ||
602 | struct dst_entry *dst = __sk_dst_get(sk); | ||
603 | u32 rto_min = TCP_RTO_MIN; | ||
604 | |||
605 | if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) | ||
606 | rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); | ||
607 | return rto_min; | ||
608 | } | ||
609 | |||
610 | /* Called to compute a smoothed rtt estimate. The data fed to this | 600 | /* Called to compute a smoothed rtt estimate. The data fed to this |
611 | * routine either comes from timestamps, or from segments that were | 601 | * routine either comes from timestamps, or from segments that were |
612 | * known _not_ to have been retransmitted [see Karn/Partridge | 602 | * known _not_ to have been retransmitted [see Karn/Partridge |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 53300fa2359f..59aec609cec6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -778,7 +778,7 @@ static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) | |||
778 | 778 | ||
779 | if (tp->lost_skb_hint && | 779 | if (tp->lost_skb_hint && |
780 | before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && | 780 | before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && |
781 | (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) | 781 | (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) |
782 | tp->lost_cnt_hint -= decr; | 782 | tp->lost_cnt_hint -= decr; |
783 | 783 | ||
784 | tcp_verify_left_out(tp); | 784 | tcp_verify_left_out(tp); |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a453aac91bd3..c6743eec9b7d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) | |||
158 | } | 158 | } |
159 | EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); | 159 | EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); |
160 | 160 | ||
161 | static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) | ||
162 | { | ||
163 | return min(tp->snd_ssthresh, tp->snd_cwnd-1); | ||
164 | } | ||
165 | |||
161 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | 166 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) |
162 | { | 167 | { |
163 | struct tcp_sock *tp = tcp_sk(sk); | 168 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
221 | */ | 226 | */ |
222 | diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; | 227 | diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; |
223 | 228 | ||
224 | if (diff > gamma && tp->snd_ssthresh > 2 ) { | 229 | if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { |
225 | /* Going too fast. Time to slow down | 230 | /* Going too fast. Time to slow down |
226 | * and switch to congestion avoidance. | 231 | * and switch to congestion avoidance. |
227 | */ | 232 | */ |
228 | tp->snd_ssthresh = 2; | ||
229 | 233 | ||
230 | /* Set cwnd to match the actual rate | 234 | /* Set cwnd to match the actual rate |
231 | * exactly: | 235 | * exactly: |
@@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
235 | * utilization. | 239 | * utilization. |
236 | */ | 240 | */ |
237 | tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); | 241 | tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); |
242 | tp->snd_ssthresh = tcp_vegas_ssthresh(tp); | ||
238 | 243 | ||
239 | } else if (tp->snd_cwnd <= tp->snd_ssthresh) { | 244 | } else if (tp->snd_cwnd <= tp->snd_ssthresh) { |
240 | /* Slow start. */ | 245 | /* Slow start. */ |
@@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
250 | * we slow down. | 255 | * we slow down. |
251 | */ | 256 | */ |
252 | tp->snd_cwnd--; | 257 | tp->snd_cwnd--; |
258 | tp->snd_ssthresh | ||
259 | = tcp_vegas_ssthresh(tp); | ||
253 | } else if (diff < alpha) { | 260 | } else if (diff < alpha) { |
254 | /* We don't have enough extra packets | 261 | /* We don't have enough extra packets |
255 | * in the network, so speed up. | 262 | * in the network, so speed up. |