author    David S. Miller <davem@sunset.davemloft.net>  2005-09-18 03:17:10 -0400
committer David S. Miller <davem@sunset.davemloft.net>  2005-09-18 03:17:10 -0400
commit    21f130a2370ba837cdfc5204ebe52e7c664fec3d (patch)
tree      7f8a30088d8d39eab9350c59b6638661309ffe89
parent    bc5e8fdfc622b03acf5ac974a1b8b26da6511c99 (diff)
parent    c58ec93245a1fb7354f9e331960380827b9f41db (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
-rw-r--r--  CREDITS                                 |  26
-rw-r--r--  MAINTAINERS                             |  13
-rw-r--r--  drivers/net/tg3.c                       | 108
-rw-r--r--  include/linux/dccp.h                    |  40
-rw-r--r--  include/linux/pci_ids.h                 |   1
-rw-r--r--  net/Kconfig                             |   3
-rw-r--r--  net/dccp/dccp.h                         |   9
-rw-r--r--  net/dccp/input.c                        |  22
-rw-r--r--  net/dccp/ipv4.c                         |  38
-rw-r--r--  net/dccp/minisocks.c                    |   6
-rw-r--r--  net/dccp/output.c                       |  14
-rw-r--r--  net/dccp/proto.c                        |  85
-rw-r--r--  net/ipv4/netfilter/Kconfig              |  16
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_core.c  |   5
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c      | 223
-rw-r--r--  net/socket.c                            |   3
16 files changed, 380 insertions, 232 deletions
diff --git a/CREDITS b/CREDITS
index f553f8cfaa62..a347520bef2d 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2211,6 +2211,15 @@ D: OV511 driver
 S: (address available on request)
 S: USA
 
+N: Ian McDonald
+E: iam4@cs.waikato.ac.nz
+E: imcdnzl@gmail.com
+W: http://wand.net.nz/~iam4
+W: http://imcdnzl.blogspot.com
+D: DCCP, CCID3
+S: Hamilton
+S: New Zealand
+
 N: Patrick McHardy
 E: kaber@trash.net
 P: 1024D/12155E80 B128 7DE6 FF0A C2B2 48BE AB4C C9D4 964E 1215 5E80
@@ -2246,19 +2255,12 @@ S: D-90453 Nuernberg
 S: Germany
 
 N: Arnaldo Carvalho de Melo
-E: acme@conectiva.com.br
-E: acme@kernel.org
-E: acme@gnu.org
-W: http://bazar2.conectiva.com.br/~acme
-W: http://advogato.org/person/acme
+E: acme@mandriva.com
+E: acme@ghostprotocols.net
+W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
-D: wanrouter hacking
-D: misc Makefile, Config.in, drivers and network stacks fixes
-D: IPX & LLC network stacks maintainer
-D: Cyclom 2X synchronous card driver
-D: wl3501 PCMCIA wireless card driver
-D: i18n for minicom, net-tools, util-linux, fetchmail, etc
-S: Conectiva S.A.
+D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
+S: Mandriva
 S: R. Tocantins, 89 - Cristo Rei
 S: 80050-430 - Curitiba - Paraná
 S: Brazil
diff --git a/MAINTAINERS b/MAINTAINERS
index d1e0eb46d201..dc8f3babcabd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -686,6 +686,13 @@ P: Guennadi Liakhovetski
 M: g.liakhovetski@gmx.de
 S: Maintained
 
+DCCP PROTOCOL
+P: Arnaldo Carvalho de Melo
+M: acme@mandriva.com
+L: dccp@vger.kernel.org
+W: http://www.wlug.org.nz/DCCP
+S: Maintained
+
 DECnet NETWORK LAYER
 P: Patrick Caulfield
 M: patrick@tykepenguin.com
@@ -2271,12 +2278,6 @@ M: R.E.Wolff@BitWizard.nl
 L: linux-kernel@vger.kernel.org ?
 S: Supported
 
-SPX NETWORK LAYER
-P: Jay Schulist
-M: jschlst@samba.org
-L: netdev@vger.kernel.org
-S: Supported
-
 SRM (Alpha) environment access
 P: Jan-Benedict Glaw
 M: jbglaw@lug-owl.de
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 7599f52e15b3..81f4aedf534c 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -67,8 +67,8 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.39"
-#define DRV_MODULE_RELDATE	"September 5, 2005"
+#define DRV_MODULE_VERSION	"3.40"
+#define DRV_MODULE_RELDATE	"September 15, 2005"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
@@ -3442,31 +3442,47 @@ static void tg3_tx_timeout(struct net_device *dev)
 	schedule_work(&tp->reset_task);
 }
 
+/* Test for DMA buffers crossing any 4GB boundaries: 4G, 8G, etc */
+static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
+{
+	u32 base = (u32) mapping & 0xffffffff;
+
+	return ((base > 0xffffdcc0) &&
+		(base + len + 8 < base));
+}
+
 static void tg3_set_txd(struct tg3 *, int, dma_addr_t, int, u32, u32);
 
 static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
-				       u32 guilty_entry, int guilty_len,
-				       u32 last_plus_one, u32 *start, u32 mss)
+				       u32 last_plus_one, u32 *start,
+				       u32 base_flags, u32 mss)
 {
 	struct sk_buff *new_skb = skb_copy(skb, GFP_ATOMIC);
-	dma_addr_t new_addr;
+	dma_addr_t new_addr = 0;
 	u32 entry = *start;
-	int i;
+	int i, ret = 0;
 
 	if (!new_skb) {
-		dev_kfree_skb(skb);
-		return -1;
+		ret = -1;
+	} else {
+		/* New SKB is guaranteed to be linear. */
+		entry = *start;
+		new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
+					  PCI_DMA_TODEVICE);
+		/* Make sure new skb does not cross any 4G boundaries.
+		 * Drop the packet if it does.
+		 */
+		if (tg3_4g_overflow_test(new_addr, new_skb->len)) {
+			ret = -1;
+			dev_kfree_skb(new_skb);
+			new_skb = NULL;
+		} else {
+			tg3_set_txd(tp, entry, new_addr, new_skb->len,
+				    base_flags, 1 | (mss << 1));
+			*start = NEXT_TX(entry);
+		}
 	}
 
-	/* New SKB is guaranteed to be linear. */
-	entry = *start;
-	new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
-				  PCI_DMA_TODEVICE);
-	tg3_set_txd(tp, entry, new_addr, new_skb->len,
-		    (skb->ip_summed == CHECKSUM_HW) ?
-		    TXD_FLAG_TCPUDP_CSUM : 0, 1 | (mss << 1));
-	*start = NEXT_TX(entry);
-
 	/* Now clean up the sw ring entries. */
 	i = 0;
 	while (entry != last_plus_one) {
@@ -3491,7 +3507,7 @@ static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
 
 	dev_kfree_skb(skb);
 
-	return 0;
+	return ret;
 }
 
 static void tg3_set_txd(struct tg3 *tp, int entry,
@@ -3517,19 +3533,10 @@ static void tg3_set_txd(struct tg3 *tp, int entry,
 	txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
 }
 
-static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
-{
-	u32 base = (u32) mapping & 0xffffffff;
-
-	return ((base > 0xffffdcc0) &&
-		(base + len + 8 < base));
-}
-
 static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tg3 *tp = netdev_priv(dev);
 	dma_addr_t mapping;
-	unsigned int i;
 	u32 len, entry, base_flags, mss;
 	int would_hit_hwbug;
 
@@ -3624,7 +3631,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	would_hit_hwbug = 0;
 
 	if (tg3_4g_overflow_test(mapping, len))
-		would_hit_hwbug = entry + 1;
+		would_hit_hwbug = 1;
 
 	tg3_set_txd(tp, entry, mapping, len, base_flags,
 		    (skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
@@ -3648,12 +3655,8 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			tp->tx_buffers[entry].skb = NULL;
 			pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
 
-			if (tg3_4g_overflow_test(mapping, len)) {
-				/* Only one should match. */
-				if (would_hit_hwbug)
-					BUG();
-				would_hit_hwbug = entry + 1;
-			}
+			if (tg3_4g_overflow_test(mapping, len))
+				would_hit_hwbug = 1;
 
 			if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
 				tg3_set_txd(tp, entry, mapping, len,
@@ -3669,34 +3672,15 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (would_hit_hwbug) {
 		u32 last_plus_one = entry;
 		u32 start;
-		unsigned int len = 0;
-
-		would_hit_hwbug -= 1;
-		entry = entry - 1 - skb_shinfo(skb)->nr_frags;
-		entry &= (TG3_TX_RING_SIZE - 1);
-		start = entry;
-		i = 0;
-		while (entry != last_plus_one) {
-			if (i == 0)
-				len = skb_headlen(skb);
-			else
-				len = skb_shinfo(skb)->frags[i-1].size;
 
-			if (entry == would_hit_hwbug)
-				break;
-
-			i++;
-			entry = NEXT_TX(entry);
-
-		}
+		start = entry - 1 - skb_shinfo(skb)->nr_frags;
+		start &= (TG3_TX_RING_SIZE - 1);
 
 		/* If the workaround fails due to memory/mapping
 		 * failure, silently drop this packet.
 		 */
-		if (tigon3_4gb_hwbug_workaround(tp, skb,
-						entry, len,
-						last_plus_one,
-						&start, mss))
+		if (tigon3_4gb_hwbug_workaround(tp, skb, last_plus_one,
+						&start, base_flags, mss))
 			goto out_unlock;
 
 		entry = start;
@@ -9271,6 +9255,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	static struct pci_device_id write_reorder_chipsets[] = {
 		{ PCI_DEVICE(PCI_VENDOR_ID_AMD,
 			     PCI_DEVICE_ID_AMD_FE_GATE_700C) },
+		{ PCI_DEVICE(PCI_VENDOR_ID_AMD,
+			     PCI_DEVICE_ID_AMD_K8_NB) },
 		{ },
 	};
 	u32 misc_ctrl_reg;
@@ -9285,7 +9271,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 		tp->tg3_flags2 |= TG3_FLG2_SUN_570X;
 #endif
 
-	/* If we have an AMD 762 chipset, write
+	/* If we have an AMD 762 or K8 chipset, write
 	 * reordering to the mailbox registers done by the host
 	 * controller can cause major troubles. We read back from
 	 * every mailbox register write to force the writes to be
@@ -9532,7 +9518,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 		tp->write32_rx_mbox = tg3_write_indirect_mbox;
 
 		iounmap(tp->regs);
-		tp->regs = 0;
+		tp->regs = NULL;
 
 		pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd);
 		pci_cmd &= ~PCI_COMMAND_MEMORY;
@@ -10680,7 +10666,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 err_out_iounmap:
 	if (tp->regs) {
 		iounmap(tp->regs);
-		tp->regs = 0;
+		tp->regs = NULL;
 	}
 
err_out_free_dev:
@@ -10705,7 +10691,7 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev)
 	unregister_netdev(dev);
 	if (tp->regs) {
 		iounmap(tp->regs);
-		tp->regs = 0;
+		tp->regs = NULL;
 	}
 	free_netdev(dev);
 	pci_release_regions(pdev);
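Note on the tg3 change above: tg3_4g_overflow_test() is moved before its first user, and packets whose bounce copy still crosses a 4GB DMA boundary are now dropped instead of corrupting the ring. The following is a standalone userspace sketch, for illustration only (not part of the commit); it just mirrors the driver's check: the low 32 bits of the bus address wrap past zero exactly when the buffer crosses a 4GB boundary, and the 0xffffdcc0 threshold and extra 8 bytes reproduce the driver's margin.

#include <stdint.h>
#include <stdio.h>

/* Sketch of the same test as tg3_4g_overflow_test(), outside the kernel. */
static int crosses_4g_boundary(uint64_t mapping, uint32_t len)
{
	uint32_t base = (uint32_t)mapping;	/* low 32 bits of the bus address */

	/* wrap of (base + len + 8) means the buffer straddles a 4GB line */
	return (base > 0xffffdcc0) && (base + len + 8 < base);
}

int main(void)
{
	/* ends past the 4GB line -> would hit the hardware bug */
	printf("%d\n", crosses_4g_boundary(0x1fffffff0ULL, 0x100));
	/* well inside one 4GB window -> safe */
	printf("%d\n", crosses_4g_boundary(0x10000000ULL, 0x100));
	return 0;
}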
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 8bf4bacb5051..0e72708677e4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -4,16 +4,6 @@
 #include <linux/types.h>
 #include <asm/byteorder.h>
 
-/* Structure describing an Internet (DCCP) socket address. */
-struct sockaddr_dccp {
-	__u16 sdccp_family;	/* Address family */
-	__u16 sdccp_port;	/* Port number */
-	__u32 sdccp_addr;	/* Internet address */
-	__u32 sdccp_service;	/* Service */
-	/* Pad to size of `struct sockaddr': 16 bytes . */
-	__u32 sdccp_pad;
-};
-
 /**
  * struct dccp_hdr - generic part of DCCP packet header
  *
@@ -188,6 +178,9 @@ enum {
 
 /* DCCP socket options */
 #define DCCP_SOCKOPT_PACKET_SIZE 1
+#define DCCP_SOCKOPT_SERVICE	 2
+
+#define DCCP_SERVICE_LIST_MAX_LEN 32
 
 #ifdef __KERNEL__
 
@@ -382,6 +375,25 @@ enum dccp_role {
 	DCCP_ROLE_SERVER,
 };
 
+struct dccp_service_list {
+	__u32	dccpsl_nr;
+	__u32	dccpsl_list[0];
+};
+
+#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
+
+static inline int dccp_list_has_service(const struct dccp_service_list *sl,
+					const u32 service)
+{
+	if (likely(sl != NULL)) {
+		u32 i = sl->dccpsl_nr;
+		while (i--)
+			if (sl->dccpsl_list[i] == service)
+				return 1;
+	}
+	return 0;
+}
+
 /**
  * struct dccp_sock - DCCP socket state
  *
@@ -417,7 +429,8 @@ struct dccp_sock {
 	__u64 dccps_gss;
 	__u64 dccps_gsr;
 	__u64 dccps_gar;
-	unsigned long dccps_service;
+	__u32 dccps_service;
+	struct dccp_service_list *dccps_service_list;
 	struct timeval dccps_timestamp_time;
 	__u32 dccps_timestamp_echo;
 	__u32 dccps_packet_size;
@@ -443,6 +456,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk)
 	return (struct dccp_sock *)sk;
 }
 
+static inline int dccp_service_not_initialized(const struct sock *sk)
+{
+	return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE;
+}
+
 static inline const char *dccp_role(const struct sock *sk)
 {
 	switch (dccp_sk(sk)->dccps_role) {
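Note on the dccp.h hunk above: struct dccp_service_list keeps additional service codes in a zero-length trailing array sized at allocation time, and dccp_list_has_service() walks it backwards. A hedged userspace sketch of the same layout and lookup follows (hypothetical names, illustration only, not part of the commit):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct service_list {
	uint32_t nr;
	uint32_t list[];	/* flexible array, like dccpsl_list[0] */
};

/* Allocate header plus nr trailing codes in one block, as the kernel does
 * with kmalloc(optlen, ...) in dccp_setsockopt_service(). */
static struct service_list *service_list_new(const uint32_t *codes, uint32_t nr)
{
	struct service_list *sl = malloc(sizeof(*sl) + nr * sizeof(uint32_t));

	if (sl != NULL) {
		sl->nr = nr;
		memcpy(sl->list, codes, nr * sizeof(uint32_t));
	}
	return sl;
}

/* Same backwards walk as dccp_list_has_service(); NULL list means "no match". */
static int list_has_service(const struct service_list *sl, uint32_t service)
{
	uint32_t i = sl ? sl->nr : 0;

	while (i--)
		if (sl->list[i] == service)
			return 1;
	return 0;
}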
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 486d1c1676bd..c49d28eca561 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -491,6 +491,7 @@
 #define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060
 
 #define PCI_VENDOR_ID_AMD	0x1022
+#define PCI_DEVICE_ID_AMD_K8_NB	0x1100
 #define PCI_DEVICE_ID_AMD_LANCE	0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
 #define PCI_DEVICE_ID_AMD_SCSI	0x2020
diff --git a/net/Kconfig b/net/Kconfig
index 2bdd5623fdd5..60f6f321bd76 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -140,6 +140,7 @@ config BRIDGE_NETFILTER
 
 	  If unsure, say N.
 
+source "net/netfilter/Kconfig"
 source "net/ipv4/netfilter/Kconfig"
 source "net/ipv6/netfilter/Kconfig"
 source "net/decnet/netfilter/Kconfig"
@@ -206,8 +207,6 @@ config NET_PKTGEN
 	  To compile this code as a module, choose M here: the
 	  module will be called pktgen.
 
-source "net/netfilter/Kconfig"
-
 endmenu
 
 endmenu
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 95c4630b3b18..be7a660b6b24 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -258,13 +258,12 @@ extern int dccp_v4_send_reset(struct sock *sk,
 extern void dccp_send_close(struct sock *sk, const int active);
 
 struct dccp_skb_cb {
-	__u8  dccpd_type;
-	__u8  dccpd_reset_code;
-	__u8  dccpd_service;
-	__u8  dccpd_ccval;
+	__u8  dccpd_type:4;
+	__u8  dccpd_ccval:4;
+	__u8  dccpd_reset_code;
+	__u16 dccpd_opt_len;
 	__u64 dccpd_seq;
 	__u64 dccpd_ack_seq;
-	int   dccpd_opt_len;
 };
 
 #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/dccp/input.c b/net/dccp/input.c
index c74034cf7ede..062e9f8359d0 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -384,9 +384,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 	}
 
out_invalid_packet:
-	return 1; /* dccp_v4_do_rcv will send a reset, but...
-		     FIXME: the reset code should be
-		     DCCP_RESET_CODE_PACKET_ERROR */
+	/* dccp_v4_do_rcv will send a reset */
+	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
+	return 1;
 }
 
 static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -433,6 +433,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			   struct dccp_hdr *dh, unsigned len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 	const int old_state = sk->sk_state;
 	int queued = 0;
 
@@ -473,7 +474,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			if (dh->dccph_type == DCCP_PKT_RESET)
 				goto discard;
 
-			/* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
+			/* Caller (dccp_v4_do_rcv) will send Reset */
+			dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 			return 1;
 		}
 
@@ -487,8 +489,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dccp_parse_options(sk, skb))
 			goto discard;
 
-		if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
-		    DCCP_PKT_WITHOUT_ACK_SEQ)
+		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 			dccp_event_ack_recv(sk, skb);
 
 		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
@@ -500,7 +501,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 		if (dp->dccps_options.dccpo_send_ack_vector) {
 			if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
-					     DCCP_SKB_CB(skb)->dccpd_seq,
+					     dcb->dccpd_seq,
 					     DCCP_ACKPKTS_STATE_RECEIVED))
 				goto discard;
 			/*
@@ -551,8 +552,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		    dh->dccph_type == DCCP_PKT_REQUEST) ||
 		    (sk->sk_state == DCCP_RESPOND &&
 		     dh->dccph_type == DCCP_PKT_DATA)) {
-			dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
-				       DCCP_PKT_SYNC);
+			dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
 			goto discard;
 		} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
 			dccp_rcv_closereq(sk, skb);
@@ -563,13 +563,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
-		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
-			       DCCP_PKT_SYNCACK);
+		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
 		goto discard;
 	}
 
 	switch (sk->sk_state) {
 	case DCCP_CLOSED:
+		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		return 1;
 
 	case DCCP_REQUESTING:
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2afaa464e7f0..94a440b2685b 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -246,6 +246,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	dp->dccps_role = DCCP_ROLE_CLIENT;
 
+	if (dccp_service_not_initialized(sk))
+		return -EPROTO;
+
 	if (addr_len < sizeof(struct sockaddr_in))
 		return -EINVAL;
 
@@ -661,6 +664,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
 					   dccp_hdr(skb)->dccph_sport);
 }
 
+static inline int dccp_bad_service_code(const struct sock *sk,
+					const __u32 service)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+
+	if (dp->dccps_service == service)
+		return 0;
+	return !dccp_list_has_service(dp->dccps_service_list, service);
+}
+
 int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq;
@@ -669,13 +682,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct dccp_request_sock *dreq;
 	const __u32 saddr = skb->nh.iph->saddr;
 	const __u32 daddr = skb->nh.iph->daddr;
+	const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+	__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
 	struct dst_entry *dst = NULL;
 
 	/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
 	if (((struct rtable *)skb->dst)->rt_flags &
-	    (RTCF_BROADCAST | RTCF_MULTICAST))
+	    (RTCF_BROADCAST | RTCF_MULTICAST)) {
+		reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		goto drop;
+	}
 
+	if (dccp_bad_service_code(sk, service)) {
+		reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+		goto drop;
+	}
 	/*
 	 * TW buckets are converted to open requests without
 	 * limitations, they conserve resources and peer is
@@ -718,9 +740,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * dccp_create_openreq_child.
 	 */
 	dreq = dccp_rsk(req);
-	dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+	dreq->dreq_isr = dcb->dccpd_seq;
 	dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
-	dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
+	dreq->dreq_service = service;
 
 	if (dccp_v4_send_response(sk, req, dst))
 		goto drop_and_free;
@@ -735,6 +757,7 @@ drop_and_free:
 	__reqsk_free(req);
drop:
 	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+	dcb->dccpd_reset_code = reset_code;
 	return -1;
 }
 
@@ -1005,7 +1028,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	return 0;
 
reset:
-	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 	dccp_v4_ctl_send_reset(skb);
discard:
 	kfree_skb(skb);
@@ -1280,6 +1302,7 @@ static int dccp_v4_init_sock(struct sock *sk)
 	sk->sk_write_space = dccp_write_space;
 	dp->dccps_mss_cache = 536;
 	dp->dccps_role = DCCP_ROLE_UNDEFINED;
+	dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
 
 	return 0;
 }
@@ -1301,6 +1324,11 @@ static int dccp_v4_destroy_sock(struct sock *sk)
 	if (inet_csk(sk)->icsk_bind_hash != NULL)
 		inet_put_port(&dccp_hashinfo, sk);
 
+	if (dp->dccps_service_list != NULL) {
+		kfree(dp->dccps_service_list);
+		dp->dccps_service_list = NULL;
+	}
+
 	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
 	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
 	dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 18461bc04cbe..933e10db1789 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -93,9 +93,11 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		struct inet_connection_sock *newicsk = inet_csk(sk);
 		struct dccp_sock *newdp = dccp_sk(newsk);
 
+		newdp->dccps_role = DCCP_ROLE_SERVER;
 		newdp->dccps_hc_rx_ackpkts = NULL;
-		newdp->dccps_role = DCCP_ROLE_SERVER;
-		newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
+		newdp->dccps_service_list = NULL;
+		newdp->dccps_service = dreq->dreq_service;
+		newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
 		do_gettimeofday(&newdp->dccps_epoch);
 
 		if (newdp->dccps_options.dccpo_send_ack_vector) {
diff --git a/net/dccp/output.c b/net/dccp/output.c
index ea6d0e91e511..156b1d29a156 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -85,7 +85,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		switch (dcb->dccpd_type) {
 		case DCCP_PKT_REQUEST:
 			dccp_hdr_request(skb)->dccph_req_service =
-							dcb->dccpd_service;
+							dp->dccps_service;
 			break;
 		case DCCP_PKT_RESET:
 			dccp_hdr_reset(skb)->dccph_reset_code =
@@ -270,6 +270,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 				   struct request_sock *req)
 {
 	struct dccp_hdr *dh;
+	struct dccp_request_sock *dreq;
 	const int dccp_header_size = sizeof(struct dccp_hdr) +
 				     sizeof(struct dccp_hdr_ext) +
 				     sizeof(struct dccp_hdr_response);
@@ -285,8 +286,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	skb->dst = dst_clone(dst);
 	skb->csum = 0;
 
+	dreq = dccp_rsk(req);
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
-	DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
+	DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
 	dccp_insert_options(sk, skb);
 
 	skb->h.raw = skb_push(skb, dccp_header_size);
@@ -300,8 +302,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 					    DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
 	dh->dccph_type = DCCP_PKT_RESPONSE;
 	dh->dccph_x = 1;
-	dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
-	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
+	dccp_hdr_set_seq(dh, dreq->dreq_iss);
+	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
+	dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
 
 	dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
 					      inet_rsk(req)->rmt_addr);
@@ -397,9 +400,6 @@ int dccp_connect(struct sock *sk)
 	skb_reserve(skb, MAX_DCCP_HEADER);
 
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
-	/* FIXME: set service to something meaningful, coming
-	 * from userspace*/
-	DCCP_SKB_CB(skb)->dccpd_service = 0;
 	skb->csum = 0;
 	skb_set_owner_w(skb, sk);
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 18a0e69c9dc7..9bda2868eba6 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
 
 static inline int dccp_listen_start(struct sock *sk)
 {
-	dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	dp->dccps_role = DCCP_ROLE_LISTEN;
+	/*
+	 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
+	 * before calling listen()
+	 */
+	if (dccp_service_not_initialized(sk))
+		return -EPROTO;
 	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
 }
 
@@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 	return -ENOIOCTLCMD;
 }
 
+static int dccp_setsockopt_service(struct sock *sk, const u32 service,
+				   char __user *optval, int optlen)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_service_list *sl = NULL;
+
+	if (service == DCCP_SERVICE_INVALID_VALUE ||
+	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
+		return -EINVAL;
+
+	if (optlen > sizeof(service)) {
+		sl = kmalloc(optlen, GFP_KERNEL);
+		if (sl == NULL)
+			return -ENOMEM;
+
+		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
+		if (copy_from_user(sl->dccpsl_list,
+				   optval + sizeof(service),
+				   optlen - sizeof(service)) ||
+		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
+			kfree(sl);
+			return -EFAULT;
+		}
+	}
+
+	lock_sock(sk);
+	dp->dccps_service = service;
+
+	if (dp->dccps_service_list != NULL)
+		kfree(dp->dccps_service_list);
+
+	dp->dccps_service_list = sl;
+	release_sock(sk);
+	return 0;
+}
+
 int dccp_setsockopt(struct sock *sk, int level, int optname,
 		    char __user *optval, int optlen)
 {
@@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
-	lock_sock(sk);
+	if (optname == DCCP_SOCKOPT_SERVICE)
+		return dccp_setsockopt_service(sk, val, optval, optlen);
 
+	lock_sock(sk);
 	dp = dccp_sk(sk);
 	err = 0;
 
@@ -236,6 +282,37 @@
 	return err;
 }
 
+static int dccp_getsockopt_service(struct sock *sk, int len,
+				   u32 __user *optval,
+				   int __user *optlen)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+	const struct dccp_service_list *sl;
+	int err = -ENOENT, slen = 0, total_len = sizeof(u32);
+
+	lock_sock(sk);
+	if (dccp_service_not_initialized(sk))
+		goto out;
+
+	if ((sl = dp->dccps_service_list) != NULL) {
+		slen = sl->dccpsl_nr * sizeof(u32);
+		total_len += slen;
+	}
+
+	err = -EINVAL;
+	if (total_len > len)
+		goto out;
+
+	err = 0;
+	if (put_user(total_len, optlen) ||
+	    put_user(dp->dccps_service, optval) ||
+	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
+		err = -EFAULT;
+out:
+	release_sock(sk);
+	return err;
+}
+
 int dccp_getsockopt(struct sock *sk, int level, int optname,
 		    char __user *optval, int __user *optlen)
 {
@@ -248,6 +325,10 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
 	if (get_user(len, optlen))
 		return -EFAULT;
 
+	if (optname == DCCP_SOCKOPT_SERVICE)
+		return dccp_getsockopt_service(sk, len,
+					       (u32 __user *)optval, optlen);
+
 	len = min_t(unsigned int, len, sizeof(int));
 	if (len < 0)
 		return -EINVAL;
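Note on the proto.c change above: a server now has to set DCCP_SOCKOPT_SERVICE before listen(), otherwise dccp_listen_start() returns -EPROTO. Below is a hedged userspace sketch of that sequence, for illustration only; the socket type, protocol and option-level numbers are assumptions to be replaced by the real values from this tree's headers, and only DCCP_SOCKOPT_SERVICE = 2 comes from the diff itself.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>

#ifndef SOCK_DCCP
#define SOCK_DCCP	6	/* assumed value */
#endif
#ifndef IPPROTO_DCCP
#define IPPROTO_DCCP	33	/* assumed value */
#endif
#ifndef SOL_DCCP
#define SOL_DCCP	269	/* assumed value */
#endif
#define DCCP_SOCKOPT_SERVICE	2	/* from include/linux/dccp.h above */

int main(void)
{
	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
	uint32_t service = htonl(42);	/* example service code, network order */
	struct sockaddr_in sa;

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* A single __u32 installs the primary service code; a larger buffer
	 * would install an additional service list (see
	 * dccp_setsockopt_service() in the diff above). */
	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
		       &service, sizeof(service)) < 0)
		perror("setsockopt(DCCP_SOCKOPT_SERVICE)");

	memset(&sa, 0, sizeof(sa));
	sa.sin_family = AF_INET;
	sa.sin_port = htons(5001);
	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		perror("bind");
	/* without the setsockopt above, this would now fail with EPROTO */
	if (listen(fd, 5) < 0)
		perror("listen");
	return 0;
}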
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 30aa8e2ee214..85190de5710a 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -8,6 +8,7 @@ menu "IP: Netfilter Configuration"
 # connection tracking, helpers and protocols
 config IP_NF_CONNTRACK
 	tristate "Connection tracking (required for masq/NAT)"
+	select NETFILTER_NETLINK if IP_NF_CONNTRACK_NETLINK!=n
 	---help---
 	  Connection tracking keeps a record of what packets have passed
 	  through your machine, in order to figure out how they are related
@@ -51,6 +52,15 @@ config IP_NF_CONNTRACK_EVENTS
 
 	  IF unsure, say `N'.
 
+config IP_NF_CONNTRACK_NETLINK
+	tristate 'Connection tracking netlink interface'
+	depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+	default IP_NF_CONNTRACK if NETFILTER_NETLINK=y
+	default m if NETFILTER_NETLINK=m
+	help
+	  This option enables support for a netlink-based userspace interface
+
+
 config IP_NF_CT_PROTO_SCTP
 	tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
 	depends on IP_NF_CONNTRACK && EXPERIMENTAL
@@ -774,11 +784,5 @@ config IP_NF_ARP_MANGLE
 	  Allows altering the ARP packet payload: source and destination
 	  hardware and network addresses.
 
-config IP_NF_CONNTRACK_NETLINK
-	tristate 'Connection tracking netlink interface'
-	depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
-	help
-	  This option enables support for a netlink-based userspace interface
-
 endmenu
 
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 19cba16e6e1e..f8cd8e42961e 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
 	if (del_timer(&ct->timeout)) {
 		ct->timeout.expires = jiffies + extra_jiffies;
 		add_timer(&ct->timeout);
-		ip_conntrack_event_cache(IPCT_REFRESH, skb);
+		/* FIXME: We loose some REFRESH events if this function
+		 * is called without an skb. I'll fix this later -HW */
+		if (skb)
+			ip_conntrack_event_cache(IPCT_REFRESH, skb);
 	}
 	ct_add_counters(ct, ctinfo, skb);
 	write_unlock_bh(&ip_conntrack_lock);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d38913754b1..9bcb398fbc1f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -13,6 +13,7 @@
 #include <linux/config.h>
 #include <linux/proc_fs.h>
 #include <linux/jhash.h>
+#include <linux/bitops.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
@@ -30,7 +31,7 @@
 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 
-#define CLUSTERIP_VERSION "0.7"
+#define CLUSTERIP_VERSION "0.8"
 
 #define DEBUG_CLUSTERIP
 
@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP");
 struct clusterip_config {
 	struct list_head list;			/* list of all configs */
 	atomic_t refcount;			/* reference count */
+	atomic_t entries;			/* number of entries/rules
+						 * referencing us */
 
 	u_int32_t clusterip;			/* the IP address */
 	u_int8_t clustermac[ETH_ALEN];		/* the MAC address */
 	struct net_device *dev;			/* device */
 	u_int16_t num_total_nodes;		/* total number of nodes */
-	u_int16_t num_local_nodes;		/* number of local nodes */
-	u_int16_t local_nodes[CLUSTERIP_MAX_NODES];	/* node number array */
+	unsigned long local_nodes;		/* node number array */
 
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *pde;		/* proc dir entry */
@@ -66,8 +68,7 @@ struct clusterip_config {
 
 static LIST_HEAD(clusterip_configs);
 
-/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
- * data within all structurses (num_local_nodes, local_nodes[]) */
+/* clusterip_lock protects the clusterip_configs list */
 static DEFINE_RWLOCK(clusterip_lock);
 
 #ifdef CONFIG_PROC_FS
@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir;
 #endif
 
 static inline void
-clusterip_config_get(struct clusterip_config *c) {
+clusterip_config_get(struct clusterip_config *c)
+{
 	atomic_inc(&c->refcount);
 }
 
 static inline void
-clusterip_config_put(struct clusterip_config *c) {
-	if (atomic_dec_and_test(&c->refcount)) {
+clusterip_config_put(struct clusterip_config *c)
+{
+	if (atomic_dec_and_test(&c->refcount))
+		kfree(c);
+}
+
+/* increase the count of entries(rules) using/referencing this config */
+static inline void
+clusterip_config_entry_get(struct clusterip_config *c)
+{
+	atomic_inc(&c->entries);
+}
+
+/* decrease the count of entries using/referencing this config. If last
+ * entry(rule) is removed, remove the config from lists, but don't free it
+ * yet, since proc-files could still be holding references */
+static inline void
+clusterip_config_entry_put(struct clusterip_config *c)
+{
+	if (atomic_dec_and_test(&c->entries)) {
 		write_lock_bh(&clusterip_lock);
 		list_del(&c->list);
 		write_unlock_bh(&clusterip_lock);
+
 		dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
 		dev_put(c->dev);
-		kfree(c);
+
+		/* In case anyone still accesses the file, the open/close
+		 * functions are also incrementing the refcount on their own,
+		 * so it's safe to remove the entry even if it's in use. */
+#ifdef CONFIG_PROC_FS
+		remove_proc_entry(c->pde->name, c->pde->parent);
+#endif
 	}
 }
 
-
 static struct clusterip_config *
 __clusterip_config_find(u_int32_t clusterip)
 {
@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip)
 }
 
 static inline struct clusterip_config *
-clusterip_config_find_get(u_int32_t clusterip)
+clusterip_config_find_get(u_int32_t clusterip, int entry)
 {
 	struct clusterip_config *c;
 
@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip)
 		return NULL;
 	}
 	atomic_inc(&c->refcount);
+	if (entry)
+		atomic_inc(&c->entries);
 	read_unlock_bh(&clusterip_lock);
 
 	return c;
 }
 
+static void
+clusterip_config_init_nodelist(struct clusterip_config *c,
+			       const struct ipt_clusterip_tgt_info *i)
+{
+	int n;
+
+	for (n = 0; n < i->num_local_nodes; n++) {
+		set_bit(i->local_nodes[n] - 1, &c->local_nodes);
+	}
+}
+
 static struct clusterip_config *
 clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
 		      struct net_device *dev)
@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
 	c->clusterip = ip;
 	memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
 	c->num_total_nodes = i->num_total_nodes;
-	c->num_local_nodes = i->num_local_nodes;
-	memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
+	clusterip_config_init_nodelist(c, i);
 	c->hash_mode = i->hash_mode;
 	c->hash_initval = i->hash_initval;
 	atomic_set(&c->refcount, 1);
+	atomic_set(&c->entries, 1);
 
 #ifdef CONFIG_PROC_FS
 	/* create proc dir entry */
@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
 static int
 clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
 {
-	int i;
-
-	write_lock_bh(&clusterip_lock);
 
-	if (c->num_local_nodes >= CLUSTERIP_MAX_NODES
-	    || nodenum > CLUSTERIP_MAX_NODES) {
-		write_unlock_bh(&clusterip_lock);
+	if (nodenum == 0 ||
+	    nodenum > c->num_total_nodes)
 		return 1;
-	}
-
-	/* check if we alrady have this number in our array */
-	for (i = 0; i < c->num_local_nodes; i++) {
-		if (c->local_nodes[i] == nodenum) {
-			write_unlock_bh(&clusterip_lock);
-			return 1;
-		}
-	}
 
-	c->local_nodes[c->num_local_nodes++] = nodenum;
+	/* check if we already have this number in our bitfield */
+	if (test_and_set_bit(nodenum - 1, &c->local_nodes))
+		return 1;
 
-	write_unlock_bh(&clusterip_lock);
 	return 0;
 }
 
 static int
 clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
 {
-	int i;
-
-	write_lock_bh(&clusterip_lock);
-
-	if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
-		write_unlock_bh(&clusterip_lock);
+	if (nodenum == 0 ||
+	    nodenum > c->num_total_nodes)
 		return 1;
-	}
 
-	for (i = 0; i < c->num_local_nodes; i++) {
-		if (c->local_nodes[i] == nodenum) {
-			int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
-			memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
-			c->num_local_nodes--;
-			write_unlock_bh(&clusterip_lock);
-			return 0;
-		}
-	}
+	if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
		return 0;
 
-	write_unlock_bh(&clusterip_lock);
 	return 1;
 }
 
@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
 static inline int
 clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
 {
-	int i;
-
-	read_lock_bh(&clusterip_lock);
-
-	if (config->num_local_nodes == 0) {
-		read_unlock_bh(&clusterip_lock);
-		return 0;
-	}
-
-	for (i = 0; i < config->num_local_nodes; i++) {
-		if (config->local_nodes[i] == hash) {
-			read_unlock_bh(&clusterip_lock);
-			return 1;
-		}
-	}
-
-	read_unlock_bh(&clusterip_lock);
-
-	return 0;
+	return test_bit(hash - 1, &config->local_nodes);
 }
 
 /***********************************************************************
@@ -415,8 +411,26 @@ checkentry(const char *tablename,
 
 	/* FIXME: further sanity checks */
 
-	config = clusterip_config_find_get(e->ip.dst.s_addr);
-	if (!config) {
+	config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
+	if (config) {
+		if (cipinfo->config != NULL) {
+			/* Case A: This is an entry that gets reloaded, since
+			 * it still has a cipinfo->config pointer. Simply
+			 * increase the entry refcount and return */
+			if (cipinfo->config != config) {
+				printk(KERN_ERR "CLUSTERIP: Reloaded entry "
+				       "has invalid config pointer!\n");
+				return 0;
+			}
+			clusterip_config_entry_get(cipinfo->config);
+		} else {
+			/* Case B: This is a new rule referring to an existing
+			 * clusterip config. */
+			cipinfo->config = config;
+			clusterip_config_entry_get(cipinfo->config);
+		}
+	} else {
+		/* Case C: This is a completely new clusterip config */
 		if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
 			printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
 			return 0;
@@ -443,10 +457,9 @@
 			}
 			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
 		}
+		cipinfo->config = config;
 	}
 
-	cipinfo->config = config;
-
 	return 1;
 }
 
@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
 
-	/* we first remove the proc entry and then drop the reference
-	 * count. In case anyone still accesses the file, the open/close
-	 * functions are also incrementing the refcount on their own */
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry(cipinfo->config->pde->name,
-			  cipinfo->config->pde->parent);
-#endif
+	/* if no more entries are referencing the config, remove it
+	 * from the list and destroy the proc entry */
+	clusterip_config_entry_put(cipinfo->config);
+
 	clusterip_config_put(cipinfo->config);
 }
 
@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook,
 
 	/* if there is no clusterip configuration for the arp reply's
 	 * source ip, we don't want to mangle it */
-	c = clusterip_config_find_get(payload->src_ip);
+	c = clusterip_config_find_get(payload->src_ip, 0);
 	if (!c)
 		return NF_ACCEPT;
 
@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = {
 
 #ifdef CONFIG_PROC_FS
 
+struct clusterip_seq_position {
+	unsigned int pos;	/* position */
+	unsigned int weight;	/* number of bits set == size */
+	unsigned int bit;	/* current bit */
+	unsigned long val;	/* current value */
+};
+
 static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
 {
 	struct proc_dir_entry *pde = s->private;
 	struct clusterip_config *c = pde->data;
-	unsigned int *nodeidx;
-
-	read_lock_bh(&clusterip_lock);
-	if (*pos >= c->num_local_nodes)
+	unsigned int weight;
+	u_int32_t local_nodes;
+	struct clusterip_seq_position *idx;
+
+	/* FIXME: possible race */
+	local_nodes = c->local_nodes;
+	weight = hweight32(local_nodes);
+	if (*pos >= weight)
 		return NULL;
 
-	nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL);
-	if (!nodeidx)
+	idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
+	if (!idx)
 		return ERR_PTR(-ENOMEM);
 
-	*nodeidx = *pos;
-	return nodeidx;
+	idx->pos = *pos;
+	idx->weight = weight;
+	idx->bit = ffs(local_nodes);
+	idx->val = local_nodes;
+	clear_bit(idx->bit - 1, &idx->val);
+
+	return idx;
 }
 
 static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
-	struct proc_dir_entry *pde = s->private;
-	struct clusterip_config *c = pde->data;
-	unsigned int *nodeidx = (unsigned int *)v;
+	struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
 
-	*pos = ++(*nodeidx);
-	if (*pos >= c->num_local_nodes) {
+	*pos = ++idx->pos;
+	if (*pos >= idx->weight) {
 		kfree(v);
 		return NULL;
 	}
-	return nodeidx;
+	idx->bit = ffs(idx->val);
+	clear_bit(idx->bit - 1, &idx->val);
+	return idx;
 }
 
 static void clusterip_seq_stop(struct seq_file *s, void *v)
 {
 	kfree(v);
-
-	read_unlock_bh(&clusterip_lock);
 }
 
 static int clusterip_seq_show(struct seq_file *s, void *v)
 {
-	struct proc_dir_entry *pde = s->private;
-	struct clusterip_config *c = pde->data;
-	unsigned int *nodeidx = (unsigned int *)v;
+	struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
 
-	if (*nodeidx != 0)
+	if (idx->pos != 0)
 		seq_putc(s, ',');
-	seq_printf(s, "%u", c->local_nodes[*nodeidx]);
 
-	if (*nodeidx == c->num_local_nodes-1)
+	seq_printf(s, "%u", idx->bit);
+
+	if (idx->pos == idx->weight - 1)
 		seq_putc(s, '\n');
 
 	return 0;
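Note on the CLUSTERIP rewrite above: the u_int16_t local_nodes[] array is replaced by a single unsigned long used as a bitmap, so node N is bit N-1 and membership becomes a test_bit()/set_bit() on that word. A standalone sketch of the same idea with plain bit masks (illustration only, hypothetical names, not part of the commit):

#include <stdio.h>

static unsigned long local_nodes;	/* userspace stand-in for config->local_nodes */

/* mirrors clusterip_add_node(): reject node 0 or out-of-range, then set bit N-1 */
static int add_node(unsigned int nodenum, unsigned int num_total_nodes)
{
	if (nodenum == 0 || nodenum > num_total_nodes)
		return 1;
	if (local_nodes & (1UL << (nodenum - 1)))	/* already present */
		return 1;
	local_nodes |= 1UL << (nodenum - 1);		/* like set_bit(nodenum - 1, ...) */
	return 0;
}

/* mirrors clusterip_responsible(): like test_bit(hash - 1, &local_nodes) */
static int responsible(unsigned int hash)
{
	return !!(local_nodes & (1UL << (hash - 1)));
}

int main(void)
{
	add_node(1, 4);
	add_node(3, 4);
	printf("node 1: %d, node 2: %d, node 3: %d\n",
	       responsible(1), responsible(2), responsible(3));	/* prints 1, 0, 1 */
	return 0;
}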
diff --git a/net/socket.c b/net/socket.c
index c699e93c33d7..f9264472377f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
 		if (err < 0)
 			goto out_freeiov;
 	}
-	err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg));
+	err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
+			 COMPAT_FLAGS(msg));
 	if (err)
 		goto out_freeiov;
 	if (MSG_CMSG_COMPAT & flags)